// Prepare the write phase: remember the first output segment directory,
// start the timers and open a writer on that directory.
outDirs.add(outDir);
LOG.info("* Merging all segments into " + output.getName());
// s1 is the start time reported by the final "Merging took" message;
// delta is the start of the current progress-logging interval.
s1 = System.currentTimeMillis();
delta = s1;
// Create the segment directory through nfs before opening the writer on it.
nfs.mkdirs(outDir);
SegmentWriter sw = new SegmentWriter(nfs, outDir, true);
LOG.fine(" - opening first output segment in " + outDir.getName());
// Holder objects created once and reused for every record: each one is
// filled by the segment reader and then handed to the writer.
FetcherOutput fo = new FetcherOutput();
Content co = new Content();
ParseText pt = new ParseText();
ParseData pd = new ParseData();
// Incremented once for every record appended to the output.
int outputCnt = 0;
// Copy every live document referenced by the index into the output
// segment(s). A record that cannot be located or read is skipped, so that
// one bad entry or one corrupted input segment does not abort the merge.
for (int n = 0; n < ir.maxDoc(); n++) {
  if (ir.isDeleted(n)) {
    continue;
  }
  Document doc = ir.document(n);
  // The "sd" field is split at the first '|': the left part is the key
  // into readers, the right part is the record number in that segment.
  String segDoc = doc.get("sd");
  int idx = (segDoc == null) ? -1 : segDoc.indexOf('|');
  if (idx < 0) {
    // Field missing or without separator: nothing to look up.
    LOG.fine(" - malformed segment/record entry '" + segDoc + "' in index document " + n + " - skipping.");
    continue;
  }
  String segName = segDoc.substring(0, idx);
  String docName = segDoc.substring(idx + 1);
  SegmentReader sr = (SegmentReader) readers.get(segName);
  if (sr == null) {
    // Guard here: otherwise the corrupt-record handler below would itself
    // fail with a NullPointerException while building its log message.
    LOG.fine(" - no open reader for segment " + segName + " - skipping record " + docName + ".");
    continue;
  }
  long docid;
  try {
    docid = Long.parseLong(docName);
  } catch (NumberFormatException e) {
    LOG.fine(" - invalid record number '" + docName + "' in segment " + segName + " - skipping.");
    continue;
  }
  try {
    // get data from the reader
    sr.get(docid, fo, co, pt, pd);
  } catch (Throwable thr) {
    // don't break the loop, because only one of the segments
    // may be corrupted...
    LOG.fine(" - corrupt record no. " + docid + " in segment " + sr.segmentDir.getName() + " - skipping.");
    continue;
  }
  sw.append(fo, co, pt, pd);
  outputCnt++;
  processedRecords++;
  // Report throughput once every LOG_STEP records and restart the interval.
  if (processedRecords > 0 && (processedRecords % LOG_STEP == 0)) {
    LOG.info(" Processed " + processedRecords + " records (" +
        (float)(LOG_STEP * 1000)/(float)(System.currentTimeMillis() - delta) + " rec/s)");
    delta = System.currentTimeMillis();
  }
  // Roll over to a fresh output segment every maxCount records.
  // NOTE(review): keyed on processedRecords rather than outputCnt - this
  // presumably relies on processedRecords starting at 0 for this phase; confirm.
  // NOTE(review): when the last record lands exactly on a multiple of
  // maxCount, the segment opened here receives no records.
  if (processedRecords % maxCount == 0) {
    sw.close();
    outDir = new File(output, SegmentWriter.getNewSegmentName());
    LOG.fine(" - starting next output segment in " + outDir.getName());
    nfs.mkdirs(outDir);
    sw = new SegmentWriter(nfs, outDir, true);
    outDirs.add(outDir);
  }
}
LOG.info("* Merging took " + (System.currentTimeMillis() - s1) + " ms");
ir.close();
sw.close();
FileUtil.fullyDelete(fsmtIndexDir);
for (Iterator iter = readers.keySet().iterator(); iter.hasNext();) {
SegmentReader sr = (SegmentReader) readers.get(iter.next());
sr.close();
}