String outputPrefix = "-0";
DataInputStream currentStream = new DataInputStream(Files.openFileStream(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName +outputPrefix+ ".pointers"));
//logger.info("Adding pointers to the document index");
while(diis.hasNext())
{
DocumentIndexEntry die = diis.next();
pointer.readFields(currentStream);
DocumentIndexEntry newDIentry = fields
? new FieldDocumentIndexEntry(die)
: new BasicDocumentIndexEntry(die);
newDIentry.setOffset(pointer);
newDIentry.setNumberOfEntries(pointer.getNumberOfEntries());
dios.addEntryToBuffer(newDIentry);
}
//logger.info("Renaming reducer output as direct file");
Files.delete(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+ BitIn.USUAL_EXTENSION);
Files.rename(
index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+outputPrefix + BitIn.USUAL_EXTENSION,
index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+ BitIn.USUAL_EXTENSION);
currentStream.close();
Files.delete(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName +outputPrefix+ ".pointers");
}
else if (numberOfReducers <= numberOfReduceTaskLimits)
{
	// Multi-file case: each reducer's direct file is kept as its own data
	// file of the target structure (renamed with a numeric suffix), and every
	// rewritten document index entry records which of those files it points into.

	// Common filename stem shared by every file touched in this branch.
	final String structureFileBase = index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName;

	// Maximum number of document index entries consumed per reducer; rounded
	// up so that numberOfReducers * partitionSize covers every document.
	// presumably this mirrors how documents were partitioned across reducers - verify against the partitioner.
	final int partitionSize = (int)Math.ceil( (double)(index.getCollectionStatistics().getNumberOfDocuments()) / (double)numberOfReducers);

	// NOTE(review): the counter is a byte, so this loop only terminates if
	// numberOfReducers <= 127; confirm numberOfReduceTaskLimits guarantees that.
	for(byte reduce = 0; reduce < numberOfReducers; reduce++)
	{
		final String outputPrefix = "-" + reduce;
		final String pointersFilename = structureFileBase + outputPrefix + ".pointers";
		final DataInputStream currentStream = new DataInputStream(Files.openFileStream(pointersFilename));
		try
		{
			// Pair the next partitionSize document index entries with the
			// pointers written by this reducer, one pointer per entry.
			for(int docOffset = 0; docOffset < partitionSize && diis.hasNext(); docOffset++)
			{
				DocumentIndexEntry die = diis.next();
				pointer.readFields(currentStream);
				DocumentIndexEntry newDIentry = fields
					? new FieldDocumentIndexEntry(die)
					: new BasicDocumentIndexEntry(die);
				newDIentry.setOffset(pointer);
				// The entry's file number is the reducer number, matching the
				// suffix given to the renamed data file below.
				newDIentry.setFileNumber(reduce);
				newDIentry.setNumberOfEntries(pointer.getNumberOfEntries());
				dios.addEntryToBuffer(newDIentry);
			}
		}
		finally
		{
			// Always release the pointer stream, even when reading or
			// buffering an entry fails part-way through.
			currentStream.close();
		}
		// Only reached on success: the temporary pointers file is no longer needed.
		Files.delete(pointersFilename);

		// Rename this reducer's direct file part to <structure><extension><reduce>.
		final String sourcePartDFfilename = structureFileBase + outputPrefix + BitIn.USUAL_EXTENSION;
		final String destPartDFfilename = structureFileBase + BitIn.USUAL_EXTENSION + reduce;
		Files.rename(sourcePartDFfilename, destPartDFfilename);
	}
	// Record how many data files now make up the structure, then persist the index properties.
	index.setIndexProperty("index."+targetStructureName+".data-files", ""+numberOfReducers);
	index.flush();
	IndexUtil.close(diis);
}
else
{
//logger.info("Merging direct index output from "+ numberOfReducers + " reducers");
final int partitionSize = (int)Math.ceil( (double)(index.getCollectionStatistics().getNumberOfDocuments()) / (double)numberOfReducers);
final OutputStream DFout = Files.writeFileStream(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+ BitIn.USUAL_EXTENSION);
long finalFileOffset = 0;
for(int reduce = 0; reduce < numberOfReducers; reduce++)
{
//logger.info("Copying document index part for reduce task " + reduce);
String outputPrefix = "-" + reduce;
DataInputStream currentStream = new DataInputStream(Files.openFileStream(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName +outputPrefix+ ".pointers"));
for(int docOffset = 0; docOffset < partitionSize && diis.hasNext(); docOffset++)
{
DocumentIndexEntry die = diis.next();
pointer.readFields(currentStream);
DocumentIndexEntry newDIentry = fields
? new FieldDocumentIndexEntry(die)
: new BasicDocumentIndexEntry(die);
newDIentry.setOffset(finalFileOffset + pointer.getOffset(), pointer.getOffsetBits());
newDIentry.setNumberOfEntries(pointer.getNumberOfEntries());
dios.addEntryToBuffer(newDIentry);
}
currentStream.close();
Files.delete(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName +outputPrefix+ ".pointers");
//logger.info("Copying direct file part for reduce task " + reduce);