String sourceStructureName, String targetStructureName,
boolean[] blocksfields, final int numberOfReducers, final int numberOfReduceTaskLimits)
throws IOException, Exception
{
Iterator<DocumentIndexEntry> diis = (Iterator<DocumentIndexEntry>)index.getIndexStructureInputStream("document");
DocumentIndexBuilder dios = new DocumentIndexBuilder(index, "document-df");
BitIndexPointer pointer = new SimpleBitIndexPointer();
final boolean blocks = blocksfields[0];
final boolean fields = blocksfields[1];
if (numberOfReducers == 1)
{
String outputPrefix = "-0";
DataInputStream currentStream = new DataInputStream(Files.openFileStream(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName +outputPrefix+ ".pointers"));
//logger.info("Adding pointers to the document index");
while(diis.hasNext())
{
DocumentIndexEntry die = diis.next();
pointer.readFields(currentStream);
DocumentIndexEntry newDIentry = fields
? new FieldDocumentIndexEntry(die)
: new BasicDocumentIndexEntry(die);
newDIentry.setOffset(pointer);
newDIentry.setNumberOfEntries(pointer.getNumberOfEntries());
dios.addEntryToBuffer(newDIentry);
}
//logger.info("Renaming reducer output as direct file");
Files.delete(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+ BitIn.USUAL_EXTENSION);
Files.rename(
index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+outputPrefix + BitIn.USUAL_EXTENSION,
index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+ BitIn.USUAL_EXTENSION);
currentStream.close();
Files.delete(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName +outputPrefix+ ".pointers");
}
else if (numberOfReducers <= numberOfReduceTaskLimits)
{
//logger.info("Merging direct index pointers from "+ numberOfReducers + " reducers");
final int partitionSize = (int)Math.ceil( (double)(index.getCollectionStatistics().getNumberOfDocuments()) / (double)numberOfReducers);
for(byte reduce = 0; reduce < numberOfReducers; reduce++)
{
//logger.info("Merging in pointers from reduce task " + reduce);
String outputPrefix = "-" + reduce;
DataInputStream currentStream = new DataInputStream(Files.openFileStream(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName +outputPrefix+ ".pointers"));
for(int docOffset = 0; docOffset < partitionSize && diis.hasNext(); docOffset++)
{
DocumentIndexEntry die = diis.next();
pointer.readFields(currentStream);
DocumentIndexEntry newDIentry = fields
? new FieldDocumentIndexEntry(die)
: new BasicDocumentIndexEntry(die);
newDIentry.setOffset(pointer);
newDIentry.setFileNumber(reduce);
newDIentry.setNumberOfEntries(pointer.getNumberOfEntries());
dios.addEntryToBuffer(newDIentry);
}
currentStream.close();
Files.delete(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName +outputPrefix+ ".pointers");
//logger.info("Renaming direct file part for reduce task " + reduce);
String sourcePartDFfilename = index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+outputPrefix + BitIn.USUAL_EXTENSION;
String destPartDFfilename = index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+ BitIn.USUAL_EXTENSION + reduce;
Files.rename(sourcePartDFfilename, destPartDFfilename);
}
index.setIndexProperty("index."+targetStructureName+".data-files", ""+numberOfReducers);
index.flush();
IndexUtil.close(diis);
}
else
{
//logger.info("Merging direct index output from "+ numberOfReducers + " reducers");
final int partitionSize = (int)Math.ceil( (double)(index.getCollectionStatistics().getNumberOfDocuments()) / (double)numberOfReducers);
final OutputStream DFout = Files.writeFileStream(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+ BitIn.USUAL_EXTENSION);
long finalFileOffset = 0;
for(int reduce = 0; reduce < numberOfReducers; reduce++)
{
//logger.info("Copying document index part for reduce task " + reduce);
String outputPrefix = "-" + reduce;
DataInputStream currentStream = new DataInputStream(Files.openFileStream(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName +outputPrefix+ ".pointers"));
for(int docOffset = 0; docOffset < partitionSize && diis.hasNext(); docOffset++)
{
DocumentIndexEntry die = diis.next();
pointer.readFields(currentStream);
DocumentIndexEntry newDIentry = fields
? new FieldDocumentIndexEntry(die)
: new BasicDocumentIndexEntry(die);
newDIentry.setOffset(finalFileOffset + pointer.getOffset(), pointer.getOffsetBits());
newDIentry.setNumberOfEntries(pointer.getNumberOfEntries());
dios.addEntryToBuffer(newDIentry);
}
currentStream.close();
Files.delete(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName +outputPrefix+ ".pointers");
//logger.info("Copying direct file part for reduce task " + reduce);
String partDFfilename = index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+outputPrefix + BitIn.USUAL_EXTENSION;
InputStream partDF = Files.openFileStream(partDFfilename);
finalFileOffset += Files.length(partDFfilename);
IOUtils.copyBytes(partDF, DFout, conf, false);
partDF.close();
Files.delete(partDFfilename);
}
IndexUtil.close(diis);
DFout.close();
}
dios.close();
Files.copyFile(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + "document.fsarrayfile", index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + "document-backup.fsarrayfile");
IndexUtil.renameIndexStructure(index, "document-df", "document");
if (fields)
{
index.addIndexStructure("document-factory", FieldDocumentIndexEntry.Factory.class.getName(), "java.lang.String", "${index.direct.fields.count}");