Package org.terrier.structures

Examples of org.terrier.structures.BasicDocumentIndexEntry


      {
        DocumentIndexEntry die =  diis.next();
        pointer.readFields(currentStream);
        DocumentIndexEntry newDIentry = fields
          ? new FieldDocumentIndexEntry(die)
          : new BasicDocumentIndexEntry(die);
        newDIentry.setOffset(pointer);
        newDIentry.setNumberOfEntries(pointer.getNumberOfEntries());
        dios.addEntryToBuffer(newDIentry);
      }
      //logger.info("Renaming reducer output as direct file");
      Files.delete(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+ BitIn.USUAL_EXTENSION);
      Files.rename(
          index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+outputPrefix + BitIn.USUAL_EXTENSION,
          index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+ BitIn.USUAL_EXTENSION);
      currentStream.close();
      Files.delete(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName +outputPrefix+ ".pointers");
    }
    else if (numberOfReducers <= numberOfReduceTaskLimits)
    {
      //logger.info("Merging direct index pointers from "+ numberOfReducers + " reducers");
      final int partitionSize = (int)Math.ceil( (double)(index.getCollectionStatistics().getNumberOfDocuments()) / (double)numberOfReducers);     
      for(byte reduce = 0; reduce < numberOfReducers; reduce++)
      {
        //logger.info("Merging in pointers from reduce task " + reduce);
        String outputPrefix = "-" + reduce;
        DataInputStream currentStream = new DataInputStream(Files.openFileStream(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName +outputPrefix+ ".pointers"));
        for(int docOffset = 0; docOffset < partitionSize && diis.hasNext(); docOffset++)
        {
          DocumentIndexEntry die =  diis.next();
          pointer.readFields(currentStream);
          DocumentIndexEntry newDIentry = fields
            ? new FieldDocumentIndexEntry(die)
            : new BasicDocumentIndexEntry(die);
          newDIentry.setOffset(pointer);
          newDIentry.setFileNumber(reduce);
          newDIentry.setNumberOfEntries(pointer.getNumberOfEntries());
          dios.addEntryToBuffer(newDIentry);
        }
        currentStream.close();
        Files.delete(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName +outputPrefix+ ".pointers");
        //logger.info("Renaming direct file part for reduce task " + reduce);
        String sourcePartDFfilename = index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+outputPrefix + BitIn.USUAL_EXTENSION;
        String destPartDFfilename = index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+ BitIn.USUAL_EXTENSION + reduce;       
        Files.rename(sourcePartDFfilename, destPartDFfilename);
      }
      index.setIndexProperty("index."+targetStructureName+".data-files", ""+numberOfReducers);
      index.flush();
      IndexUtil.close(diis);
    }
    else
    {
      //logger.info("Merging direct index output from "+ numberOfReducers + " reducers");
     
      final int partitionSize = (int)Math.ceil( (double)(index.getCollectionStatistics().getNumberOfDocuments()) / (double)numberOfReducers);
      final OutputStream DFout = Files.writeFileStream(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName+ BitIn.USUAL_EXTENSION);
      long finalFileOffset = 0;
     
      for(int reduce = 0; reduce < numberOfReducers; reduce++)
      {
        //logger.info("Copying document index part for reduce task " + reduce);
        String outputPrefix = "-" + reduce;
        DataInputStream currentStream = new DataInputStream(Files.openFileStream(index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + targetStructureName +outputPrefix+ ".pointers"));
        for(int docOffset = 0; docOffset < partitionSize && diis.hasNext(); docOffset++)
        {
          DocumentIndexEntry die =  diis.next();
          pointer.readFields(currentStream);
          DocumentIndexEntry newDIentry = fields
            ? new FieldDocumentIndexEntry(die)
            : new BasicDocumentIndexEntry(die);
          newDIentry.setOffset(finalFileOffset + pointer.getOffset(), pointer.getOffsetBits());
          newDIentry.setNumberOfEntries(pointer.getNumberOfEntries());
          dios.addEntryToBuffer(newDIentry);
        }
        currentStream.close();
View Full Code Here


        while (docidInput.hasNext())
      {
          DocumentIndexEntry old = docidInput.next();
          if (fieldCount == 0)
          {
            die = new BasicDocumentIndexEntry(old);
          }
          else
          {
            die = old;
          }
View Full Code Here

    }

    /** Return a DocumentIndexEntry for this document */
    public DocumentIndexEntry getDocumentStatistics()
  {
    DocumentIndexEntry die = new BasicDocumentIndexEntry();
    die.setDocumentLength(this.getDocumentLength());
    die.setNumberOfEntries(this.getNumberOfPointers());
    return die;
  }
View Full Code Here

  //    logger.error("Cannot make DirectInvertedOutputStream:", ioe);
    }
      //  new DirectIndexBuilder(currentIndex, "direct");
    docIndexBuilder = new DocumentIndexBuilder(currentIndex, "document");
    metaBuilder = createMetaIndexBuilder();
    emptyDocIndexEntry = (FieldScore.FIELDS_COUNT > 0) ? new FieldDocumentIndexEntry(FieldScore.FIELDS_COUNT) : new BasicDocumentIndexEntry();
       
    //int LexiconCount = 0;
    int numberOfDocuments = 0; int numberOfTokens = 0;
    //final long startBunchOfDocuments = System.currentTimeMillis();
    final int collections_length = collections.length;
View Full Code Here

    } catch (IOException ioe) {
  //    logger.error("Cannot make DirectInvertedOutputStream:", ioe);
    }
    docIndexBuilder = new DocumentIndexBuilder(currentIndex, "document");
    metaBuilder = createMetaIndexBuilder();
    emptyDocIndexEntry = (FieldScore.FIELDS_COUNT > 0) ? new FieldDocumentIndexEntry(FieldScore.FIELDS_COUNT) : new BasicDocumentIndexEntry();
   
    //int LexiconCount = 0;
    int numberOfDocuments = 0;
    int numberOfTokens = 0;
    //long startBunchOfDocuments = System.currentTimeMillis();
View Full Code Here

    BitIndexPointer p;
    for(int[] list : postings)
    {
      final int doclen = StaTools.sum(list);
      p = dios.writePostings(new ArrayOfIdsIterablePosting(list));
      DocumentIndexEntry die = new BasicDocumentIndexEntry(doclen, p);
      dib.addEntryToBuffer(die);
    }
    dios.close();
    dib.finishedCollections();
    index.addIndexStructure(
View Full Code Here

TOP

Related Classes of org.terrier.structures.BasicDocumentIndexEntry

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.