Package org.terrier.structures

Examples of org.terrier.structures.PostingIndexInputStream


        return;
      }
     
     
      final Iterator<DocumentIndexEntry> docidInput1 = (Iterator<DocumentIndexEntry>)srcIndex1.getIndexStructureInputStream("document");
      final PostingIndexInputStream dfInput1 = (PostingIndexInputStream)srcIndex1.getIndexStructureInputStream("direct");
      final MetaIndex metaInput1 = srcIndex1.getMetaIndex();
     
      int sourceDocid = 0;
      //traversing the direct index, without any change
      while(docidInput1.hasNext())
      {
        BitIndexPointer pointerDF = emptyPointer;
        DocumentIndexEntry die = docidInput1.next();
        if (die.getDocumentLength() > 0)
        {
          pointerDF = dfOutput.writePostings(dfInput1.next());
        }
        die.setBitIndexPointer(pointerDF);
        docidOutput.addEntryToBuffer(die);
        metaBuilder.writeDocumentEntry(metaInput1.getAllItems(sourceDocid));
        sourceDocid++;
      }
      dfInput1.close();
      metaInput1.close();
      IndexUtil.close(docidInput1);
      final Iterator<DocumentIndexEntry> docidInput2 = (Iterator<DocumentIndexEntry>)srcIndex2.getIndexStructureInputStream("document");
      final PostingIndexInputStream dfInput2 = (PostingIndexInputStream)srcIndex2.getIndexStructureInputStream("direct");
      final MetaIndex metaInput2 = srcIndex2.getMetaIndex();
     
      sourceDocid = 0;
      while (docidInput2.hasNext())
      {
        DocumentIndexEntry die = docidInput2.next();
     
        BitIndexPointer pointerDF = emptyPointer;
        if (die.getDocumentLength() > 0)
        {
          final IterablePosting postings = dfInput2.next();
         
          List<Posting> postingList = new ArrayList<Posting>();
          while(postings.next() != IterablePosting.EOL)
          {
            final Posting p = postings.asWritablePosting();
            p.setId(termcodeHashmap.get(postings.getId()));
            postingList.add(p);
          }
          Collections.sort(postingList, new PostingIdComparator());
          pointerDF = dfOutput.writePostings(postingList.iterator());
        }
        die.setBitIndexPointer(pointerDF);
        docidOutput.addEntryToBuffer(die);
        metaBuilder.writeDocumentEntry(metaInput2.getAllItems(sourceDocid));
        sourceDocid++;
      }
      dfInput2.close();
      IndexUtil.close(docidInput2);
      metaInput2.close();
     
      metaBuilder.close();
      dfOutput.close();
View Full Code Here


    TIntHashSet termIds = new TIntHashSet();
   
    long tokens = 0;
    long pointers = 0;
    int docid = 0;
    final PostingIndexInputStream piis = (PostingIndexInputStream) index.getIndexStructureInputStream("direct");
    assertNotNull("No direct index input stream found", piis);
    while(piis.hasNext())
    {
      IterablePosting ip = piis.next();
      int doclen = 0int docpointers = 0;   
      docid += piis.getEntriesSkipped();
      //System.err.println("getEntriesSkipped=" + piis.getEntriesSkipped());
      //System.err.println("docid=" + docid);
      while(ip.next() != IterablePosting.EOL)
      {
        //System.err.println("termid" +ip.getId() + " f=" + ip.getFrequency());
View Full Code Here

TOP

Related Classes of org.terrier.structures.PostingIndexInputStream

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.