Package org.terrier.structures

Examples of org.terrier.structures.BitPostingIndexInputStream


    if (index == null)
      throw new IOException("Index not found in JobConf:" + Index.getLastIndexLoadError());
   
    final String bitPostingStructureName = job.get(BITPOSTING_STRUCTURE_KEY);
   
    final BitPostingIndexInputStream postingStream = (BitPostingIndexInputStream)index.getIndexStructureInputStream(bitPostingStructureName);
    postingStream.skip(split.getStartingEntryIndex());
    //logger.info("BitPostingIndexRecordReader for structure "+ bitPostingStructureName + " start entry "+ split.getStartingEntryIndex() + " split size " + split.getEntryCount());
    return new BitPostingIndexRecordReader(postingStream, split.getStartingEntryIndex(), split.getEntryCount());
  }
View Full Code Here


      assertEquals(611, index.getCollectionStatistics().getFieldTokens()[1]);
     
     
     
      //now check correct type of all structures
      BitPostingIndexInputStream bpiis;
      IterablePosting ip;
      BitPostingIndex bpi;
     
      //check stream structures
      bpiis = (BitPostingIndexInputStream) index.getIndexStructureInputStream("direct");
      ip = bpiis.next();
      assertTrue(ip instanceof FieldPosting);
      bpiis = (BitPostingIndexInputStream) index.getIndexStructureInputStream("inverted");
      ip = bpiis.next();
      assertTrue(ip instanceof FieldPosting);
     
      //check random structures
      bpi = (BitPostingIndex) index.getInvertedIndex();
      ip = bpi.getPostings((BitIndexPointer) index.getLexicon().getLexiconEntry(0).getValue());
View Full Code Here

    assertNotNull(meta);
    assertEquals("doc1", index.getMetaIndex().getItem("filename", 0));
    assertEquals("doc2", index.getMetaIndex().getItem("filename", 1));
   
    IterablePosting ip = null;
    BitPostingIndexInputStream bpiis = null;
   
    /** INVERTED FILE */   
   
    Lexicon<String> lexicon = index.getLexicon();
   
    /**
     * Test {@link IterablePosting} entries from a {@link InvertedIndex}
     */
    InvertedIndex invertedIndex = index.getInvertedIndex();
    assertNotNull(invertedIndex);
    // for each term
    for (int t = 0; t < termStrings.length; t++) {
      LexiconEntry le = lexicon.getLexiconEntry(termStrings[t]);
      assertNotNull(le);
      ip = invertedIndex.getPostings((BitIndexPointer) le);
      // for each document
      int d = 0;
      while (ip.next() != IterablePosting.EOL) {
        assertEquals(invIds[t][d], ip.getId());
        assertEquals(invTfs[t][d], ip.getFrequency());
        assertEquals(doclens[invIds[t][d]], ip.getDocumentLength());
        if (fieldsExpected) {
          assertEquals(2, invFfs[t][d].length);
          for (int f = 0; f < 2; f++) {
            assertEquals(invFfs[t][d][f], ((FieldIterablePosting) ip).getFieldFrequencies()[f]);
          }
        }
        d++;
      }
      ip.close();
    }
    // post-check
    assertEquals(IterablePosting.EOL, ip.next());

    /**
     * Test {@link IterablePosting} entries from a {@link InvertedIndexInputStream}
     */
    bpiis = (BitPostingIndexInputStream) index.getIndexStructureInputStream("inverted");
    assertNotNull(bpiis);
    // for each term
    for (int t = 0; t < invIds.length; t++) {
      assertTrue(bpiis.hasNext());
      ip = bpiis.next();
      assertNotNull(ip);
      // for each document
      int d = 0;
      while (ip.next() != IterablePosting.EOL) {
        assertEquals(invIds[t][d], ip.getId());
        assertEquals(invTfs[t][d], ip.getFrequency());
        assertEquals(doclens[invIds[t][d]], ip.getDocumentLength());
        if (fieldsExpected) {
          assertEquals(2, invFfs[t][d].length);
          for (int f = 0; f < 2; f++) {
            assertEquals(invFfs[t][d][f], ((FieldIterablePosting) ip).getFieldFrequencies()[f]);
          }
        }
        d++;
      }
    }
    // post-check
    assertFalse(bpiis.hasNext());

    /**
     * Test posting array entries from a {@link InvertedIndex}
     */
    // for each term
    for (int t = 0; t < termStrings.length; t++) {
      LexiconEntry le = lexicon.getLexiconEntry(termStrings[t]);
      assertNotNull(le);
     
      int[][] documents = invertedIndex.getDocuments(le);
     
      if (!fieldsExpected) {
        assertTrue(documents.length >= 2);
      }
      else {
        // array should have length at least 4: 1 for the id, 1 for the
        // frequency, 2 for the fields (optionally more for the blocks)
        assertTrue(documents.length >= 4);
      }
     
      // check number of terms
      assertEquals(invIds[t].length, documents[0].length);
      assertEquals(invTfs[t].length, documents[1].length);
     
      // for each document
      for (int d = 0; d < documents[0].length; d++) {
        // test document id
        assertEquals(invIds[t][d], documents[0][d]);
        // test document frequency
        assertEquals(invTfs[t][d], documents[1][d]);
        if (fieldsExpected) {
          // test number of indexed fields
          assertEquals(2, invFfs[t][d].length);
          // test field frequency
          for (int f = 0; f < 2; f++) {
            assertEquals(invFfs[t][d][f], documents[2+f][d]);
          }
        }
      }
    }   
           
    /** DIRECT FILE */
   
    if (directExpected) {
      DocumentIndex documentIndex = index.getDocumentIndex();

      /**
       * Test {@link IterablePosting} entries from a {@link DirectIndex}
       */
      DirectIndex directIndex = index.getDirectIndex();
      assertNotNull(directIndex);
      // for each document
      for (int d = 0; d < dirTfs.length; d++) {
        DocumentIndexEntry de = documentIndex.getDocumentEntry(d);
        assertNotNull(de);
        ip = directIndex.getPostings((BitIndexPointer) de);
        FieldPosting fp = fieldsExpected ? (FieldPosting)ip : null;
        // for each term
        int t = 0;
        int countFoundTerms = 0;
        while (ip.next() != IterablePosting.EOL) {
          int termid = ip.getId();
          assertTrue(termid >= 0);
          String term = lexicon.getLexiconEntry(termid).getKey();
          assertNotNull(term);
          countFoundTerms++;
          assertTrue(dirTfs[d].containsKey(term));
          assertEquals(dirTfs[d].get(term), ip.getFrequency());
          assertEquals(doclens[d], ip.getDocumentLength());         
         
          if (fieldsExpected) {
            assertEquals(2, fp.getFieldFrequencies().length);
            for (int f = 0; f < 2; f++) {
              assertEquals(dirFfs[d].get(term)[f], fp.getFieldFrequencies()[f]);
            }
          }
          t++;
        }
        assertEquals(dirTfs[d].size() ,countFoundTerms);
        ip.close();
      }
      // post-check
      assertEquals(IterablePosting.EOL, ip.next());

      /**
       * Test {@link IterablePosting} entries from a {@link DirectIndexInputStream}
       */
      bpiis = (BitPostingIndexInputStream) index.getIndexStructureInputStream("direct");
      assertNotNull(bpiis);
      // for each document
      for (int d = 0; d < dirTfs.length; d++) {
        assertTrue(bpiis.hasNext());
        ip = bpiis.next();
        assertNotNull(ip);
        FieldPosting fp = fieldsExpected ? (FieldPosting)ip : null;
        // for each term
        int t = 0;
        int countFoundTerms = 0;
        while (ip.next() != IterablePosting.EOL) {
          int termid = ip.getId();
          assertTrue(termid >= 0);
          String term = lexicon.getLexiconEntry(termid).getKey();
          assertNotNull(term);
          countFoundTerms++;
          assertTrue(dirTfs[d].containsKey(term));
          assertEquals(dirTfs[d].get(term), ip.getFrequency());
          assertEquals(doclens[d], ip.getDocumentLength());         
         
          if (fieldsExpected) {
            assertEquals(2, fp.getFieldFrequencies().length);
            for (int f = 0; f < 2; f++) {
              assertEquals(dirFfs[d].get(term)[f], fp.getFieldFrequencies()[f]);
            }
          }
          t++;
        }
        assertEquals(dirTfs[d].size() ,countFoundTerms);
      }
      // post-check
      assertFalse(bpiis.hasNext());

      /**
       * Test posting array entries from a {@link DirectIndex}
       */
      // for each document
View Full Code Here

    @Override
    public void checkIndex(BatchEndToEndTest test, Index index)
        throws Exception
    {     
      //no check correct type of all structures
      BitPostingIndexInputStream bpiis;
      IterablePosting ip;
      BitPostingIndex bpi;
     
      //check stream structures
      bpiis = (BitPostingIndexInputStream) index.getIndexStructureInputStream("direct");
      ip = bpiis.next();
      assertTrue(ip instanceof BlockPosting);
      bpiis = (BitPostingIndexInputStream) index.getIndexStructureInputStream("inverted");
      ip = bpiis.next();
      assertTrue(ip instanceof BlockPosting);
     
      //check random structures
      bpi = (BitPostingIndex) index.getInvertedIndex();
      ip = bpi.getPostings((BitIndexPointer) index.getLexicon().getLexiconEntry(0).getValue());
View Full Code Here

TOP

Related Classes of org.terrier.structures.BitPostingIndexInputStream

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.