Package org.terrier.structures

Examples of org.terrier.structures.DocumentIndex


    }   
           
    /** DIRECT FILE */
   
    if (directExpected) {
      DocumentIndex documentIndex = index.getDocumentIndex();

      /**
       * Test {@link IterablePosting} entries from a {@link DirectIndex}
       */
      DirectIndex directIndex = index.getDirectIndex();
      assertNotNull(directIndex);
      // for each document
      for (int d = 0; d < dirTfs.length; d++) {
        DocumentIndexEntry de = documentIndex.getDocumentEntry(d);
        assertNotNull(de);
        ip = directIndex.getPostings((BitIndexPointer) de);
        FieldPosting fp = fieldsExpected ? (FieldPosting)ip : null;
        // for each term
        int t = 0;
        int countFoundTerms = 0;
        while (ip.next() != IterablePosting.EOL) {
          int termid = ip.getId();
          assertTrue(termid >= 0);
          String term = lexicon.getLexiconEntry(termid).getKey();
          assertNotNull(term);
          countFoundTerms++;
          assertTrue(dirTfs[d].containsKey(term));
          assertEquals(dirTfs[d].get(term), ip.getFrequency());
          assertEquals(doclens[d], ip.getDocumentLength());         
         
          if (fieldsExpected) {
            assertEquals(2, fp.getFieldFrequencies().length);
            for (int f = 0; f < 2; f++) {
              assertEquals(dirFfs[d].get(term)[f], fp.getFieldFrequencies()[f]);
            }
          }
          t++;
        }
        assertEquals(dirTfs[d].size() ,countFoundTerms);
        ip.close();
      }
      // post-check
      assertEquals(IterablePosting.EOL, ip.next());

      /**
       * Test {@link IterablePosting} entries from a {@link DirectIndexInputStream}
       */
      bpiis = (BitPostingIndexInputStream) index.getIndexStructureInputStream("direct");
      assertNotNull(bpiis);
      // for each document
      for (int d = 0; d < dirTfs.length; d++) {
        assertTrue(bpiis.hasNext());
        ip = bpiis.next();
        assertNotNull(ip);
        FieldPosting fp = fieldsExpected ? (FieldPosting)ip : null;
        // for each term
        int t = 0;
        int countFoundTerms = 0;
        while (ip.next() != IterablePosting.EOL) {
          int termid = ip.getId();
          assertTrue(termid >= 0);
          String term = lexicon.getLexiconEntry(termid).getKey();
          assertNotNull(term);
          countFoundTerms++;
          assertTrue(dirTfs[d].containsKey(term));
          assertEquals(dirTfs[d].get(term), ip.getFrequency());
          assertEquals(doclens[d], ip.getDocumentLength());         
         
          if (fieldsExpected) {
            assertEquals(2, fp.getFieldFrequencies().length);
            for (int f = 0; f < 2; f++) {
              assertEquals(dirFfs[d].get(term)[f], fp.getFieldFrequencies()[f]);
            }
          }
          t++;
        }
        assertEquals(dirTfs[d].size() ,countFoundTerms);
      }
      // post-check
      assertFalse(bpiis.hasNext());

      /**
       * Test posting array entries from a {@link DirectIndex}
       */
      // for each document
      for (int d = 0; d < dirTfs.length; d++) {
        DocumentIndexEntry de = documentIndex.getDocumentEntry(d);
        assertNotNull(de);
       
        int[][] terms = directIndex.getTerms(de);
       
        if (!fieldsExpected) {
View Full Code Here


    //check docid is as large as expected
    assertEquals("Metaindex as stream didnt have expected number of entries", lengths.length -1, docid);
    IndexUtil.close(iDie);
   
    //check index in random access
    DocumentIndex di = index.getDocumentIndex();
    assertNotNull("Failed to get a document index", di);
    final int numberOfDocuments = index.getCollectionStatistics().getNumberOfDocuments();
    for(docid =0; docid < numberOfDocuments; docid++)
    {
      assertEquals("Document lengths for docid "+ docid + " dont match", lengths[docid], di.getDocumentLength(docid));
      assertEquals("Document lengths for docid "+ docid + " dont match", lengths[docid], di.getDocumentEntry(docid).getDocumentLength());
      if (document_unique_terms.length > 0)
        assertEquals("Number of pointers for docid " + docid + " dont match", document_unique_terms[docid], di.getDocumentEntry(docid).getNumberOfEntries());
    }
   
    di = null;
  }
View Full Code Here

        "index,structureName,"+ BasicIterablePostingDocidOnly.class.getName());
    index.setIndexProperty("index.direct.fields.count", ""+FieldScore.FIELDS_COUNT );
    index.setIndexProperty("index.direct.fields.names", ArrayUtils.join(FieldScore.FIELD_NAMES, ","));
    index.addIndexStructure("document-factory", BasicDocumentIndexEntry.Factory.class.getName(), "", "");
    index.flush();
    DocumentIndex di = index.getDocumentIndex();
    assertNotNull(di);
    assertEquals(postings.length, di.getNumberOfDocuments());
    return index;
  }
View Full Code Here

TOP

Related Classes of org.terrier.structures.DocumentIndex

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.