Package org.terrier.structures

Examples of org.terrier.structures.LexiconEntry
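
A LexiconEntry holds the collection statistics of a single term (document frequency, total term frequency, term id) and also serves as a pointer to that term's posting list in the inverted index. The excerpts below, taken from Terrier's matching, proximity and test code, illustrate the common usage patterns.

In this first excerpt, each candidate term is looked up in the lexicon; a term is skipped if it is not found, if it fails the low-IDF test (its total frequency exceeds the number of documents in the collection), or if no weighting models are configured for it: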


    List<LexiconEntry> _le = new ArrayList<LexiconEntry>(terms.length);
    List<IterablePosting> _joinedPostings = new ArrayList<IterablePosting>(terms.length);
       
    for(String alternative : terms)
    {
      LexiconEntry t = lexicon.getLexiconEntry(alternative);
      if (t == null) {
        logger.debug("Term Not Found: " + alternative);
        //previousTerm = false;     
      } else if (IGNORE_LOW_IDF_TERMS && collectionStatistics.getNumberOfDocuments() < t.getFrequency()) {
        //logger.warn("query term " + alternative + " has low idf - ignored from scoring.");
        //previousTerm = false;
      } else if (wmodels.length == 0) {
        //logger.warn("No weighting models for term " + alternative +", skipping scoring");
        //previousTerm = false;
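
Here each query term's LexiconEntry is used as a BitIndexPointer to fetch the term's posting list from the inverted index, which is then scored into the result set: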


    int queryLength = queryTermsToMatchList.size();
    // The posting list iterator from the inverted file
    IterablePosting postings;   
    for (int i = 0; i < queryLength; i++)
    {
      LexiconEntry lexiconEntry = queryTermsToMatchList.get(i).getValue();
      postings = invertedIndex.getPostings((BitIndexPointer)lexiconEntry);
      assignScores(i, wm[i], (AccumulatorResultSet) resultSet, postings);
    }

    resultSet.initialise();
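
This excerpt prepares a document-at-a-time traversal: one posting list is opened per query term and advanced to its first document, and the (docid, term index) pair is packed into a single long so that a priority queue orders postings by docid: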

   
    final int queryLength = queryTermsToMatchList.size();
    // The posting list iterator array (one per term) and initialization
    IterablePosting postingListArray[] = new IterablePosting[queryLength];
    for (int i = 0; i < queryLength; i++) {
      LexiconEntry lexiconEntry = queryTermsToMatchList.get(i).getValue();
      if(logger.isDebugEnabled()) logger.debug((i + 1) + ": " + queryTermStrings[i].trim() + " with " + lexiconEntry.getDocumentFrequency() + " documents (TF is " + lexiconEntry.getFrequency() + ").");
      postingListArray[i] = invertedIndex.getPostings((BitIndexPointer)lexiconEntry);
      postingListArray[i].next();
      long docid = postingListArray[i].getId();
      assert(docid != -1);
      postingHeap.enqueue((docid << 32) + i);
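
When processing a phrase, each constituent term's LexiconEntry is fetched and cached as a term property; terms absent from the lexicon are skipped: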

    for (int i = 0; i < phraseLength; i++) {
      docidsMap[i] = new TIntIntHashMap();
      String t = ((SingleTermQuery) phraseTerms.get(i)).getTerm();
      if (terms.getStatistics(t) == null)
      {
        LexiconEntry le = index.getLexicon().getLexiconEntry(t);
        if (le == null)
          continue;
        terms.setTermProperty(t, le);
      }
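
For pairwise (proximity) scoring, both terms of a query term pair are looked up; if either is missing the pair is skipped, otherwise their document frequencies and block posting arrays are obtained: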

      {
        final String term1 = queryTermPair[0];
        final String term2 = queryTermPair[1];
       
        //we seek the query term in the lexicon
        LexiconEntry tEntry1 = lexicon.getLexiconEntry(term1);
        if (tEntry1 == null)//and if it is not found, we continue with the next term pair
          continue;
        //double term1KeyFrequency = query.getTermWeight(term1);
       
        double term1DocumentFrequency = (double)tEntry1.getDocumentFrequency();
       
        //we seek the 2nd query term in the lexicon
        LexiconEntry tEntry2 = lexicon.getLexiconEntry(term2);
        //and if it is not found, we continue with the next term pair
        if (tEntry2 == null)
          continue;
        //double term2KeyFrequency = query.getTermWeight(term2);
        double term2DocumentFrequency = (double)tEntry2.getDocumentFrequency();
        term1Pointers = invertedIndex.getDocuments(tEntry1);
       
        term1docids = term1Pointers[0];
        term1blockfreqs = term1Pointers[2];
        term1blockids = term1Pointers[3];
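
The following test excerpt verifies the posting lists reachable through a LexiconEntry: it checks docids, frequencies, document lengths and (where fields are expected) field frequencies, first via InvertedIndex.getPostings(), then via an inverted-index input stream, and finally via the posting-array form of getDocuments():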

    InvertedIndex invertedIndex = index.getInvertedIndex();
    assertNotNull(invertedIndex);
    // for each term
    for (int t = 0; t < termStrings.length; t++) {
      LexiconEntry le = lexicon.getLexiconEntry(termStrings[t]);
      assertNotNull(le);
      ip = invertedIndex.getPostings((BitIndexPointer) le);
      // for each document
      int d = 0;
      while (ip.next() != IterablePosting.EOL) {
        assertEquals(invIds[t][d], ip.getId());
        assertEquals(invTfs[t][d], ip.getFrequency());
        assertEquals(doclens[invIds[t][d]], ip.getDocumentLength());
        if (fieldsExpected) {
          assertEquals(2, invFfs[t][d].length);
          for (int f = 0; f < 2; f++) {
            assertEquals(invFfs[t][d][f], ((FieldIterablePosting) ip).getFieldFrequencies()[f]);
          }
        }
        d++;
      }
      ip.close();
    }
    // post-check
    assertEquals(IterablePosting.EOL, ip.next());

    /**
     * Test {@link IterablePosting} entries from a {@link InvertedIndexInputStream}
     */
    bpiis = (BitPostingIndexInputStream) index.getIndexStructureInputStream("inverted");
    assertNotNull(bpiis);
    // for each term
    for (int t = 0; t < invIds.length; t++) {
      assertTrue(bpiis.hasNext());
      ip = bpiis.next();
      assertNotNull(ip);
      // for each document
      int d = 0;
      while (ip.next() != IterablePosting.EOL) {
        assertEquals(invIds[t][d], ip.getId());
        assertEquals(invTfs[t][d], ip.getFrequency());
        assertEquals(doclens[invIds[t][d]], ip.getDocumentLength());
        if (fieldsExpected) {
          assertEquals(2, invFfs[t][d].length);
          for (int f = 0; f < 2; f++) {
            assertEquals(invFfs[t][d][f], ((FieldIterablePosting) ip).getFieldFrequencies()[f]);
          }
        }
        d++;
      }
    }
    // post-check
    assertFalse(bpiis.hasNext());

    /**
     * Test posting array entries from a {@link InvertedIndex}
     */
    // for each term
    for (int t = 0; t < termStrings.length; t++) {
      LexiconEntry le = lexicon.getLexiconEntry(termStrings[t]);
      assertNotNull(le);
     
      int[][] documents = invertedIndex.getDocuments(le);
     
      if (!fieldsExpected) {
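
A matching test: the LexiconEntry for "quick" is retrieved, its statistics are overridden with setStatistics() (document frequency 1, term frequency 40), and it is registered with the MatchingQueryTerms before running the match: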

    Matching matching = makeMatching(index);
    assertNotNull(matching);
   
    MatchingQueryTerms mqt = new MatchingQueryTerms();
    mqt.setDefaultTermWeightingModel(new DLH13());
    LexiconEntry le = index.getLexicon().getLexiconEntry("quick");
    assertNotNull(le);
    le.setStatistics(1, 40);
    mqt.setTermProperty("quick", le);
    ResultSet rs = matching.match("query1", mqt);
    assertNotNull(rs);
    assertEquals(1, rs.getResultSize());
    assertEquals(0, rs.getDocids()[0]);
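
Finally, a lexicon test: each expected term is looked up both by string and by term id, and the document frequencies returned by the two lookups are checked against a table of expected values: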

   
    TObjectIntHashMap<String> checkFreqs = new TObjectIntHashMap<String>(CHECK_TERMS.length);
    for(String foundTerm: CHECK_TERMS)
    {
      i++;
      LexiconEntry le = lex.getLexiconEntry(foundTerm);
      assertNotNull("Didnt find lexicon entry for term " + foundTerm, le);
      assertEquals("Dcoument frequency incorrect for term " + foundTerm, CHECK_TERMS_DFS[i], le.getDocumentFrequency());
     
      //check lookup by termid
      Map.Entry<String, LexiconEntry> lee2 = lex.getLexiconEntry(le.getTermId());
      assertNotNull(lee2);
      assertEquals(foundTerm, lee2.getKey());
      assertEquals(CHECK_TERMS_DFS[i], lee2.getValue().getDocumentFrequency());
     
      //make a note of this term for the stream checking
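
Putting the pieces together, the sketch below shows the basic lookup-then-iterate pattern as a self-contained program. It is a minimal sketch, assuming the Terrier 3.x API used in the excerpts above; the index path, prefix and query term are placeholders:

    import org.terrier.structures.BitIndexPointer;
    import org.terrier.structures.Index;
    import org.terrier.structures.InvertedIndex;
    import org.terrier.structures.Lexicon;
    import org.terrier.structures.LexiconEntry;
    import org.terrier.structures.postings.IterablePosting;

    public class LexiconEntryDemo {
      public static void main(String[] args) throws Exception {
        // placeholder location of an existing Terrier index on disk
        Index index = Index.createIndex("/path/to/index", "data");
        Lexicon<String> lexicon = index.getLexicon();

        // look the term up; null means it does not occur in the collection
        LexiconEntry le = lexicon.getLexiconEntry("terrier");
        if (le != null) {
          // collection statistics carried by the entry
          System.out.println("df=" + le.getDocumentFrequency()
              + " tf=" + le.getFrequency()
              + " termid=" + le.getTermId());

          // the entry also acts as a pointer into the inverted index
          InvertedIndex inverted = index.getInvertedIndex();
          IterablePosting ip = inverted.getPostings((BitIndexPointer) le);
          while (ip.next() != IterablePosting.EOL) {
            System.out.println("docid=" + ip.getId() + " freq=" + ip.getFrequency());
          }
          ip.close();
        }
        index.close();
      }
    }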
