Package org.terrier.structures

Examples of org.terrier.structures.EntryStatistics


   
   
    for (int i = 0; i < queryLength; i++) {
     
      //get the entry statistics - perhaps this came from "far away"
      EntryStatistics entryStats = queryTerms.getStatistics(queryTermStrings[i]);
      //we seek the query term in the lexicon
      LexiconEntry lEntry = lexicon.getLexiconEntry(queryTermStrings[i]);
      if (entryStats == null)
        entryStats = lEntry;
     
      //and if it is not found, we continue with the next term
      if (lEntry==null)
      {
        //logger.info("Term Not Found: "+queryTermStrings[i]);
        continue;
      }
      queryTerms.setTermProperty(queryTermStrings[i], lEntry);
      logger.debug((i + 1) + ": " + queryTermStrings[i].trim() + " with " + entryStats.getDocumentFrequency()
          + " documents (TF is " + entryStats.getFrequency() + ").");
     
      //check if the IDF is very low.
      if (IGNORE_LOW_IDF_TERMS && collectionStatistics.getNumberOfDocuments() < lEntry.getFrequency()) {
        logger.debug("query term " + queryTermStrings[i] + " has low idf - ignored from scoring.");
        continue;
View Full Code Here


    {
      Map.Entry<String, LexiconEntry> termEntry    = queryTermsToMatchList.get(i);
      String               queryTerm    = termEntry.getKey();
      LexiconEntry           lexiconEntry = termEntry.getValue();
      //get the entry statistics - perhaps this came from "far away"
      EntryStatistics entryStats = queryTerms.getStatistics(queryTerm);
      //if none were provided with the query we seek the entry statistics query term in the lexicon
      if (entryStats == null)
      {
        entryStats = lexiconEntry;
        //save them as they may be useful for query expansion. HOWEVER ONLY IF we didnt
View Full Code Here

  /** Knows how to merge several EntryStatistics for a single effective term */
  public static EntryStatistics mergeStatistics(EntryStatistics[] entryStats)
  {
    if (entryStats == null)
      return null;
    EntryStatistics rtr = entryStats[0];
    for(int i=1;i<entryStats.length;i++)
      rtr.add(entryStats[i]);
    return rtr;
  }
View Full Code Here

   * @param wmodels WeightingModels for the synonym group (NOT one per member).
   * @throws IOException
   */
  public void addSingleTermAlternatives(String[] terms, String stringForm,  double weight, EntryStatistics[] entryStats, WeightingModel[] wmodels) throws IOException
  {   
    EntryStatistics joined = mergeStatistics(entryStats);
    addSingleTermAlternatives(terms, stringForm, weight, joined, wmodels);
  }
View Full Code Here

  public void setOriginalQueryTerms(MatchingQueryTerms query){
    String[] terms = query.getTerms();
    this.originalTermids.clear();
    for (int i=0; i<terms.length; i++)
      {
      EntryStatistics te = query.getStatistics(terms[i]);
      if (te != null) {
        this.originalTermids.put(te.getTermId(), terms[i]);
        this.originalTermFreqs.adjustOrPutValue(te.getTermId(), query.getTermWeight(terms[i]), query.getTermWeight(terms[i]));
      }
    }
  }
View Full Code Here

    assertEquals(1, mqt.length())
    assertEquals(1, mqt.getTerms().length)
    assertEquals(term, mqt.getTerms()[0]);
    assertEquals(1.0d, mqt.getTermWeight(term), 0.0d);
    assertNull(mqt.getStatistics(term));
    EntryStatistics e = new BasicLexiconEntry(2, 1, 100);
    mqt.setTermProperty(term, e);
    assertEquals(2, mqt.getStatistics(term).getTermId());
    assertEquals(1, mqt.getStatistics(term).getDocumentFrequency());
    assertEquals(100, mqt.getStatistics(term).getFrequency());
  }
View Full Code Here

    assertEquals(1, mqt.length())
    assertEquals(1, mqt.getTerms().length)
    assertEquals(term, mqt.getTerms()[0]);
    assertEquals(1.0d, mqt.getTermWeight(term), 0.0d);
    assertNull(mqt.getStatistics(term));
    EntryStatistics e = new BasicLexiconEntry(2, 1, 100);
    mqt.setTermProperty(term, e);
    assertEquals(2, mqt.getStatistics(term).getTermId());
    assertEquals(1, mqt.getStatistics(term).getDocumentFrequency());
    assertEquals(100, mqt.getStatistics(term).getFrequency());
  }
View Full Code Here

    assertEquals(1.0d, mqt.getTermWeight(term2), 0.0d);
   
    assertNull(mqt.getStatistics(term1));
    assertNull(mqt.getStatistics(term2));
       
    EntryStatistics e1 = new BasicLexiconEntry(2, 1, 100);
    EntryStatistics e2 = new BasicLexiconEntry(40, 100, 102);
    mqt.setTermProperty(term1, e1);
    mqt.setTermProperty(term2, e2);
   
    assertEquals(2, mqt.getStatistics(term1).getTermId());
    assertEquals(40, mqt.getStatistics(term2).getTermId());
View Full Code Here

    assertEquals(1, mqt.length())
    assertEquals(1, mqt.getTerms().length)
    assertEquals(term, mqt.getTerms()[0]);
    assertEquals(1.0d, mqt.getTermWeight(term), 0.0d);
    assertNull(mqt.getStatistics(term));
    EntryStatistics e = new BasicLexiconEntry(2, 1, 100);
    mqt.setTermProperty(term, e);
    assertEquals(2, mqt.getStatistics(term).getTermId());
    assertEquals(1, mqt.getStatistics(term).getDocumentFrequency());
    assertEquals(100, mqt.getStatistics(term).getFrequency());
  }
View Full Code Here

    assertEquals(1, mqt.length())
    assertEquals(1, mqt.getTerms().length)
    assertEquals(term, mqt.getTerms()[0]);
    assertEquals(1.0d, mqt.getTermWeight(term), 0.0d);
    assertNull(mqt.getStatistics(term));
    EntryStatistics e = new BasicLexiconEntry(2, 1, 100);
    mqt.setTermProperty(term, e);
    assertEquals(2, mqt.getStatistics(term).getTermId());
    assertEquals(1, mqt.getStatistics(term).getDocumentFrequency());
    assertEquals(100, mqt.getStatistics(term).getFrequency());
  }
View Full Code Here

TOP

Related Classes of org.terrier.structures.EntryStatistics

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.