Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermEnum.docFreq()


    TermEnum terms = reader.terms();
     
    int minFreq = 0;
    while (terms.next()) {
      if (terms.docFreq() > minFreq) {
        tiq.put(new TermFreq(terms.term(), terms.docFreq()));
        if (tiq.size() >= count) {                 // if tiq overfull
          tiq.pop();                              // remove lowest in tiq
          minFreq = ((TermFreq)tiq.top()).docFreq; // reset minFreq
        }
      }
View Full Code Here


            FuzzyTermEnum fe=new FuzzyTermEnum(reader,startTerm,f.minSimilarity,f.prefixLength);
            TermEnum origEnum = reader.terms(startTerm);
            int df=0;
            if(startTerm.equals(origEnum.term()))
            {
                df=origEnum.docFreq(); //store the df so all variants use same idf
            }
            int numVariants=0;
            int totalVariantDocFreqs=0;
            do
            {
View Full Code Here

      hashInt = 0;
      while(tnum.next()){
        t = tnum.term();
        term = t.text();
       
        tfreq = tnum.docFreq()// get DF for the term
       
        // skip rare terms
        if(tfreq < 3){
          continue;
        }
View Full Code Here

      tcount = 0;
      while(tnum.next()){
        t = tnum.term();
        term = t.text();
       
        tfreq = tnum.docFreq()// get DF for the term
       
        // skip rare terms
        if(tfreq < 3){
          continue;
        }
View Full Code Here

      if (fields == null || fields.contains(termEnum.term().field())) { // todo skipto if not using field
        InstantiatedTerm instantiatedTerm = new InstantiatedTerm(termEnum.term().field(), termEnum.term().text());
        getTermsByFieldAndText().get(termEnum.term().field()).put(termEnum.term().text(), instantiatedTerm);
        instantiatedTerm.setTermIndex(terms.size());
        terms.add(instantiatedTerm);
        instantiatedTerm.setAssociatedDocuments(new InstantiatedTermDocumentInformation[termEnum.docFreq()]);
      }
    }
    termEnum.close();
    orderedTerms = terms.toArray(new InstantiatedTerm[terms.size()]);
View Full Code Here

    TermEnum terms = reader.terms();

    if (field != null) {
      while (terms.next()) {
        if (terms.term().field().equals(field)) {
          tiq.insert(new TermInfo(terms.term(), terms.docFreq()));
        }
      }
    }
    else {
      while (terms.next()) {
View Full Code Here

        }
      }
    }
    else {
      while (terms.next()) {
        tiq.insert(new TermInfo(terms.term(), terms.docFreq()));
      }
    }
    while (tiq.size() != 0) {
      TermInfo termInfo = (TermInfo) tiq.pop();
      System.out.println(termInfo.term + " " + termInfo.docFreq);
View Full Code Here

    do {
      Term term = te.term();
      if (term == null || !term.field().equals(field)) {
        break;
      }
      int df = te.docFreq();
      if (df < minDf || df > percent) {
        continue;
      }
      TermEntry entry = new TermEntry(term.text(), count++, df);
      termEntries.put(entry.getTerm(), entry);
View Full Code Here

        break;
      }
      assertTrue(testTermEnum.next());

      assertEquals(aprioriTermEnum.term(), testTermEnum.term());
      assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq());

      // compare termDocs seeking

      TermDocs aprioriTermDocsSeeker = aprioriReader.termDocs(aprioriTermEnum.term());
      TermDocs testTermDocsSeeker = testReader.termDocs(testTermEnum.term());
View Full Code Here

    {
      Term currTerm=te.term();
     
      while((currTerm!=null)&&(currTerm.field()==startTerm.field())) //term fieldnames are interned
      {
        if(te.docFreq()>1)
        {
          int lastDoc=-1;
          //unset potential duplicates
          TermDocs td = reader.termDocs(currTerm);
          td.next();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.