Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermsEnum.docFreq()


            highFreqQueue.top().freq = iterator.docFreq();
            highFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
            highFreqQueue.updateTop();
          }
         
          if (lowFreqQueue.top().freq > iterator.docFreq()) {
            lowFreqQueue.top().freq = iterator.docFreq();
            lowFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
            lowFreqQueue.updateTop();
          }
        }
View Full Code Here


            highFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
            highFreqQueue.updateTop();
          }
         
          if (lowFreqQueue.top().freq > iterator.docFreq()) {
            lowFreqQueue.top().freq = iterator.docFreq();
            lowFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
            lowFreqQueue.updateTop();
          }
        }
      }
View Full Code Here

       
        if (termsEnum == TermsEnum.EMPTY) continue;
        if (termsEnum.seekExact(term.bytes())) {
          if (termContext == null) {
            contextArray[i] = new TermContext(reader.getContext(),
                termsEnum.termState(), context.ord, termsEnum.docFreq(),
                termsEnum.totalTermFreq());
          } else {
            termContext.register(termsEnum.termState(), context.ord,
                termsEnum.docFreq(), termsEnum.totalTermFreq());
          }
View Full Code Here

            contextArray[i] = new TermContext(reader.getContext(),
                termsEnum.termState(), context.ord, termsEnum.docFreq(),
                termsEnum.totalTermFreq());
          } else {
            termContext.register(termsEnum.termState(), context.ord,
                termsEnum.docFreq(), termsEnum.totalTermFreq());
          }
         
        }
       
      }
View Full Code Here

      public int intVal(int doc)
      {
        try {
          terms.get(doc, ref);
          if (termsEnum.seekExact(ref)) {
            return termsEnum.docFreq();
          } else {
            return 0;
          }
        }
        catch (IOException e) {
View Full Code Here

      while((term = termsEnum.next()) != null) {
        BytesRef term2 = termsEnum2.next();
        assertNotNull(term2);
        assertEquals(term, term2);
        assertEquals(termsEnum.docFreq(), termsEnum2.docFreq());
        assertEquals(termsEnum.totalTermFreq(), termsEnum2.totalTermFreq());

        if (ord == 0) {
          try {
            termsEnum.ord();
View Full Code Here

      int threshold = ir.maxDoc() / 10; // ignore words too common.
      Terms terms = MultiFields.getTerms(ir, field);
      if (terms != null) {
        TermsEnum termsEnum = terms.iterator(null);
        while (termsEnum.next() != null) {
          int df = termsEnum.docFreq();
          if (df<threshold) {
            String ttxt = termsEnum.term().utf8ToString();
            pq.insertWithOverflow(new TermDf(ttxt,df));
          }
        }
View Full Code Here

    //Should we use a linked hash map so that we know terms are in order?
    termEntries = Maps.newLinkedHashMap();
    int count = 0;
    BytesRef text;
    while ((text = te.next()) != null) {
      int df = te.docFreq();
      if (df >= minDf && df <= percent) {
        TermEntry entry = new TermEntry(text.utf8ToString(), count++, df);
        termEntries.put(entry.getTerm(), entry);
      }
    }
View Full Code Here

    BytesRef bytes = new BytesRef();
    while ( (bytes = termsEnum.next()) != null) {
      byte[] buf = new byte[bytes.length];
      System.arraycopy(bytes.bytes, 0, buf, 0, bytes.length);
      String term = new String(buf, "UTF-8");
      int df = termsEnum.docFreq();
      long cf = termsEnum.totalTermFreq();

      if ( df < min) {
        skippedTerms++;
        missingCnt += cf;
View Full Code Here

                        if (terms != null) {
                            TermsEnum termsEnum = terms.iterator(null);
                            BytesRef text;
                            while ((text = termsEnum.next()) != null) {
                                // skip invalid terms
                                if (termsEnum.docFreq() < 1) {
                                    continue;
                                }
                                if (termsEnum.totalTermFreq() < 1) {
                                    continue;
                                }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.