Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermsEnum


        return a.freq < b.freq;
      }
     
    };
    try {
      TermsEnum iterator = terms.iterator(null);
      while (iterator.next() != null) {
        if (highFreqQueue.size() < 5) {
          highFreqQueue.add(new TermAndFreq(
              BytesRef.deepCopyOf(iterator.term()), iterator.docFreq()));
          lowFreqQueue.add(new TermAndFreq(
              BytesRef.deepCopyOf(iterator.term()), iterator.docFreq()));
        } else {
          if (highFreqQueue.top().freq < iterator.docFreq()) {
            highFreqQueue.top().freq = iterator.docFreq();
            highFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
            highFreqQueue.updateTop();
          }
         
          if (lowFreqQueue.top().freq > iterator.docFreq()) {
            lowFreqQueue.top().freq = iterator.docFreq();
            lowFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
            lowFreqQueue.updateTop();
          }
        }
      }
      int lowFreq = lowFreqQueue.top().freq;
View Full Code Here


      }
      Terms terms = fields.terms(fieldName);
      if (terms == null) {
        continue;
      }
      TermsEnum termsEnum = terms.iterator(null);
      DocsEnum docs = null;
      while(termsEnum.next() != null) {
        docs = _TestUtil.docs(random(), termsEnum, MultiFields.getLiveDocs(reader), docs, DocsEnum.FLAG_FREQS);
        while(docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          totalTokenCount2 += docs.freq();
        }
      }
View Full Code Here

  public static DocsEnum docs(Random random, IndexReader r, String field, BytesRef term, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
    final Terms terms = MultiFields.getTerms(r, field);
    if (terms == null) {
      return null;
    }
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(term)) {
      return null;
    }
    return docs(random, termsEnum, liveDocs, reuse, flags);
  }
View Full Code Here

        for (String field : termVectorsToFetch) {
          Terms terms = fds.terms(field);
          if (terms == null) {
            continue;
          }
          TermsEnum termsEnum = terms.iterator(null);
          BytesRef text;
          DocsAndPositionsEnum docsAndPositions = null;
          List<BoboTerm> boboTermList = new ArrayList<BoboTerm>();
          while ((text = termsEnum.next()) != null) {
            BoboTerm boboTerm = new BoboTerm();
            boboTerm.term = text.utf8ToString();
            boboTerm.freq = (int) termsEnum.totalTermFreq();
            docsAndPositions = termsEnum.docsAndPositions(null, docsAndPositions);
            if (docsAndPositions != null) {
              docsAndPositions.nextDoc();
              boboTerm.positions = new ArrayList<Integer>();
              boboTerm.startOffsets = new ArrayList<Integer>();
              boboTerm.endOffsets = new ArrayList<Integer>();
View Full Code Here

    int doc = -1;
    DirectoryReader reader = readerManager.acquire();
    try {
      final BytesRef catTerm = new BytesRef(categoryPath.toString(delimiter));
      TermsEnum termsEnum = null; // reuse
      DocsEnum docs = null; // reuse
      for (AtomicReaderContext ctx : reader.leaves()) {
        Terms terms = ctx.reader().terms(Consts.FULL);
        if (terms != null) {
          termsEnum = terms.iterator(termsEnum);
          if (termsEnum.seekExact(catTerm)) {
            // liveDocs=null because the taxonomy has no deletes
            docs = termsEnum.docs(null, docs, 0 /* freqs not required */);
            // if the term was found, we know it has exactly one document.
            doc = docs.nextDoc() + ctx.docBase;
            break;
          }
        }
View Full Code Here

    initReaderManager();

    boolean aborted = false;
    DirectoryReader reader = readerManager.acquire();
    try {
      TermsEnum termsEnum = null;
      DocsEnum docsEnum = null;
      for (AtomicReaderContext ctx : reader.leaves()) {
        Terms terms = ctx.reader().terms(Consts.FULL);
        if (terms != null) { // cannot really happen, but be on the safe side
          termsEnum = terms.iterator(termsEnum);
          while (termsEnum.next() != null) {
            if (!cache.isFull()) {
              BytesRef t = termsEnum.term();
              // Since we guarantee uniqueness of categories, each term has exactly
              // one document. Also, since we do not allow removing categories (and
              // hence documents), there are no deletions in the index. Therefore, it
              // is sufficient to call next(), and then doc(), exactly once with no
              // 'validation' checks.
              CategoryPath cp = new CategoryPath(t.utf8ToString(), delimiter);
              docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
              boolean res = cache.put(cp, docsEnum.nextDoc() + ctx.docBase);
              assert !res : "entries should not have been evicted from the cache";
            } else {
              // the cache is full and the next put() will evict entries from it, therefore abort the iteration.
              aborted = true;
View Full Code Here

    try {
      final int size = r.numDocs();
      final OrdinalMap ordinalMap = map;
      ordinalMap.setSize(size);
      int base = 0;
      TermsEnum te = null;
      DocsEnum docs = null;
      for (final AtomicReaderContext ctx : r.leaves()) {
        final AtomicReader ar = ctx.reader();
        final Terms terms = ar.terms(Consts.FULL);
        te = terms.iterator(te);
        while (te.next() != null) {
          String value = te.term().utf8ToString();
          CategoryPath cp = new CategoryPath(value, delimiter);
          final int ordinal = addCategory(cp);
          docs = te.docs(null, docs, DocsEnum.FLAG_NONE);
          ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
        }
        base += ar.maxDoc(); // no deletions, so we're ok
      }
      ordinalMap.addDone();
View Full Code Here

    int docID = -1;
    int valId = 0;

    Terms terms = reader.terms(field);
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator(null);
      BytesRef text;
      while ((text = termsEnum.next()) != null) {
        String strText = text.utf8ToString();
        String val = null;
        int weight = 0;
        String[] split = strText.split("\u0000");
        if (split.length > 1) {
View Full Code Here

    int ret = 0;
    Terms terms = reader.terms(field);
    if (terms == null) {
      return ret;
    }
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef text;
    while ((text = termsEnum.next()) != null) {
      if (!text.utf8ToString().startsWith("-")) {
        break;
      }
      ret++;
    }
View Full Code Here

    maxIDList.add(-1);
    freqList.add(0);
    int totalFreq = 0;
    Terms terms = reader.terms(field);
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator(null);
      BytesRef text;
      while ((text = termsEnum.next()) != null) {
        // store term text
        // we expect that there is at most one term per document
        if (t >= length) throw new RuntimeException("there are more terms than "
            + "documents in field \"" + field + "\", but it's impossible to sort on "
            + "tokenized fields");
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.TermsEnum

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.