Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermContext


    BooleanQuery highFreq = new BooleanQuery(disableCoord);
    highFreq.setBoost(highFreqBoost);
    lowFreq.setBoost(lowFreqBoost);
    BooleanQuery query = new BooleanQuery(true);
    for (int i = 0; i < queryTerms.length; i++) {
      TermContext termContext = contextArray[i];
      if (termContext == null) {
        lowFreq.add(new TermQuery(queryTerms[i]), lowFreqOccur);
      } else {
        if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
            || (termContext.docFreq() > (int) Math.ceil(maxTermFrequency
                * (float) maxDoc))) {
          highFreq
              .add(new TermQuery(queryTerms[i], termContext), highFreqOccur);
        } else {
          lowFreq.add(new TermQuery(queryTerms[i], termContext), lowFreqOccur);
View Full Code Here


        // reader has no fields
        continue;
      }
      for (int i = 0; i < queryTerms.length; i++) {
        Term term = queryTerms[i];
        TermContext termContext = contextArray[i];
        final Terms terms = fields.terms(term.field());
        if (terms == null) {
          // field does not exist
          continue;
        }
        termsEnum = terms.iterator(termsEnum);
        assert termsEnum != null;
       
        if (termsEnum == TermsEnum.EMPTY) continue;
        if (termsEnum.seekExact(term.bytes(), false)) {
          if (termContext == null) {
            contextArray[i] = new TermContext(reader.getContext(),
                termsEnum.termState(), context.ord, termsEnum.docFreq(),
                termsEnum.totalTermFreq());
          } else {
            termContext.register(termsEnum.termState(), context.ord,
                termsEnum.docFreq(), termsEnum.totalTermFreq());
          }
         
        }
       
View Full Code Here

        Term term = ((TermQuery)clause.getQuery()).getTerm();
        long ord = dv.lookupTerm(term.bytes());
        if (ord >= 0) {
          boolean success = ords.add(ord);
          assert success; // no dups
          TermContext context = TermContext.build(reader.getContext(), term, true);
          SimWeight w = weight.similarity.computeWeight(1f,
                        searcher.collectionStatistics("field"),
                        searcher.termStatistics(term, context));
          w.getValueForNormalization(); // ignored
          w.normalize(1F, 1F);
View Full Code Here

  public Term getTerm() { return term; }

  @Override
  public Weight createWeight(IndexSearcher searcher) throws IOException {
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermContext termState;
    if (perReaderTermState == null || perReaderTermState.topReaderContext != context) {
      // make TermQuery single-pass if we don't have a PRTS or if the context differs!
      termState = TermContext.build(context, term);
    } else {
     // PRTS was pre-build for this IS
     termState = this.perReaderTermState;
    }

    // we must not ignore the given docFreq - if set use the given value (lie)
    if (docFreq != -1)
      termState.setDocFreq(docFreq);
   
    return new TermWeight(searcher, termState);
  }
View Full Code Here

    return true;
  }

  @Override
  public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
    TermContext termContext = termContexts.get(term);
    final TermState state;
    if (termContext == null) {
      // this happens with span-not query, as it doesn't include the NOT side in extractTerms()
      // so we seek to the term now in this segment..., this sucks because its ugly mostly!
      final Fields fields = context.reader().fields();
      if (fields != null) {
        final Terms terms = fields.terms(term.field());
        if (terms != null) {
          final TermsEnum termsEnum = terms.iterator(null);
          if (termsEnum.seekExact(term.bytes())) {
            state = termsEnum.termState();
          } else {
            state = null;
          }
        } else {
          state = null;
        }
      } else {
        state = null;
      }
    } else {
      state = termContext.get(context.ord);
    }
   
    if (state == null) { // term is not present in that reader
      return TermSpans.EMPTY_TERM_SPANS;
    }
View Full Code Here

    query.extractTerms(terms);
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermStatistics termStats[] = new TermStatistics[terms.size()];
    int i = 0;
    for (Term term : terms) {
      TermContext state = TermContext.build(context, term);
      termStats[i] = searcher.termStatistics(term, state);
      termContexts.put(term, state);
      i++;
    }
    final String field = query.getField();
View Full Code Here

        array.termState[pos].register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
        assert array.boost[pos] == boostAtt.getBoost() : "boost should be equal in all segment TermsEnums";
      } else {
        // new entry: we populate the entry initially
        array.boost[e] = boostAtt.getBoost();
        array.termState[e] = new TermContext(topReaderContext, state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
        ScoringRewrite.this.checkMaxClauseCount(terms.size());
      }
      return true;
    }
View Full Code Here

     
      // compute idf
      ArrayList<TermStatistics> allTermStats = new ArrayList<>();
      for(final Term[] terms: termArrays) {
        for (Term term: terms) {
          TermContext termContext = termContexts.get(term);
          if (termContext == null) {
            termContext = TermContext.build(context, term);
            termContexts.put(term, termContext);
          }
          allTermStats.add(searcher.termStatistics(term, termContext));
View Full Code Here

       
        assert compareToLastTerm(null);

        // lazy init the initial ScoreTerm because comparator is not known on ctor:
        if (st == null)
          st = new ScoreTerm(this.termComp, new TermContext(topReaderContext));
        boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
      }
   
      // for assert:
      private BytesRef lastTerm;
      private boolean compareToLastTerm(BytesRef t) {
        if (lastTerm == null && t != null) {
          lastTerm = BytesRef.deepCopyOf(t);
        } else if (t == null) {
          lastTerm = null;
        } else {
          assert termsEnum.getComparator().compare(lastTerm, t) < 0: "lastTerm=" + lastTerm + " t=" + t;
          lastTerm.copyBytes(t);
        }
        return true;
      }
 
      @Override
      public boolean collect(BytesRef bytes) throws IOException {
        final float boost = boostAtt.getBoost();

        // make sure within a single seg we always collect
        // terms in order
        assert compareToLastTerm(bytes);

        //System.out.println("TTR.collect term=" + bytes.utf8ToString() + " boost=" + boost + " ord=" + readerContext.ord);
        // ignore uncompetitive hits
        if (stQueue.size() == maxSize) {
          final ScoreTerm t = stQueue.peek();
          if (boost < t.boost)
            return true;
          if (boost == t.boost && termComp.compare(bytes, t.bytes) > 0)
            return true;
        }
        ScoreTerm t = visitedTerms.get(bytes);
        final TermState state = termsEnum.termState();
        assert state != null;
        if (t != null) {
          // if the term is already in the PQ, only update docFreq of term in PQ
          assert t.boost == boost : "boost should be equal in all segment TermsEnums";
          t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
        } else {
          // add new entry in PQ, we must clone the term, else it may get overwritten!
          st.bytes.copyBytes(bytes);
          st.boost = boost;
          visitedTerms.put(st.bytes, st);
          assert st.termState.docFreq() == 0;
          st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
          stQueue.offer(st);
          // possibly drop entries from queue
          if (stQueue.size() > maxSize) {
            st = stQueue.poll();
            visitedTerms.remove(st.bytes);
            st.termState.clear(); // reset the termstate!
          } else {
            st = new ScoreTerm(termComp, new TermContext(topReaderContext));
          }
          assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
          // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
          if (stQueue.size() == maxSize) {
            t = stQueue.peek();
View Full Code Here

      assert termState != null;
      if (pos < 0) {
        pos = (-pos)-1;
        array.termState[pos].register(termState, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
      } else {
        array.termState[pos] = new TermContext(topReaderContext, termState, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
      }
      return true;
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.TermContext

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.