Examples of TermEnum


Examples of org.apache.lucene.index.TermEnum

    r.close();
    d.close();
  }

  public void termEnumTest(IndexReader r) throws Exception {
    TermEnum terms = r.terms();

    assertNull(terms.term());
    assertFalse(terms.next());

  }
View Full Code Here

Examples of org.apache.lucene.index.TermEnum

      if (fields == null || fields.contains(fieldName)) {
        getTermsByFieldAndText().put(fieldName, new HashMap<String, InstantiatedTerm>(5000));
      }
    }
    List<InstantiatedTerm> terms = new ArrayList<InstantiatedTerm>(5000 * getTermsByFieldAndText().size());
    TermEnum termEnum = sourceIndexReader.terms();
    while (termEnum.next()) {
      if (fields == null || fields.contains(termEnum.term().field())) { // todo skipto if not using field
        InstantiatedTerm instantiatedTerm = new InstantiatedTerm(termEnum.term().field(), termEnum.term().text());
        getTermsByFieldAndText().get(termEnum.term().field()).put(termEnum.term().text(), instantiatedTerm);
        instantiatedTerm.setTermIndex(terms.size());
        terms.add(instantiatedTerm);
        instantiatedTerm.setAssociatedDocuments(new InstantiatedTermDocumentInformation[termEnum.docFreq()]);
      }
    }
    termEnum.close();
    orderedTerms = terms.toArray(new InstantiatedTerm[terms.size()]);

    // create term-document informations
    for (InstantiatedTerm term : orderedTerms) {
      TermPositions termPositions = sourceIndexReader.termPositions(term.getTerm());
View Full Code Here

Examples of org.apache.lucene.index.TermEnum

        LinkedList termsWithPrefix = new LinkedList();
        IndexReader ir = IndexReader.open(indexStore, true);

        // this TermEnum gives "piccadilly", "pie" and "pizza".
        String prefix = "pi";
        TermEnum te = ir.terms(new Term("body", prefix));
        do {
            if (te.term().text().startsWith(prefix))
            {
                termsWithPrefix.add(te.term());
            }
        } while (te.next());

        query1.add((Term[])termsWithPrefix.toArray(new Term[0]));
        assertEquals("body:\"blueberry (piccadilly pie pizza)\"", query1.toString());
        query2.add((Term[])termsWithPrefix.toArray(new Term[0]));
        assertEquals("body:\"strawberry (piccadilly pie pizza)\"", query2.toString());

        ScoreDoc[] result;
        result = searcher.search(query1, null, 1000).scoreDocs;
        assertEquals(2, result.length);
        result = searcher.search(query2, null, 1000).scoreDocs;
        assertEquals(0, result.length);

        // search for "blue* pizza":
        MultiPhraseQuery query3 = new MultiPhraseQuery();
        termsWithPrefix.clear();
        prefix = "blue";
        te = ir.terms(new Term("body", prefix));
        do {
            if (te.term().text().startsWith(prefix))
            {
                termsWithPrefix.add(te.term());
            }
        } while (te.next());
        query3.add((Term[])termsWithPrefix.toArray(new Term[0]));
        query3.add(new Term("body", "pizza"));

        result = searcher.search(query3, null, 1000).scoreDocs;
        assertEquals(2, result.length); // blueberry pizza, bluebird pizza
View Full Code Here

Examples of org.apache.lucene.index.TermEnum

   * @throws IOException
   */
  public int addStopWords(IndexReader reader, String fieldName, int maxDocFreq) throws IOException {
    HashSet<String> stopWords = new HashSet<String>();
    String internedFieldName = StringHelper.intern(fieldName);
    TermEnum te = reader.terms(new Term(fieldName));
    Term term = te.term();
    while (term != null) {
      if (term.field() != internedFieldName) {
        break;
      }
      if (te.docFreq() > maxDocFreq) {
        stopWords.add(term.text());
      }
      if (!te.next()) {
        break;
      }
      term = te.term();
    }
    stopWordsPerField.put(fieldName, stopWords);
   
    /* if the stopwords for a field are changed,
     * then saved streams for that field are erased.
View Full Code Here

Examples of org.apache.lucene.index.TermEnum

            }

            for (Iterator it = startTerms.iterator(); it.hasNext(); ) {
                Term startTerm = (Term) it.next();

                TermEnum terms = reader.terms(startTerm);
                try {
                    TermDocs docs = reader.termDocs();
                    try {
                        do {
                            Term term = terms.term();
                            if (term != null
                                    && term.field() == testField
                                    && term.text().startsWith(namePrefix)) {
                                if (checkLower) {
                                    int compare = termCompare(term.text(), lowerTerm.text(), propNameLength);
                                    if (compare > 0 || compare == 0 && inclusive) {
                                        // do not check lower term anymore if no
                                        // transformation is done on the term enum
                                        checkLower = transform == TRANSFORM_NONE ? false : true;
                                    } else {
                                        // continue with next term
                                        continue;
                                    }
                                }
                                if (upperTerm != null) {
                                    int compare = termCompare(term.text(), upperTerm.text(), propNameLength);
                                    // if beyond the upper term, or is exclusive and
                                    // this is equal to the upper term
                                    if ((compare > 0) || (!inclusive && compare == 0)) {
                                        // only break out if no transformation
                                        // was done on the term from the enum
                                        if (transform == TRANSFORM_NONE) {
                                            break;
                                        } else {
                                            // because of the transformation
                                            // it is possible that the next
                                            // term will be included again if
                                            // we still enumerate on the same
                                            // property name
                                            if (term.text().startsWith(namePrefix)) {
                                                continue;
                                            } else {
                                                break;
                                            }
                                        }
                                    }
                                }

                                docs.seek(terms);
                                while (docs.next()) {
                                    hits.set(docs.doc());
                                }
                            } else {
                                break;
                            }
                        } while(terms.next());
                    } finally {
                        docs.close();
                    }
                } finally {
                    terms.close();
                }
            }

            hitsCalculated = true;
            // put to cache
View Full Code Here

Examples of org.apache.lucene.index.TermEnum

         */
        private void calculateHits() throws IOException {
            if (hitsCalculated) {
                return;
            }
            TermEnum terms = new WildcardTermEnum(reader, field, propName, pattern, transform);
            try {
                // use unpositioned TermDocs
                TermDocs docs = reader.termDocs();
                try {
                    while (terms.term() != null) {
                        docs.seek(terms);
                        while (docs.next()) {
                            hits.set(docs.doc());
                        }
                        if (!terms.next()) {
                            break;
                        }
                    }
                } finally {
                    docs.close();
                }
            } finally {
                terms.close();
            }
            hitsCalculated = true;
            // put to cache
            synchronized (resultMap) {
                resultMap.put(cacheKey, hits);
View Full Code Here

Examples of org.apache.lucene.index.TermEnum

     *                     the search index.
     */
    private void calculateDocFilter() throws IOException {
        docFilter = new BitSet(reader.maxDoc());
        // we match all terms
        TermEnum terms = reader.terms(new Term(FieldNames.PROPERTIES, field));
        try {
            TermDocs docs = reader.termDocs();
            try {
                while (terms.term() != null
                        && terms.term().field() == FieldNames.PROPERTIES
                        && terms.term().text().startsWith(field)) {
                    docs.seek(terms);
                    while (docs.next()) {
                        docFilter.set(docs.doc());
                    }
                    terms.next();
                }
            } finally {
                docs.close();
            }
        } finally {
            terms.close();
        }
    }
View Full Code Here

Examples of org.apache.lucene.index.TermEnum

   
    FieldType ftleft=schema.getFieldType(fieldLeft);
   
    String prefixLeft=TrieField.getMainValuePrefix(ftleft);
    Term tiLeft=new Term(fieldLeft, prefixLeft==null?"":prefixLeft);
    TermEnum teLeft = null;
    TermDocs tdleft=null;
    if(this.leftreader!=null)
    {
      this.joinRevert=new int[this.leftreader.maxDoc()+1];
      tdleft=this.leftreader.termDocs();
      teLeft=this.leftreader.terms(tiLeft);
    }else{
      this.joinRevert=new int[readerleft.getReader().maxDoc()+1];
      teLeft=readerleft.getReader().terms(tiLeft);
      tdleft=readerleft.getReader().termDocs();
    }
   
    for(int i=0;i<this.joinRevert.length;i++)
    {
      this.joinRevert[i]=-1;
    }
   

   
    FieldType ftright =readerright.getSchema().getFieldType(fieldRigth);
    String prefixRight=TrieField.getMainValuePrefix(ftright);
    Term tiRight=new Term(fieldRigth, prefixRight==null?"":prefixRight);
   
    TermEnum teRight = readerright.getReader().terms(tiRight.createTerm(prefixRight==null?"":prefixRight));
    TermDocs tdRight=readerright.getReader().termDocs();
    this.join=new int[readerright.getReader().maxDoc()+1][];
    for(int i=0;i<this.join.length;i++)
    {
      this.join[i]=null;
    }
   
    int[] docs = new int[1000];
    int[] freqs = new int[1000];
   
   
    int debugline=0;
   
    HashMap<Integer,HashSet<Integer>> join_tmp=new HashMap<Integer,HashSet<Integer>>();
//    HashMap<Integer,Integer> joinRevert_tmp=new HashMap<Integer,Integer>();
    for (;;) {
      Term tleft= teLeft.term();
      Term tRight=teRight.term();
     
     
      if (tleft == null||tRight==null) {
        LOG.info("###termbreak###"+String.valueOf(tleft)+">>>>"+String.valueOf(tRight)+","+fieldLeft+","+fieldRigth);
        break;
      }
      if((!tleft.field().equals(fieldLeft))||(!tRight.field().equals(fieldRigth)))
      {
        LOG.info("###termbreak fieldchange###"+String.valueOf(tleft)+">>>>"+String.valueOf(tRight)+","+fieldLeft+","+fieldRigth);
        break;
      }
     
      String tvleft=ftleft.indexedToReadable(tleft.text());
      String tvRight=ftright.indexedToReadable(tRight.text());


      if(tvleft.equals(tvRight))
      {
        if(debugline++<10)
        {
          LOG.info("###termok###"+String.valueOf(tvleft)+">>>>"+String.valueOf(tvRight)+","+fieldLeft+","+fieldRigth);
        }

        if(tvleft!=null&&!tvleft.trim().isEmpty())
        {
          IntArr LeftArr=this.getListArr(teLeft, tdleft, docs, freqs,Integer.MAX_VALUE);
          IntArr RightArr=this.getListArr(teRight, tdRight, docs, freqs,1);
 
          for (Integer docid:RightArr.list) {
            HashSet<Integer> list=join_tmp.get(docid);
            if(list==null)
            {
              list=new HashSet<Integer>();
              join_tmp.put(docid, list);
            }
            for(int jid:LeftArr.list)
            {
  //            joinRevert_tmp.put(jid, docid);
              this.joinRevert[jid]=docid;
              list.add(jid);
            }
          }
        }else{
          LOG.info("###empty###"+String.valueOf(tvleft)+">>>>"+String.valueOf(tvRight)+","+fieldLeft+","+fieldRigth);
        }
       
       
        teLeft.next();
        teRight.next();
      }else if(tvleft.compareTo(tvRight)>0)
      {
        teRight.next();
      }else{
        teLeft.next();
      }
    }
   
    teLeft.close();
    teRight.close();
       
    for(Entry<Integer, HashSet<Integer>> e:join_tmp.entrySet())
    {
      this.join[e.getKey()]=IntArr.parse(e.getValue()).list;
    }
View Full Code Here

Examples of org.apache.lucene.index.TermEnum

   
    indexInfo.add("numDocs", reader.numDocs());
    indexInfo.add("maxDoc", reader.maxDoc());
   
    if( countTerms ) {
      TermEnum te = null;
      try{
        te = reader.terms();
        int numTerms = 0;
        while (te.next()) {
          numTerms++;
        }
        indexInfo.add("numTerms", numTerms );
      }
      finally{
        if( te != null ) te.close();
      }
    }

    indexInfo.add("version", reader.getVersion())// TODO? Is this different then: IndexReader.getCurrentVersion( dir )?
    indexInfo.add("segmentCount", reader.getSequentialSubReaders().length);
View Full Code Here

Examples of org.apache.lucene.index.TermEnum

  private static Map<String,TopTermQueue> getTopTerms( IndexReader reader, Set<String> fields, int numTerms, Set<String> junkWords ) throws Exception
  {
    Map<String,TopTermQueue> info = new HashMap<String, TopTermQueue>();
   
    TermEnum terms = null;
    try{
      terms = reader.terms();   
      while (terms.next()) {
        String field = terms.term().field();
        String t = terms.term().text();
 
        // Compute distinct terms for every field
        TopTermQueue tiq = info.get( field );
        if( tiq == null ) {
          tiq = new TopTermQueue( numTerms+1 );
          info.put( field, tiq );
        }
        tiq.distinctTerms++;
        tiq.histogram.add( terms.docFreq() )// add the term to the histogram
       
        // Only save the distinct terms for fields we worry about
        if (fields != null && fields.size() > 0) {
          if( !fields.contains( field ) ) {
            continue;
          }
        }
        if( junkWords != null && junkWords.contains( t ) ) {
          continue;
        }
       
        if( terms.docFreq() > tiq.minFreq ) {
          tiq.add(new TopTermQueue.TermInfo(terms.term(), terms.docFreq()));
            if (tiq.size() > numTerms) { // if tiq full
            tiq.pop(); // remove lowest in tiq
            tiq.minFreq = ((TopTermQueue.TermInfo)tiq.top()).docFreq; // reset minFreq
          }
        }
      }
    }
    finally {
      if( terms != null ) terms.close();
    }
    return info;
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.