Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermEnum
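
The snippets below come from several projects, but most follow the same pattern: position a TermEnum with IndexReader.terms(Term), walk forward with next(), stop once the enumerator leaves the field of interest, and optionally seek a TermDocs to the current term to visit matching documents. The following minimal sketch shows that pattern in one place; it assumes the pre-4.0 Lucene API used throughout this page, and the class name and field argument are placeholders rather than code taken from any snippet.

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;

public class TermEnumWalk {

  // Walk every term of one field, printing its docFreq and counting its documents.
  public static void walk(IndexReader reader, String field) throws IOException {
    // terms(Term) positions the enumerator at the first term >= (field, "")
    TermEnum te = reader.terms(new Term(field, ""));
    TermDocs td = reader.termDocs();
    try {
      do {
        Term t = te.term();
        // the enumerator spans all fields, so stop once we leave the target field
        if (t == null || !t.field().equals(field)) {
          break;
        }
        td.seek(te); // reuse the same TermDocs for each term
        int count = 0;
        while (td.next()) {
          count++; // td.doc() would give the matching document number here
        }
        System.out.println(t.text() + " docFreq=" + te.docFreq() + " docs=" + count);
      } while (te.next());
    } finally {
      td.close();
      te.close();
    }
  }
}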


    private int getDocFreq(String term) {
      int result = 1;
      currentTerm = currentTerm.createTerm(term);
      try {
        final TermEnum termEnum = reader.terms(currentTerm);
        if (termEnum != null && termEnum.term().equals(currentTerm)) {
          result = termEnum.docFreq();
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
      return result;
    }


  PrefixGenerator(Term prefix) {
    this.prefix = prefix;
  }

  public void generate(IndexReader reader) throws IOException {
    TermEnum enumerator = reader.terms(prefix);
    TermDocs termDocs = reader.termDocs();

    try {

      String prefixText = prefix.text();
      String prefixField = prefix.field();
      do {
        Term term = enumerator.term();
        if (term != null &&
            term.text().startsWith(prefixText) &&
            term.field() == prefixField) // == is intentional: Lucene interns field names
        {
          termDocs.seek(term);
          while (termDocs.next()) {
            handleDoc(termDocs.doc());
          }
        } else {
          break;
        }
      } while (enumerator.next());
    } finally {
      termDocs.close();
      enumerator.close();
    }
  }

  WildcardGenerator(Term wildcard) {
    this.wildcard = wildcard;
  }

  public void generate(IndexReader reader) throws IOException {
    TermEnum enumerator = new WildcardTermEnum(reader, wildcard);
    TermDocs termDocs = reader.termDocs();
    try {
      do {
        Term term = enumerator.term();
        if (term==null) break;
        termDocs.seek(term);
        while (termDocs.next()) {
          handleDoc(termDocs.doc());
        }
      } while (enumerator.next());
    } finally {
      termDocs.close();
      enumerator.close();
    }
  }

        }
      }
      final int ix = i;
      final int jx = j;
 
      return new TermEnum() {
 
        private int srtTermsIdx = ix; // index into info.sortedTerms
        private int srtFldsIdx = jx; // index into sortedFields
         
        @Override

    int notFoundCount=0;
    int otherErrors=0;

    TermDocs termDocs = null;
    Term protoTerm = new Term(idName, "");
    TermEnum termEnum = null;
    // Number of times to try termEnum.next() before resorting to skip
    int numTimesNext = 10;

    char delimiter='=';
    String termVal;
    boolean hasNext=true;
    String prevKey="";

    String lastVal="\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF";

    try {
      termDocs = reader.termDocs();
      termEnum = reader.terms(protoTerm);
      Term t = termEnum.term();
      if (t != null && t.field() == idName) { // intern'd comparison
        termVal = t.text();
      } else {
        termVal = lastVal;
      }


      for (String line; (line=r.readLine())!=null;) {
        int delimIndex = line.lastIndexOf(delimiter);
        if (delimIndex < 0) continue;

        int endIndex = line.length();
        /* EOLs should already be removed for BufferedReader.readLine()
        for(int endIndex = line.length();endIndex>delimIndex+1; endIndex--) {
          char ch = line.charAt(endIndex-1);
          if (ch!='\n' && ch!='\r') break;
        }
        */
        String key = line.substring(0, delimIndex);
        String val = line.substring(delimIndex+1, endIndex);

        String internalKey = idType.toInternal(key);
        float fval;
        try {
          fval=Float.parseFloat(val);
        } catch (Exception e) {
          if (++otherErrors<=10) {
            SolrCore.log.error("Error loading external value source " + fname + " " + e
              + (otherErrors<10 ? "" : "\tSkipping future errors for this file.")
            );
          }
          continue; // go to next line in file.. leave values as default.
        }

        if (sorted) {
          // make sure this key is greater than the previous key
          sorted = internalKey.compareTo(prevKey) >= 0;
          prevKey = internalKey;

          if (sorted) {
            int countNext = 0;
            for(;;) {
              int cmp = internalKey.compareTo(termVal);
              if (cmp == 0) {
                termDocs.seek(termEnum);
                while (termDocs.next()) {
                  vals[termDocs.doc()] = fval;
                }
                break;
              } else if (cmp < 0) {
                // term enum has already advanced past current key... we didn't find it.
                if (notFoundCount<10) {  // collect first 10 not found for logging
                  notFound.add(key);
                }
                notFoundCount++;
                break;
              } else {
                // termEnum is less than our current key, so skip ahead

                // try next() a few times to see if we hit or pass the target.
                // Lucene's termEnum.skipTo() is currently unoptimized (it just does next())
                // so the best thing is to simply ask the reader for a new termEnum(target)
                // if we really need to skip.
                if (++countNext > numTimesNext) {
                  termEnum = reader.terms(protoTerm.createTerm(internalKey));
                  t = termEnum.term();
                } else {
                  hasNext = termEnum.next();
                  t = hasNext ? termEnum.term() : null;
                }

                if (t != null && t.field() == idName) { // intern'd comparison
                  termVal = t.text();
                } else {
                  termVal = lastVal;
                }
              }
            } // end for(;;)
          }
        }

        if (!sorted) {
          termEnum = reader.terms(protoTerm.createTerm(internalKey));
          t = termEnum.term();
          if (t != null && t.field() == idName  // intern'd comparison
                  && internalKey.equals(t.text()))
          {
            termDocs.seek (termEnum);
            while (termDocs.next()) {
              vals[termDocs.doc()] = fval;
            }
          } else {
            if (notFoundCount<10) {  // collect first 10 not found for logging
              notFound.add(key);
            }
            notFoundCount++;
          }
        }
      }
    } catch (IOException e) {
      // log, use defaults
      SolrCore.log.error("Error loading external value source: " +e);
    } finally {
      // swallow exceptions on close so we don't override any
      // exceptions that happened in the loop
      if (termDocs!=null) try{termDocs.close();}catch(Exception e){}
      if (termEnum!=null) try{termEnum.close();}catch(Exception e){}
      try{r.close();}catch(Exception e){}
    }

    SolrCore.log.info("Loaded external value source " + fname
      + (notFoundCount==0 ? "" : " :"+notFoundCount+" missing keys "+notFound)
    );

  }

  protected float calculateWeight(Term term, IndexReader reader) throws IOException {
    // if a term is not in the index, then its weight is 0
    TermEnum termEnum = reader.terms(term);
    if (termEnum != null && termEnum.term() != null && termEnum.term().equals(term)) {
      return 1.0f / termEnum.docFreq();
    } else {
      log.warn("Couldn't find doc freq for term {}", term);
      return 0;
    }
  }
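
Unlike the generator and dump examples on this page, the calculateWeight snippet above never closes the TermEnum it obtains from reader.terms(term). A variant that releases the enumerator might look like the sketch below; the class name and slf4j logger are placeholders, and only the 1/docFreq weighting is taken from the snippet above.

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Sketch only: same weighting as above, but the TermEnum is closed in a finally block.
class ClosingTermWeight {

  private static final Logger log = LoggerFactory.getLogger(ClosingTermWeight.class);

  protected float calculateWeight(Term term, IndexReader reader) throws IOException {
    TermEnum termEnum = reader.terms(term); // positioned at the first term >= term
    if (termEnum == null) {
      return 0f; // defensive: mirrors the null check in the original snippet
    }
    try {
      if (termEnum.term() != null && termEnum.term().equals(term)) {
        return 1.0f / termEnum.docFreq();
      }
      log.warn("Couldn't find doc freq for term {}", term);
      return 0f; // a term absent from the index gets weight 0
    } finally {
      termEnum.close();
    }
  }
}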

   * @throws IOException
   */
  public static void dumpTerms(File indexDir, String field, PrintWriter out) throws IOException {
    Directory dir = FSDirectory.open(indexDir);
    IndexReader reader = IndexReader.open(dir, true);
    TermEnum te = reader.terms(new Term(field, ""));
    do {
      Term term = te.term();
      if (term == null || term.field().equals(field) == false) {
        break;
      }
      out.printf("%s %d\n", term.text(), te.docFreq());
    } while (te.next());
    te.close();
  }

    log.info("Loaded " + categories.size() + " categories from index");
  }
 
  /** populate the list of categories by reading the values from the categoryField in the index */
  protected void scanCategories() throws IOException {
    TermEnum te = indexReader.terms(new Term(categoryFieldName));
    final Set<String> c = categories;
   
    do {
      if (te.term() == null || !te.term().field().equals(categoryFieldName)) break;
      c.add(te.term().text());
    } while (te.next());
    te.close();
   
    log.info("Scanned " + c.size() + " categories from index");
  }

  }
 
  public static void dumpTags(File file, String field, long maxDocs) throws IOException {
    Directory dir = FSDirectory.open(file);
    IndexReader reader = IndexReader.open(dir, true);
    TermEnum te = reader.terms(new Term(field, ""));
    do {
      Term term = te.term();
      if (term == null || term.field().equals(field) == false) {
        break;
      }
      System.err.printf("%s %d\n", term.text(), te.docFreq());
    } while (te.next());
    te.close();
  }

  public static void emitTextForTags(File file, File output) throws IOException {
    String field = "tag";
   
    Directory dir = FSDirectory.open(file);
    IndexReader reader = IndexReader.open(dir, true);
    TermEnum te = reader.terms(new Term(field, ""));
    StringBuilder buf = new StringBuilder();
    do {
      Term term = te.term();
      if (term == null || term.field().equals(field) == false) {
        break;
      }
     
      if (te.docFreq() > 30) {
        File f = new File(output, term.text() + ".txt");
        PrintWriter pw = new PrintWriter(new FileWriter(f));
        System.err.printf("%s %d\n", term.text(), te.docFreq());
       
        TermDocs td = reader.termDocs(term);
        while (td.next()) {
          int doc = td.doc();
          buf.setLength(0);
          appendVectorTerms(buf, reader.getTermFreqVector(doc, "description-clustering"));
          appendVectorTerms(buf, reader.getTermFreqVector(doc, "extended-clustering"));
          emitTagDoc(term, pw, buf);
        }
       
        td.close();
        pw.close();
      }
    } while (te.next());
    te.close();
  }

