Examples of TermEnum


Examples of org.apache.lucene.index.TermEnum

   */
  public static void main(String[] args) throws Exception {
    addIndex();
    IndexReader reader = IndexReader.open(lucenePath);
    System.out.println("�ĵ���:"+reader.numDocs());
    TermEnum tes = reader.terms();
    while(tes.next()){
      Term t = tes.term();
      System.out.println(t.toString());
    }
    //IndexSearcher searcher = new IndexSearcher(lucenePath);
  }
View Full Code Here
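
This first snippet simply opens an IndexReader and walks every term in the index. Below is a minimal, self-contained sketch of the same pattern (assuming Lucene 3.x and an existing index under /tmp/index; the class name DumpTerms is illustrative), closing the enumerator and the reader in a finally block.

import java.io.File;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.store.FSDirectory;

public class DumpTerms {
  public static void main(String[] args) throws Exception {
    // Open a reader over an existing index directory (path is an assumption).
    IndexReader reader = IndexReader.open(FSDirectory.open(new File("/tmp/index")));
    TermEnum termEnum = reader.terms();
    try {
      // terms() starts before the first term, so next() must be called first.
      while (termEnum.next()) {
        Term term = termEnum.term();
        System.out.println(term.field() + ":" + term.text());
      }
    } finally {
      termEnum.close();
      reader.close();
    }
  }
}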

Examples of org.apache.lucene.index.TermEnum

    fieldNameArr = null;

    // Read the terms
    if (!fieldsToReadSet.isEmpty()) {
      try {
        TermEnum termEnum = indexReader.terms();
        while (termEnum.next()) {
          Term term = termEnum.term();
          String field = term.field();
          if (fieldsToReadSet.contains(field)) {
            // This is a value of a wanted field
            ArrayList valueList = (ArrayList) resultMap.get(field);
            valueList.add(term.text());
View Full Code Here
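
The truncated snippet above gathers the term texts of a chosen set of fields into a map keyed by field name. A hedged, self-contained sketch of that idea (assuming Lucene 3.x; the class FieldValueCollector and its method are illustrative names, not part of the original project):

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;

public final class FieldValueCollector {
  /** Collects every term text of the requested fields, keyed by field name. */
  public static Map<String, List<String>> readFieldTerms(IndexReader reader,
      Set<String> fieldsToRead) throws IOException {
    Map<String, List<String>> result = new HashMap<String, List<String>>();
    for (String field : fieldsToRead) {
      result.put(field, new ArrayList<String>());
    }
    TermEnum termEnum = reader.terms();
    try {
      while (termEnum.next()) {
        Term term = termEnum.term();
        List<String> values = result.get(term.field());
        if (values != null) { // only keep values of the wanted fields
          values.add(term.text());
        }
      }
    } finally {
      termEnum.close();
    }
    return result;
  }
}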

Examples of org.apache.lucene.index.TermEnum

    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();

    TermDocs termDocs = null;
    TermEnum termEnum = null;
    int t = 0; // current term number
    mterms.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    t++;
    try {
      termDocs = reader.termDocs();
      termEnum = reader.terms(new Term(_indexFieldName, ""));
      do {
        if (termEnum == null)
          break;
        Term term = termEnum.term();
        if (term == null || !_indexFieldName.equals(term.field()))
          break;

        // store term text
        // we expect that there is at most one term per document
        if (t > MAX_VAL_COUNT) {
          throw new IOException("maximum number of value cannot exceed: "
              + MAX_VAL_COUNT);
        }
        String val = term.text();
        mterms.add(val);
        int bit = (0x00000001 << (t-1));
        termDocs.seek(termEnum);
        //freqList.add(termEnum.docFreq());  // removed because the df doesn't take into account the num of deletedDocs
        int df = 0;
        int minID = -1;
        int maxID = -1;
        if (termDocs.next())
        {
          df++;
          int docid = termDocs.doc();
          order.add(docid, order.get(docid) | bit);
          minID = docid;
          while (termDocs.next())
          {
            df++;
            docid = termDocs.doc();
            order.add(docid, order.get(docid) | bit);
          }
          maxID = docid;
        }
        freqList.add(df);
        minIDList.add(minID);
        maxIDList.add(maxID);
        t++;
      } while (termEnum.next());
    } finally {
      try {
        if (termDocs != null) {
          termDocs.close();
        }
      } finally {
        if (termEnum != null) {
          termEnum.close();
        }
      }
    }
   
    mterms.seal();
View Full Code Here
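
The snippet above pairs a TermEnum with a TermDocs: for every term of the field it seeks the postings, records the minimum and maximum document IDs, and counts the document frequency by hand because TermEnum.docFreq() does not account for deleted documents. A stripped-down sketch of just that seek-and-count pattern (assuming Lucene 3.x; LiveDocFreq is an illustrative name):

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;

public final class LiveDocFreq {
  /** Prints each term of the field with the number of non-deleted documents containing it. */
  public static void printLiveDocFreqs(IndexReader reader, String field) throws IOException {
    TermDocs termDocs = reader.termDocs();
    TermEnum termEnum = reader.terms(new Term(field, ""));
    try {
      do {
        Term term = termEnum.term();
        if (term == null || !field.equals(term.field())) break; // ran past the field
        termDocs.seek(termEnum); // reuse the same TermDocs for every term
        int df = 0;
        while (termDocs.next()) {
          df++; // TermDocs skips deleted documents, unlike TermEnum.docFreq()
        }
        System.out.println(term.text() + " -> " + df);
      } while (termEnum.next());
    } finally {
      termDocs.close();
      termEnum.close();
    }
  }
}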

Examples of org.apache.lucene.index.TermEnum

      writer.println("It's no error when endings like 'e', 'en', and so on " + "are missing.");
      writer.println("They have been cuttet by the GermanAnalyzer and will be " + "cuttet from a search query too.");
      writer.println();

      // Write the terms
      TermEnum termEnum = reader.terms();
      int termCount;
      if (WRITE_TERMS_SORTED) {
        termCount = writeTermsSorted(termEnum, writer);
      } else {
        termCount = writeTermsSimply(termEnum, writer);
View Full Code Here

Examples of org.apache.lucene.index.TermEnum

    BoboIndexReader boboReader = BoboIndexReader.getInstance(reader);
   
    Set<String> fieldNames = boboReader.getFacetNames();
    for (String fieldName : fieldNames)
    {
      TermEnum te = reader.terms(new Term(fieldName,""));
      while(te.next())
      {
        Term t = te.term();
        if (!fieldName.equals(t.field())) break;
        writer.println(t.field()+":"+t.text());
      }
      te.close();
    }
    writer.flush();
    writer.close();
   
    boboReader.close();
View Full Code Here
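
The per-field dumps rely on reader.terms(new Term(fieldName, "")) positioning the enumerator at the first term of that field. Because the enumerator is already positioned, the term under the cursor should be consumed before calling next(); the do/while form used in the larger snippets does that, while a plain while (te.next()) loop advances first and therefore skips the field's first term. A small sketch of the do/while form (assuming Lucene 3.x; FieldTerms is an illustrative name):

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;

public final class FieldTerms {
  /** Prints every term text of a single field. */
  public static void printFieldTerms(IndexReader reader, String field) throws IOException {
    // Positions the enum at the first term >= (field, ""), i.e. the field's first term.
    TermEnum te = reader.terms(new Term(field, ""));
    try {
      do {
        Term t = te.term();
        if (t == null || !field.equals(t.field())) break; // left the field
        System.out.println(t.text());
      } while (te.next());
    } finally {
      te.close();
    }
  }
}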

Examples of org.apache.lucene.index.TermEnum

      writer = new FileWriter(outFile);
      PrintWriter out = new PrintWriter(writer);
      Set<String> fieldNames = getFacetNames();
      for (String fieldName : fieldNames)
      {
        TermEnum te = terms(new Term(fieldName, ""));
        out.write(fieldName + ":\n");
        while (te.next())
        {
          Term term = te.term();
          if (!fieldName.equals(term.field()))
          {
            break;
          }
          out.write(term.text() + "\n");
View Full Code Here

Examples of org.apache.lucene.index.TermEnum

    int length = maxDoc + 1;
    TermValueList<T> list = listFactory == null ? (TermValueList<T>) new TermStringList()
        : listFactory.createTermList();
    TermDocs termDocs = reader.termDocs();
    TermEnum termEnum = reader.terms(new Term(field, ""));
    int t = 0; // current term number

    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    // int df = 0;
    t++;
    try
    {
      do
      {
        Term term = termEnum.term();
        if (term == null || !field.equals(term.field()))
          break;

        if (t > order.maxValue())
        {
          throw new IOException("maximum number of value cannot exceed: "
              + order.maxValue());
        }
        // store term text
        // we expect that there is at most one term per document
        if (t >= length)
          throw new RuntimeException("there are more terms than "
              + "documents in field \"" + field
              + "\", but it's impossible to sort on " + "tokenized fields");
        list.add(term.text());
        termDocs.seek(termEnum);
        // freqList.add(termEnum.docFreq()); // doesn't take into account
        // deldocs
        int minID = -1;
        int maxID = -1;
        int df = 0;
        if (termDocs.next())
        {
          df++;
          int docid = termDocs.doc();
          order.add(docid, t);
          minID = docid;
          while (termDocs.next())
          {
            df++;
            docid = termDocs.doc();
            order.add(docid, t);
          }
          maxID = docid;
        }
        freqList.add(df);
        minIDList.add(minID);
        maxIDList.add(maxID);

        t++;
      } while (termEnum.next());
    } finally
    {
      termDocs.close();
      termEnum.close();
    }
    list.seal();

    this.valArray = list;
    this.freqs = freqList.toIntArray();
View Full Code Here

Examples of org.apache.lucene.index.TermEnum

  {
    long t0 = System.currentTimeMillis();
    int maxdoc = reader.maxDoc();
    BufferedLoader loader = getBufferedLoader(maxdoc, workArea);

    TermEnum tenum = null;
    TermDocs tdoc = null;
    TermValueList<T> list = (listFactory == null ? (TermValueList<T>)new TermStringList() : listFactory.createTermList());
    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();

    int t = 0; // current term number
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    t++;
   
    _overflow = false;
    try
    {
      tdoc = reader.termDocs();
      tenum = reader.terms(new Term(fieldName, ""));
      if (tenum != null)
      {
        do
        {
          Term term = tenum.term();
          if (term == null || !fieldName.equals(term.field()))
            break;

          String val = term.text();

          if (val != null)
          {
            list.add(val);

            tdoc.seek(tenum);
            //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs
            int df = 0;
            int minID = -1;
            int maxID = -1;
            if(tdoc.next())
            {
              df++;
              int docid = tdoc.doc();
              if(!loader.add(docid, t)) logOverflow(fieldName);
              minID = docid;
              while(tdoc.next())
              {
                df++;
                docid = tdoc.doc();
                if(!loader.add(docid, t)) logOverflow(fieldName);
              }
              maxID = docid;
            }
            freqList.add(df);
            minIDList.add(minID);
            maxIDList.add(maxID);
          }

          t++;
        }
        while (tenum.next());
      }
    }
    finally
    {
      try
      {
        if (tdoc != null)
        {
          tdoc.close();
        }
      }
      finally
      {
        if (tenum != null)
        {
          tenum.close();
        }
      }
    }

    list.seal();
View Full Code Here

Examples of org.apache.lucene.index.TermEnum

    catch (Exception e)
    {
      throw new RuntimeException("failed to load due to " + e.toString(), e);
    }
   
    TermEnum tenum = null;
    TermDocs tdoc = null;
    TermValueList<T> list = (listFactory == null ? (TermValueList<T>)new TermStringList() : listFactory.createTermList());
    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();

    int t = 0; // current term number
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    t++;

    _overflow = false;
    try
    {
      tdoc = reader.termDocs();
      tenum = reader.terms(new Term(fieldName, ""));
      if (tenum != null)
      {
        do
        {
          Term term = tenum.term();
          if(term == null || !fieldName.equals(term.field()))
            break;
         
          String val = term.text();
         
          if (val != null)
          {
            list.add(val);
           
            tdoc.seek(tenum);
            //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs
            int df = 0;
            int minID = -1;
            int maxID = -1;
            if(tdoc.next())
            {
              df++;
              int docid = tdoc.doc();
              if (!_nestedArray.addData(docid, t)) logOverflow(fieldName);
              minID = docid;
              while(tdoc.next())
              {
                df++;
                docid = tdoc.doc();
                if(!_nestedArray.addData(docid, t)) logOverflow(fieldName);
              }
              maxID = docid;
            }
            freqList.add(df);
            minIDList.add(minID);
            maxIDList.add(maxID);
          }
         
          t++;
        }
        while (tenum.next());
      }
    }
    finally
    {
      try
      {
        if (tdoc != null)
        {
          tdoc.close();
        }
      }
      finally
      {
        if (tenum != null)
        {
          tenum.close();
        }
      }
    }
   
    list.seal();
View Full Code Here

Examples of org.apache.lucene.index.TermEnum

            DirectoryProvider dirProvider = ((FullTextSession)em.getDelegate()).getSearchFactory().getDirectoryProviders(indexedEntity.getClazz())[0];
            IndexReader reader = IndexReader.open(dirProvider.getDirectory());

            indexedEntity.setNumOfIndexedDocuments(reader.numDocs());

            TermEnum te = reader.terms();
            long numTerms = 0;
            while (te.next()) numTerms++;
            indexedEntity.setNumOfIndexedTerms(numTerms);

            long size = 0;
            String [] fileNames = dirProvider.getDirectory().list();
            for (String fileName : fileNames) {
View Full Code Here
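
The last snippet counts all indexed terms with a TermEnum and then begins summing the sizes of the files in the index Directory before it is cut off. A hedged sketch of that size computation (assuming Lucene 3.x, where Directory.listAll() replaces the older list(); IndexSize is an illustrative name):

import java.io.IOException;
import org.apache.lucene.store.Directory;

public final class IndexSize {
  /** Total on-disk size of the index, in bytes. */
  public static long sizeInBytes(Directory directory) throws IOException {
    long size = 0;
    for (String fileName : directory.listAll()) {
      size += directory.fileLength(fileName);
    }
    return size;
  }
}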