Package: org.apache.lucene.index

Usage examples of org.apache.lucene.index.TermEnum


   */
  public static void main(String[] args) throws Exception {
    addIndex();
    IndexReader reader = IndexReader.open(lucenePath);
    System.out.println("�ĵ���:"+reader.numDocs());
    TermEnum tes = reader.terms();
    while(tes.next()){
      Term t = tes.term();
      System.out.println(t.toString());
    }
    //IndexSearcher searcher = new IndexSearcher(lucenePath);
  }
View Full Code Here


    fieldNameArr = null;

    // Read the terms
    if (!fieldsToReadSet.isEmpty()) {
      try {
        TermEnum termEnum = indexReader.terms();
        while (termEnum.next()) {
          Term term = termEnum.term();
          String field = term.field();
          if (fieldsToReadSet.contains(field)) {
            // This is a value of a wanted field
            ArrayList valueList = (ArrayList) resultMap.get(field);
            valueList.add(term.text());
View Full Code Here

    IntArrayList minIDList=new IntArrayList();
      IntArrayList maxIDList=new IntArrayList();
      IntArrayList freqList = new IntArrayList();
     
    TermDocs termDocs = null;
    TermEnum termEnum = null;
    int t = 0; // current term number
    mterms.add(null);
    minIDList.add(-1);
      maxIDList.add(-1);
      freqList.add(0);
    t++;
    try {
      termDocs = reader.termDocs();
      termEnum = reader.terms(new Term(_indexFieldName, ""));
      do {
        if (termEnum == null)
          break;
        Term term = termEnum.term();
        if (term == null || !_indexFieldName.equals(term.field()))
          break;

        // store term text
        // we expect that there is at most one term per document
        if (t > MAX_VAL_COUNT) {
          throw new IOException("maximum number of value cannot exceed: "
              + MAX_VAL_COUNT);
        }
        String val = term.text();
        mterms.add(val);
        int bit = (0x00000001 << (t-1));
        termDocs.seek(termEnum);
        //freqList.add(termEnum.docFreq());  // removed because the df doesn't take into account the num of deletedDocs
        int df = 0;
        int minID=-1;
            int maxID=-1;
            if(termDocs.next())
            {
              df++;
                  int docid = termDocs.doc();
                  order.add(docid, order.get(docid) | bit);
                  minID = docid;
                  while (termDocs.next())
              {
                    df++;
                    docid = termDocs.doc();
                    order.add(docid, order.get(docid) | bit);
              }
          maxID = docid;
            }
              freqList.add(df);
        minIDList.add(minID);
            maxIDList.add(maxID);
        t++;
      } while (termEnum.next());
    } finally {
      try {
        if (termDocs != null) {
          termDocs.close();
        }
      } finally {
        if (termEnum != null) {
          termEnum.close();
        }
      }
    }
   
    mterms.seal();
View Full Code Here

      writer.println("It's no error when endings like 'e', 'en', and so on " + "are missing.");
      writer.println("They have been cuttet by the GermanAnalyzer and will be " + "cuttet from a search query too.");
      writer.println();

      // Write the terms
      TermEnum termEnum = reader.terms();
      int termCount;
      if (WRITE_TERMS_SORTED) {
        termCount = writeTermsSorted(termEnum, writer);
      } else {
        termCount = writeTermsSimply(termEnum, writer);
View Full Code Here

    BoboIndexReader boboReader = BoboIndexReader.getInstance(reader);
   
    Set<String> fieldNames = boboReader.getFacetNames();
    for (String fieldName : fieldNames)
    {
      TermEnum te = reader.terms(new Term(fieldName,""));
      while(te.next())
      {
        Term t = te.term();
        if (!fieldName.equals(t.field())) break;
        writer.println(t.field()+":"+t.text());
      }
      te.close();
    }
    writer.flush();
    writer.close();
   
    boboReader.close();
View Full Code Here

      writer = new FileWriter(outFile);
      PrintWriter out = new PrintWriter(writer);
      Set<String> fieldNames = getFacetNames();
      for (String fieldName : fieldNames)
      {
        TermEnum te = terms(new Term(fieldName, ""));
        out.write(fieldName + ":\n");
        while (te.next())
        {
          Term term = te.term();
          if (!fieldName.equals(term.field()))
          {
            break;
          }
          out.write(term.text() + "\n");
View Full Code Here

    int length = maxDoc + 1;
    TermValueList<T> list = listFactory == null ? (TermValueList<T>) new TermStringList()
        : listFactory.createTermList();
    TermDocs termDocs = reader.termDocs();
    TermEnum termEnum = reader.terms(new Term(field, ""));
    int t = 0; // current term number

    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    // int df = 0;
    t++;
    try
    {
      do
      {
        Term term = termEnum.term();
        if (term == null || term.field() != field)
          break;

        if (t > order.maxValue())
        {
          throw new IOException("maximum number of value cannot exceed: "
              + order.maxValue());
        }
        // store term text
        // we expect that there is at most one term per document
        if (t >= length)
          throw new RuntimeException("there are more terms than "
              + "documents in field \"" + field
              + "\", but it's impossible to sort on " + "tokenized fields");
        list.add(term.text());
        termDocs.seek(termEnum);
        // freqList.add(termEnum.docFreq()); // doesn't take into account
        // deldocs
        int minID = -1;
        int maxID = -1;
        int df = 0;
        if (termDocs.next())
        {
          df++;
          int docid = termDocs.doc();
          order.add(docid, t);
          minID = docid;
          while (termDocs.next())
          {
            df++;
            docid = termDocs.doc();
            order.add(docid, t);
          }
          maxID = docid;
        }
        freqList.add(df);
        minIDList.add(minID);
        maxIDList.add(maxID);

        t++;
      } while (termEnum.next());
    } finally
    {
      termDocs.close();
      termEnum.close();
    }
    list.seal();

    this.valArray = list;
    this.freqs = freqList.toIntArray();
View Full Code Here

  {
    long t0 = System.currentTimeMillis();
    int maxdoc = reader.maxDoc();
    BufferedLoader loader = getBufferedLoader(maxdoc, workArea);

    TermEnum tenum = null;
    TermDocs tdoc = null;
    TermValueList<T> list = (listFactory == null ? (TermValueList<T>)new TermStringList() : listFactory.createTermList());
    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();

    int t = 0; // current term number
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    t++;
   
    _overflow = false;
    try
    {
      tdoc = reader.termDocs();
      tenum = reader.terms(new Term(fieldName, ""));
      if (tenum != null)
      {
        do
        {
          Term term = tenum.term();
          if (term == null || !fieldName.equals(term.field()))
            break;

          String val = term.text();

          if (val != null)
          {
            list.add(val);

            tdoc.seek(tenum);
            //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs
            int df = 0;
            int minID = -1;
            int maxID = -1;
            if(tdoc.next())
            {
              df++;
              int docid = tdoc.doc();
              if(!loader.add(docid, t)) logOverflow(fieldName);
              minID = docid;
              while(tdoc.next())
              {
                df++;
                docid = tdoc.doc();
                if(!loader.add(docid, t)) logOverflow(fieldName);
              }
              maxID = docid;
            }
            freqList.add(df);
            minIDList.add(minID);
            maxIDList.add(maxID);
          }

          t++;
        }
        while (tenum.next());
      }
    }
    finally
    {
      try
      {
        if (tdoc != null)
        {
          tdoc.close();
        }
      }
      finally
      {
        if (tenum != null)
        {
          tenum.close();
        }
      }
    }

    list.seal();
View Full Code Here

    catch (Exception e)
    {
      throw new RuntimeException("failed to load due to " + e.toString(), e);
    }
   
    TermEnum tenum = null;
    TermDocs tdoc = null;
    TermValueList<T> list = (listFactory == null ? (TermValueList<T>)new TermStringList() : listFactory.createTermList());
    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();

    int t = 0; // current term number
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    t++;

    _overflow = false;
    try
    {
      tdoc = reader.termDocs();
      tenum = reader.terms(new Term(fieldName, ""));
      if (tenum != null)
      {
        do
        {
          Term term = tenum.term();
          if(term == null || !fieldName.equals(term.field()))
            break;
         
          String val = term.text();
         
          if (val != null)
          {
            list.add(val);
           
            tdoc.seek(tenum);
            //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs
            int df = 0;
            int minID = -1;
            int maxID = -1;
            if(tdoc.next())
            {
              df++;
              int docid = tdoc.doc();
              if (!_nestedArray.addData(docid, t)) logOverflow(fieldName);
              minID = docid;
              while(tdoc.next())
              {
                df++;
                docid = tdoc.doc();
                if(!_nestedArray.addData(docid, t)) logOverflow(fieldName);
              }
              maxID = docid;
            }
            freqList.add(df);
            minIDList.add(minID);
            maxIDList.add(maxID);
          }
         
          t++;
        }
        while (tenum.next());
      }
    }
    finally
    {
      try
      {
        if (tdoc != null)
        {
          tdoc.close();
        }
      }
      finally
      {
        if (tenum != null)
        {
          tenum.close();
        }
      }
    }
   
    list.seal();
View Full Code Here

            DirectoryProvider dirProvider = ((FullTextSession)em.getDelegate()).getSearchFactory().getDirectoryProviders(indexedEntity.getClazz())[0];
            IndexReader reader = IndexReader.open(dirProvider.getDirectory());

            indexedEntity.setNumOfIndexedDocuments(reader.numDocs());

            TermEnum te = reader.terms();
            long numTerms = 0;
            while (te.next()) numTerms++;
            indexedEntity.setNumOfIndexedTerms(numTerms);

            long size = 0;
            String [] fileNames = dirProvider.getDirectory().list();
            for (String fileName : fileNames) {
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.TermEnum

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.