Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermDocs
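In Lucene's 3.x API, TermDocs enumerates the documents that contain a given term, along with the term's frequency in each document. You obtain one from IndexReader.termDocs(), advance it with next(), read the current match with doc() and freq(), reposition it with seek() or skipTo(), and release it with close(). The snippets below are excerpts from real projects. To set the stage, a minimal sketch of the core idiom ("reader", the field name, and the value are placeholders):

    // Print every document containing the term, with its in-document frequency.
    void printMatches(IndexReader reader) throws IOException {
        TermDocs td = reader.termDocs(new Term("contents", "lucene"));
        try {
            while (td.next()) {
                System.out.println(td.doc() + ": " + td.freq());
            }
        } finally {
            td.close();  // TermDocs holds index resources; always close it
        }
    }

The first excerpt builds a filter bit set over several terms, OR-ing each term's matching documents into a single BitSet: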


    public BitSet bits(IndexReader reader) throws IOException {
        BitSet bits = new BitSet(reader.maxDoc());
        // Collect documents matching any of the search terms.
        for (int i = 0; i < searchTerms.length; i++) {
            // Get an enumeration of all the documents that contain the
            // current term.
            TermDocs matchingDocs = reader.termDocs(searchTerms[i]);
            try {
                if (matchingDocs != null) {
                    while (matchingDocs.next()) {
                        bits.set(matchingDocs.doc());
                    }
                }
            }
            finally {
                if (matchingDocs != null) {
                    matchingDocs.close();
                }
            }
        }
        return bits;
    }
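The next excerpt adapts a TermDocs to Lucene's DocIdSetIterator contract, mapping next() and skipTo() onto nextDoc() and advance(), and closing the underlying enumerator as soon as it is exhausted: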


        term = new Term(name, val);
      }

      @Override
      public DocIdSetIterator iterator() throws IOException {
        final TermDocs td = _reader.termDocs(term);
        if (td == null) {
          return EmptyDocIdSet.getInstance().iterator();
        }
        return new DocIdSetIterator() {

          private int _doc = -1;

          @Override
          public int advance(int target) throws IOException {
            if (td.skipTo(target)) {
              _doc = td.doc();
            }
            else {
              // Exhausted: release the TermDocs and signal the end.
              td.close();
              _doc = DocIdSetIterator.NO_MORE_DOCS;
            }
            return _doc;
          }

          @Override
          public int docID() {
            return _doc;
          }

          @Override
          public int nextDoc() throws IOException {
            if (td.next()) {
              _doc = td.doc();
            }
            else {
              td.close();
              _doc = DocIdSetIterator.NO_MORE_DOCS;
            }
            return _doc;
          }
         
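This loader walks every term of one field with a TermEnum and, for each term, re-seeks a single reused TermDocs. It counts document frequency by hand (the stored docFreq() does not account for deleted documents), packs each term's presence into a per-document bitmask, and records the minimum and maximum matching document IDs along the way: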

   
    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();

    TermDocs termDocs = null;
    TermEnum termEnum = null;
    int t = 0; // current term number
    mterms.add(null);   // dummy entry at index 0 so term numbers start at 1
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    t++;
    try {
      termDocs = reader.termDocs();
      termEnum = reader.terms(new Term(_indexFieldName, ""));
      do {
        if (termEnum == null)
          break;
        Term term = termEnum.term();
        if (term == null || !_indexFieldName.equals(term.field()))
          break;

        // store term text
        // we expect that there is at most one term per document
        if (t > MAX_VAL_COUNT) {
          throw new IOException("maximum number of values cannot exceed: "
              + MAX_VAL_COUNT);
        }
        String val = term.text();
        mterms.add(val);
        int bit = (0x00000001 << (t-1));
        termDocs.seek(termEnum);
        //freqList.add(termEnum.docFreq());  // removed because the df doesn't take into account the num of deletedDocs
        int df = 0;
        int minID = -1;
        int maxID = -1;
        if (termDocs.next())
        {
          df++;
          int docid = termDocs.doc();
          order.add(docid, order.get(docid) | bit);
          minID = docid;
          while (termDocs.next())
          {
            df++;
            docid = termDocs.doc();
            order.add(docid, order.get(docid) | bit);
          }
          maxID = docid;
        }
        freqList.add(df);
        minIDList.add(minID);
        maxIDList.add(maxID);
        t++;
      } while (termEnum.next());
    } finally {
      try {
        if (termDocs != null) {
          termDocs.close();
        }
      } finally {
        if (termEnum != null) {
          termEnum.close();
        }
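Several of the loaders in this listing share the same scan skeleton, which is worth isolating. A condensed sketch, assuming reader and fieldName are in scope:

    TermDocs termDocs = reader.termDocs();                     // unpositioned enumerator, reused per term
    TermEnum termEnum = reader.terms(new Term(fieldName, "")); // positioned at the field's first term
    try {
        do {
            Term term = termEnum.term();
            if (term == null || !fieldName.equals(term.field()))
                break;                                         // walked past the end of the field
            termDocs.seek(termEnum);                           // reposition on the current term
            while (termDocs.next()) {
                // consume termDocs.doc() / termDocs.freq()
            }
        } while (termEnum.next());
    } finally {
        try { termDocs.close(); } finally { termEnum.close(); }
    }

The next two excerpts use TermDocs for point lookups instead of full scans. findLeadDocument resolves a URL hash to its matching documents and returns the one marked as the representative ("lead") bookmark: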

   *
   * @return a Lucene document of the representative bookmark
   */
  public Document findLeadDocument(String urlHash) {
    IndexReader reader = null;
    TermDocs termDocs = null;
    Document leadDoc = null;
    try {
      boolean exists = IndexReader.indexExists(indexDirectory);
      if (exists) {
        reader = IndexReader.open(indexDirectory);
        Term key = new Term(DocumentCreator.FIELD_URL_MD5, urlHash);
        termDocs = reader.termDocs(key);
        boolean found = false;
        while (termDocs.next() && !found) {
          int pos = termDocs.doc();
          // use FieldSelector for more efficient loading of Fields.
          // load only what's needed to determine a leading document
          Document d = reader.document(pos, new FieldSelector() {
            private static final long serialVersionUID = 1426724242925499003L;

            public FieldSelectorResult accept(String field) {
              if (field.equals(DocumentCreator.FIELD_INDEX_TYPE)) {
                return FieldSelectorResult.LOAD_AND_BREAK;
              } else {
                return FieldSelectorResult.NO_LOAD;
              }
            }
          });
          String[] values = d
              .getValues(DocumentCreator.FIELD_INDEX_TYPE);
          if (values != null) {
            List<String> vList = Arrays.asList(values);
            if (vList.contains(DocumentCreator.INDEX_TYPE_LEAD)) {
              leadDoc = reader.document(pos);
              found = true;
            }
          }
        }
      }
    } catch (Exception e) {
      logger.error("FindLeadDocument failed to find doc: " + urlHash
          + ", exception=" + e);
    } finally {
      try {
        if (termDocs != null) {
          termDocs.close();
        }
        if (reader != null) {
          reader.close();
        }
      } catch (Exception e) {
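findNonLeadDocument is the mirror image: it returns the first matching document that is not marked as the lead, and also accepts a document that carries no index-type field at all: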

   *
   * @return a Lucene document of a non-representative bookmark
   */
  public Document findNonLeadDocument(String urlHash) {
    IndexReader reader = null;
    TermDocs termDocs = null;
    Document leadDoc = null;
    try {
      boolean exists = IndexReader.indexExists(indexDirectory);
      if (exists) {
        reader = IndexReader.open(indexDirectory);
        Term key = new Term(DocumentCreator.FIELD_URL_MD5, urlHash);
        termDocs = reader.termDocs(key);
        boolean found = false;
        while (termDocs.next() && !found) {
          int pos = termDocs.doc();
          // use FieldSelector for more efficient loading of Fields.
          // load only what's needed to determine a leading document
          Document d = reader.document(pos, new FieldSelector() {
            private static final long serialVersionUID = 1426724242925499003L;

            public FieldSelectorResult accept(String field) {
              if (field.equals(DocumentCreator.FIELD_INDEX_TYPE)) {
                return FieldSelectorResult.LOAD_AND_BREAK;
              } else {
                return FieldSelectorResult.NO_LOAD;
              }
            }

          });
          String[] values = d
              .getValues(DocumentCreator.FIELD_INDEX_TYPE);
          if (values != null) {
            List<String> vList = Arrays.asList(values);
            if (!vList.contains(DocumentCreator.INDEX_TYPE_LEAD)) {
              leadDoc = reader.document(pos);
              found = true;
            }
          } else {
            leadDoc = reader.document(pos);
            found = true;
          }
        }
      }
    } catch (Exception e) {
      logger.error("FindLeadDocument failed to find doc hash: " + urlHash
          + ", exception=" + e);
    } finally {
      try {
        if (termDocs != null) {
          termDocs.close();
        }
        if (reader != null) {
          reader.close();
        }
      } catch (Exception e) {
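Back to bulk loading: this variant stores one term ordinal per document via order.add(docid, t) rather than a bitmask, and rejects fields that have more distinct terms than documents, since that indicates a tokenized field that cannot be sorted on: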

    IntArrayList freqList = new IntArrayList();

    int length = maxDoc + 1;
    TermValueList<T> list = listFactory == null ? (TermValueList<T>) new TermStringList()
        : listFactory.createTermList();
    TermDocs termDocs = reader.termDocs();
    TermEnum termEnum = reader.terms(new Term(field, ""));
    int t = 0; // current term number

    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    // int df = 0;
    t++;
    try
    {
      do
      {
        Term term = termEnum.term();
        if (term == null || !field.equals(term.field()))
          break;

        if (t > order.maxValue())
        {
          throw new IOException("maximum number of value cannot exceed: "
              + order.maxValue());
        }
        // store term text
        // we expect that there is at most one term per document
        if (t >= length)
          throw new RuntimeException("there are more terms than "
              + "documents in field \"" + field
              + "\", but it's impossible to sort on " + "tokenized fields");
        list.add(term.text());
        termDocs.seek(termEnum);
        // freqList.add(termEnum.docFreq()); // doesn't take into account
        // deldocs
        int minID = -1;
        int maxID = -1;
        int df = 0;
        if (termDocs.next())
        {
          df++;
          int docid = termDocs.doc();
          order.add(docid, t);
          minID = docid;
          while (termDocs.next())
          {
            df++;
            docid = termDocs.doc();
            order.add(docid, t);
          }
          maxID = docid;
        }
        freqList.add(df);
        minIDList.add(minID);
        maxIDList.add(maxID);

        t++;
      } while (termEnum.next());
    } finally
    {
      termDocs.close();
      termEnum.close();
    }
    list.seal();

    this.valArray = list;
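A further variant of the same scan feeds the (docid, term number) pairs into a BufferedLoader, calling logOverflow instead of aborting when loader.add rejects a pair: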

    long t0 = System.currentTimeMillis();
    int maxdoc = reader.maxDoc();
    BufferedLoader loader = getBufferedLoader(maxdoc, workArea);

    TermEnum tenum = null;
    TermDocs tdoc = null;
    TermValueList<T> list = (listFactory == null ? (TermValueList<T>)new TermStringList() : listFactory.createTermList());
    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();

    int t = 0; // current term number
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    t++;
   
    _overflow = false;
    try
    {
      tdoc = reader.termDocs();
      tenum = reader.terms(new Term(fieldName, ""));
      if (tenum != null)
      {
        do
        {
          Term term = tenum.term();
          if (term == null || !fieldName.equals(term.field()))
            break;

          String val = term.text();

          if (val != null)
          {
            list.add(val);

            tdoc.seek(tenum);
            //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs
            int df = 0;
            int minID = -1;
            int maxID = -1;
            if (tdoc.next())
            {
              df++;
              int docid = tdoc.doc();
              if (!loader.add(docid, t)) logOverflow(fieldName);
              minID = docid;
              while (tdoc.next())
              {
                df++;
                docid = tdoc.doc();
                if (!loader.add(docid, t)) logOverflow(fieldName);
              }
              maxID = docid;
            }
            freqList.add(df);
            minIDList.add(minID);
            maxIDList.add(maxID);
          }

          t++;
        }
        while (tenum.next());
      }
    }
    finally
    {
      try
      {
        if (tdoc != null)
        {
          tdoc.close();
        }
      }
      finally
      {
        if (tenum != null)
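The same skeleton appears once more with a nested-array backing store; only the sink for the (docid, term number) pairs changes, here _nestedArray.addData: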

    {
      throw new RuntimeException("failed to load due to " + e.toString(), e);
    }
   
    TermEnum tenum = null;
    TermDocs tdoc = null;
    TermValueList<T> list = (listFactory == null ? (TermValueList<T>)new TermStringList() : listFactory.createTermList());
    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();

    int t = 0; // current term number
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    t++;

    _overflow = false;
    try
    {
      tdoc = reader.termDocs();
      tenum = reader.terms(new Term(fieldName, ""));
      if (tenum != null)
      {
        do
        {
          Term term = tenum.term();
          if (term == null || !fieldName.equals(term.field()))
            break;
         
          String val = term.text();
         
          if (val != null)
          {
            list.add(val);
           
            tdoc.seek(tenum);
            //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs
            int df = 0;
            int minID = -1;
            int maxID = -1;
            if (tdoc.next())
            {
              df++;
              int docid = tdoc.doc();
              if (!_nestedArray.addData(docid, t)) logOverflow(fieldName);
              minID = docid;
              while (tdoc.next())
              {
                df++;
                docid = tdoc.doc();
                if (!_nestedArray.addData(docid, t)) logOverflow(fieldName);
              }
              maxID = docid;
            }
            freqList.add(df);
            minIDList.add(minID);
            maxIDList.add(maxID);
          }
         
          t++;
        }
        while (tenum.next());
      }
    }
    finally
    {
      try
      {
        if (tdoc != null)
        {
          tdoc.close();
        }
      }
      finally
      {
        if (tenum != null)
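The single-term counterpart of the first excerpt implements the classic Filter.bits(IndexReader) contract directly: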

    public BitSet bits(IndexReader reader) throws IOException {
        // Create a new BitSet with a capacity equal to the size of the index.
        BitSet bits = new BitSet(reader.maxDoc());
        // Get an enumeration of all the documents that match the specified
        // field value.
        TermDocs matchingDocs = reader.termDocs(searchTerm);
        try {
            while (matchingDocs.next()) {
                bits.set(matchingDocs.doc());
            }
        }
        finally {
            if (matchingDocs != null) {
                matchingDocs.close();
            }
        }
        return bits;
    }
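Finally, a test excerpt that verifies two readers expose identical postings: it first checks skipTo() against a reference reader, then re-opens both enumerations and compares document numbers and frequencies pair by pair: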

      assertEquals(aprioriTermEnum.term(), testTermEnum.term());
      assertEquals(aprioriTermEnum.docFreq(), testTermEnum.docFreq());

      // compare termDocs seeking

      TermDocs aprioriTermDocsSeeker = aprioriReader.termDocs(aprioriTermEnum.term());
      TermDocs testTermDocsSeeker = testReader.termDocs(testTermEnum.term());

      while (aprioriTermDocsSeeker.next()) {
        assertTrue(testTermDocsSeeker.skipTo(aprioriTermDocsSeeker.doc()));
        assertEquals(aprioriTermDocsSeeker.doc(), testTermDocsSeeker.doc());
      }

      aprioriTermDocsSeeker.close();
      testTermDocsSeeker.close();

      // compare documents per term

      assertEquals(aprioriReader.docFreq(aprioriTermEnum.term()), testReader.docFreq(testTermEnum.term()));

      TermDocs aprioriTermDocs = aprioriReader.termDocs(aprioriTermEnum.term());
      TermDocs testTermDocs = testReader.termDocs(testTermEnum.term());

      while (true) {
        if (!aprioriTermDocs.next()) {
          assertFalse(testTermDocs.next());
          break;
        }
        assertTrue(testTermDocs.next());

        assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
        assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
      }

      aprioriTermDocs.close();
      testTermDocs.close();

      // compare term positions

      TermPositions testTermPositions = testReader.termPositions(testTermEnum.term());
      TermPositions aprioriTermPositions = aprioriReader.termPositions(aprioriTermEnum.term());
