Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermEnum


    IndexReader testReader = testIndex.indexReaderFactory();

    // test seek

    Term t = new Term("c", "danny");
    TermEnum aprioriTermEnum = aprioriReader.terms(t);
    TermEnum testTermEnum = testReader.terms(t);

    assertEquals(aprioriTermEnum.term(), testTermEnum.term());

    t = aprioriTermEnum.term();

    aprioriTermEnum.close();
    testTermEnum.close();

    TermDocs aprioriTermDocs = aprioriReader.termDocs(t);
    TermDocs testTermDocs = testReader.termDocs(t);

    assertEquals(aprioriTermDocs.next(), testTermDocs.next());
View Full Code Here


      assertEquals(aprioriReader.isDeleted(docIndex), testReader.isDeleted(docIndex));
    }

    // compare term enumeration stepping

    TermEnum aprioriTermEnum = aprioriReader.terms();
    TermEnum testTermEnum = testReader.terms();


    while (true) {

      if (!aprioriTermEnum.next()) {
        assertFalse(testTermEnum.next());
        break;
      }
      assertTrue(testTermEnum.next());

      assertEquals(aprioriTermEnum.term(), testTermEnum.term());
      assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq());

      // compare termDocs seeking

      TermDocs aprioriTermDocsSeeker = aprioriReader.termDocs(aprioriTermEnum.term());
      TermDocs testTermDocsSeeker = testReader.termDocs(testTermEnum.term());

      while (aprioriTermDocsSeeker.next()) {
        assertTrue(testTermDocsSeeker.skipTo(aprioriTermDocsSeeker.doc()));
        assertEquals(aprioriTermDocsSeeker.doc(), testTermDocsSeeker.doc());
      }

      aprioriTermDocsSeeker.close();
      testTermDocsSeeker.close();

      // compare documents per term

      assertEquals(aprioriReader.docFreq(aprioriTermEnum.term()), testReader.docFreq(testTermEnum.term()));

      TermDocs aprioriTermDocs = aprioriReader.termDocs(aprioriTermEnum.term());
      TermDocs testTermDocs = testReader.termDocs(testTermEnum.term());

      while (true) {
        if (!aprioriTermDocs.next()) {
          assertFalse(testTermDocs.next());
          break;
        }
        assertTrue(testTermDocs.next());

        assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
        assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
      }

      aprioriTermDocs.close();
      testTermDocs.close();

      // compare term positions

      TermPositions testTermPositions = testReader.termPositions(testTermEnum.term());
      TermPositions aprioriTermPositions = aprioriReader.termPositions(aprioriTermEnum.term());

      if (aprioriTermPositions != null) {

        for (int docIndex = 0; docIndex < aprioriReader.maxDoc(); docIndex++) {
          boolean hasNext = aprioriTermPositions.next();
          if (hasNext) {
            assertTrue(testTermPositions.next());

            assertEquals(aprioriTermPositions.freq(), testTermPositions.freq());


            for (int termPositionIndex = 0; termPositionIndex < aprioriTermPositions.freq(); termPositionIndex++) {
              int aprioriPos = aprioriTermPositions.nextPosition();
              int testPos = testTermPositions.nextPosition();

              if (aprioriPos != testPos) {
                assertEquals(aprioriPos, testPos);
              }


              assertEquals(aprioriTermPositions.isPayloadAvailable(), testTermPositions.isPayloadAvailable());
              if (aprioriTermPositions.isPayloadAvailable()) {
                assertEquals(aprioriTermPositions.getPayloadLength(), testTermPositions.getPayloadLength());
                byte[] aprioriPayloads = aprioriTermPositions.getPayload(new byte[aprioriTermPositions.getPayloadLength()], 0);
                byte[] testPayloads = testTermPositions.getPayload(new byte[testTermPositions.getPayloadLength()], 0);
                for (int i = 0; i < aprioriPayloads.length; i++) {
                  assertEquals(aprioriPayloads[i], testPayloads[i]);
                }
              }

            }
          }
        }

        aprioriTermPositions.close();
        testTermPositions.close();

      }
    }

    // compare term vectors and position vectors

    for (int documentNumber = 0; documentNumber < aprioriReader.numDocs(); documentNumber++) {

      if (documentNumber > 0) {
        assertNotNull(aprioriReader.getTermFreqVector(documentNumber, "b0"));
        assertNull(aprioriReader.getTermFreqVector(documentNumber, "b1"));

        assertNotNull(testReader.getTermFreqVector(documentNumber, "b0"));
        assertNull(testReader.getTermFreqVector(documentNumber, "b1"));

      }

      TermFreqVector[] aprioriFreqVectors = aprioriReader.getTermFreqVectors(documentNumber);
      TermFreqVector[] testFreqVectors = testReader.getTermFreqVectors(documentNumber);

      if (aprioriFreqVectors != null && testFreqVectors != null) {

        Arrays.sort(aprioriFreqVectors, new Comparator<TermFreqVector>() {
          public int compare(TermFreqVector termFreqVector, TermFreqVector termFreqVector1) {
            return termFreqVector.getField().compareTo(termFreqVector1.getField());
          }
        });
        Arrays.sort(testFreqVectors, new Comparator<TermFreqVector>() {
          public int compare(TermFreqVector termFreqVector, TermFreqVector termFreqVector1) {
            return termFreqVector.getField().compareTo(termFreqVector1.getField());
          }
        });

        assertEquals("document " + documentNumber + " vectors does not match", aprioriFreqVectors.length, testFreqVectors.length);

        for (int freqVectorIndex = 0; freqVectorIndex < aprioriFreqVectors.length; freqVectorIndex++) {
          assertTrue(Arrays.equals(aprioriFreqVectors[freqVectorIndex].getTermFrequencies(), testFreqVectors[freqVectorIndex].getTermFrequencies()));
          assertTrue(Arrays.equals(aprioriFreqVectors[freqVectorIndex].getTerms(), testFreqVectors[freqVectorIndex].getTerms()));

          if (aprioriFreqVectors[freqVectorIndex] instanceof TermPositionVector) {
            TermPositionVector aprioriTermPositionVector = (TermPositionVector) aprioriFreqVectors[freqVectorIndex];
            TermPositionVector testTermPositionVector = (TermPositionVector) testFreqVectors[freqVectorIndex];

            for (int positionVectorIndex = 0; positionVectorIndex < aprioriFreqVectors[freqVectorIndex].getTerms().length; positionVectorIndex++)
            {
              if (aprioriTermPositionVector.getOffsets(positionVectorIndex) != null) {
                assertTrue(Arrays.equals(aprioriTermPositionVector.getOffsets(positionVectorIndex), testTermPositionVector.getOffsets(positionVectorIndex)));
              }

              if (aprioriTermPositionVector.getTermPositions(positionVectorIndex) != null) {
                assertTrue(Arrays.equals(aprioriTermPositionVector.getTermPositions(positionVectorIndex), testTermPositionVector.getTermPositions(positionVectorIndex)));
              }
            }
          }

        }
      }

    }

    aprioriTermEnum.close();
    testTermEnum.close();

    aprioriReader.close();
    testReader.close();
  }
View Full Code Here

    String fieldName,
    MatchingTermVisitor mtv) throws IOException
  {
    boolean expanded = false;
    int prefixLength = prefix.length();
    TermEnum enumerator = reader.terms(new Term(fieldName, prefix));
    Matcher matcher = pattern.matcher("");
    try {
      do {
        Term term = enumerator.term();
        if (term != null) {
          String text = term.text();
          if ((! text.startsWith(prefix)) || (! term.field().equals(fieldName))) {
            break;
          } else {
            matcher.reset( text.substring(prefixLength));
            if (matcher.matches()) {
              mtv.visitMatchingTerm(term);
              expanded = true;
            }
          }
        }
      } while (enumerator.next());
    } finally {
      enumerator.close();
      matcher.reset();
    }
    if (! expanded) {
      System.out.println("No terms in " + fieldName + " field for: " + toString());
    }
View Full Code Here

    IndexReader reader,
    String fieldName,
    MatchingTermVisitor mtv) throws IOException
  {
    /* inspired by PrefixQuery.rewrite(): */
    TermEnum enumerator = reader.terms(getLucenePrefixTerm(fieldName));
    boolean expanded = false;
    try {
      do {
        Term term = enumerator.term();
        if ((term != null)
            && term.text().startsWith(getPrefix())
            && term.field().equals(fieldName)) {
          mtv.visitMatchingTerm(term);
          expanded = true;
        } else {
          break;
        }
      } while (enumerator.next());
    } finally {
      enumerator.close();
    }
    if (! expanded) {
      System.out.println("No terms in " + fieldName + " field for: " + toString());
    }
  }
View Full Code Here

    IndexReader reader,
    String fieldName,
    MatchingTermVisitor mtv) throws IOException
  {
    /* check term presence in index here for symmetry with other SimpleTerm's */
    TermEnum enumerator = reader.terms(getLuceneTerm(fieldName));
    try {
      Term it= enumerator.term(); /* same or following index term */
      if ((it != null)
          && it.text().equals(getTermText())
          && it.field().equals(fieldName)) {
        mtv.visitMatchingTerm(it);
      } else {
        System.out.println("No term in " + fieldName + " field for: " + toString());
      }
    } finally {
      enumerator.close();
    }
  }
View Full Code Here

        }
      }
      final int ix = i;
      final int jx = j;
 
      return new TermEnum() {
 
        private int i = ix; // index into info.sortedTerms
        private int j = jx; // index into sortedFields
         
        @Override
View Full Code Here

    IndexReader testReader = testIndex.indexReaderFactory();

    // test seek

    Term t = new Term("c", "danny");
    TermEnum aprioriTermEnum = aprioriReader.terms(t);
    TermEnum testTermEnum = testReader.terms(t);

    assertEquals(aprioriTermEnum.term(), testTermEnum.term());

    t = aprioriTermEnum.term();

    aprioriTermEnum.close();
    testTermEnum.close();

    TermDocs aprioriTermDocs = aprioriReader.termDocs(t);
    TermDocs testTermDocs = testReader.termDocs(t);

    assertEquals(aprioriTermDocs.next(), testTermDocs.next());
View Full Code Here

      assertEquals(aprioriReader.isDeleted(docIndex), testReader.isDeleted(docIndex));
    }

    // compare term enumeration stepping

    TermEnum aprioriTermEnum = aprioriReader.terms();
    TermEnum testTermEnum = testReader.terms();


    while (true) {

      if (!aprioriTermEnum.next()) {
        assertFalse(testTermEnum.next());
        break;
      }
      assertTrue(testTermEnum.next());

      assertEquals(aprioriTermEnum.term(), testTermEnum.term());
      assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq());

      // compare termDocs seeking

      TermDocs aprioriTermDocsSeeker = aprioriReader.termDocs(aprioriTermEnum.term());
      TermDocs testTermDocsSeeker = testReader.termDocs(testTermEnum.term());

      while (aprioriTermDocsSeeker.next()) {
        assertTrue(testTermDocsSeeker.skipTo(aprioriTermDocsSeeker.doc()));
        assertEquals(aprioriTermDocsSeeker.doc(), testTermDocsSeeker.doc());
      }

      aprioriTermDocsSeeker.close();
      testTermDocsSeeker.close();

      // compare documents per term

      assertEquals(aprioriReader.docFreq(aprioriTermEnum.term()), testReader.docFreq(testTermEnum.term()));

      TermDocs aprioriTermDocs = aprioriReader.termDocs(aprioriTermEnum.term());
      TermDocs testTermDocs = testReader.termDocs(testTermEnum.term());

      while (true) {
        if (!aprioriTermDocs.next()) {
          assertFalse(testTermDocs.next());
          break;
        }
        assertTrue(testTermDocs.next());

        assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
        assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
      }

      aprioriTermDocs.close();
      testTermDocs.close();

      // compare term positions

      TermPositions testTermPositions = testReader.termPositions(testTermEnum.term());
      TermPositions aprioriTermPositions = aprioriReader.termPositions(aprioriTermEnum.term());

      if (aprioriTermPositions != null) {

        for (int docIndex = 0; docIndex < aprioriReader.maxDoc(); docIndex++) {
          boolean hasNext = aprioriTermPositions.next();
          if (hasNext) {
            assertTrue(testTermPositions.next());

            assertEquals(aprioriTermPositions.freq(), testTermPositions.freq());


            for (int termPositionIndex = 0; termPositionIndex < aprioriTermPositions.freq(); termPositionIndex++) {
              int aprioriPos = aprioriTermPositions.nextPosition();
              int testPos = testTermPositions.nextPosition();

              if (aprioriPos != testPos) {
                assertEquals(aprioriPos, testPos);
              }


              assertEquals(aprioriTermPositions.isPayloadAvailable(), testTermPositions.isPayloadAvailable());
              if (aprioriTermPositions.isPayloadAvailable()) {
                assertEquals(aprioriTermPositions.getPayloadLength(), testTermPositions.getPayloadLength());
                byte[] aprioriPayloads = aprioriTermPositions.getPayload(new byte[aprioriTermPositions.getPayloadLength()], 0);
                byte[] testPayloads = testTermPositions.getPayload(new byte[testTermPositions.getPayloadLength()], 0);
                for (int i = 0; i < aprioriPayloads.length; i++) {
                  assertEquals(aprioriPayloads[i], testPayloads[i]);
                }
              }

            }
          }
        }

        aprioriTermPositions.close();
        testTermPositions.close();

      }
    }

    // compare term vectors and position vectors

    for (int documentNumber = 0; documentNumber < aprioriReader.numDocs(); documentNumber++) {

      if (documentNumber > 0) {
        assertNotNull(aprioriReader.getTermFreqVector(documentNumber, "b0"));
        assertNull(aprioriReader.getTermFreqVector(documentNumber, "b1"));

        assertNotNull(testReader.getTermFreqVector(documentNumber, "b0"));
        assertNull(testReader.getTermFreqVector(documentNumber, "b1"));

      }

      TermFreqVector[] aprioriFreqVectors = aprioriReader.getTermFreqVectors(documentNumber);
      TermFreqVector[] testFreqVectors = testReader.getTermFreqVectors(documentNumber);

      if (aprioriFreqVectors != null && testFreqVectors != null) {

        Arrays.sort(aprioriFreqVectors, new Comparator<TermFreqVector>() {
          public int compare(TermFreqVector termFreqVector, TermFreqVector termFreqVector1) {
            return termFreqVector.getField().compareTo(termFreqVector1.getField());
          }
        });
        Arrays.sort(testFreqVectors, new Comparator<TermFreqVector>() {
          public int compare(TermFreqVector termFreqVector, TermFreqVector termFreqVector1) {
            return termFreqVector.getField().compareTo(termFreqVector1.getField());
          }
        });

        assertEquals("document " + documentNumber + " vectors does not match", aprioriFreqVectors.length, testFreqVectors.length);

        for (int freqVectorIndex = 0; freqVectorIndex < aprioriFreqVectors.length; freqVectorIndex++) {
          assertTrue(Arrays.equals(aprioriFreqVectors[freqVectorIndex].getTermFrequencies(), testFreqVectors[freqVectorIndex].getTermFrequencies()));
          assertTrue(Arrays.equals(aprioriFreqVectors[freqVectorIndex].getTerms(), testFreqVectors[freqVectorIndex].getTerms()));

          if (aprioriFreqVectors[freqVectorIndex] instanceof TermPositionVector) {
            TermPositionVector aprioriTermPositionVector = (TermPositionVector) aprioriFreqVectors[freqVectorIndex];
            TermPositionVector testTermPositionVector = (TermPositionVector) testFreqVectors[freqVectorIndex];

            for (int positionVectorIndex = 0; positionVectorIndex < aprioriFreqVectors[freqVectorIndex].getTerms().length; positionVectorIndex++)
            {
              if (aprioriTermPositionVector.getOffsets(positionVectorIndex) != null) {
                assertTrue(Arrays.equals(aprioriTermPositionVector.getOffsets(positionVectorIndex), testTermPositionVector.getOffsets(positionVectorIndex)));
              }

              if (aprioriTermPositionVector.getTermPositions(positionVectorIndex) != null) {
                assertTrue(Arrays.equals(aprioriTermPositionVector.getTermPositions(positionVectorIndex), testTermPositionVector.getTermPositions(positionVectorIndex)));
              }
            }
          }

        }
      }

    }

    aprioriTermEnum.close();
    testTermEnum.close();

    aprioriReader.close();
    testReader.close();
  }
View Full Code Here

    // Separately count how many tokens are actually in the index:
    IndexReader reader = IndexReader.open(benchmark.getRunData().getDirectory(), true);
    assertEquals(NUM_DOCS, reader.numDocs());

    TermEnum terms = reader.terms();
    TermDocs termDocs = reader.termDocs();
    int totalTokenCount2 = 0;
    while(terms.next()) {
      termDocs.seek(terms.term());
      while(termDocs.next())
        totalTokenCount2 += termDocs.freq();
    }
    reader.close();
View Full Code Here

   * @param field  - the name of the field, or null for all of them.
   */
  public void terms(String field) throws IOException {
    TreeMap<String,Integer> termMap = new TreeMap<String,Integer>();
    IndexReader indexReader = IndexReader.open(indexName, true);
    TermEnum terms = indexReader.terms();
    while (terms.next()) {
      Term term = terms.term();
      //message(term.field() + ":" + term.text() + " freq:" + terms.docFreq());
      //if we're either not looking by field or we're matching the specific field
      if ((field == null) || field.equals(term.field()))
        termMap.put(term.field() + ":" + term.text(), Integer.valueOf((terms.docFreq())));
    }

    Iterator<String> termIterator = termMap.keySet().iterator();
    for (int ii = 0; termIterator.hasNext() && ii < 100; ii++) {
      String termDetails = termIterator.next();
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.TermEnum

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.