Examples of startOffset()
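OffsetAttribute.startOffset() reports the index of the first character of the current token in the original input, and endOffset() the index one past its last character, so text.substring(startOffset, endOffset) yields the exact slice of input the token covers. The excerpts below are drawn from projects that consume these offsets in tests and in highlighting code. As a starting point, here is a minimal sketch of reading both offsets while iterating a TokenStream; it is not taken from the indexed projects, and it assumes a Lucene release where Analyzer.tokenStream accepts a String and WhitespaceAnalyzer has a no-argument constructor (5.x or later). The class name, field name, and sample text are illustrative.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

// Illustrative demo, not taken from the indexed projects.
public class StartOffsetDemo {
  public static void main(String[] args) throws IOException {
    String text = "the quick brown fox";
    Analyzer analyzer = new WhitespaceAnalyzer();
    try (TokenStream ts = analyzer.tokenStream("body", text)) {
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        // startOffset()/endOffset() index into the original text, so the
        // substring below is exactly the slice of input the token came from.
        System.out.printf("%s -> [%d, %d) = \"%s\"%n",
            termAtt.toString(),
            offsetAtt.startOffset(),
            offsetAtt.endOffset(),
            text.substring(offsetAtt.startOffset(), offsetAtt.endOffset()));
      }
      ts.end();
    }
  }
}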


Examples of org.apache.lucene.analysis.tokenattributes.OffsetAttribute.startOffset()

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "j",
            termAtt.term().equals("j") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
    assertTrue(offsetAtt.startOffset() + " does not equal: " + 132, offsetAtt.startOffset() == 132);
    assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133);

    assertFalse(tf.incrementToken());
  }
}

Examples of org.apache.lucene.analysis.tokenattributes.OffsetAttribute.startOffset()

  // LUCENE-1441
  public void testOffsets() throws Exception {
    TokenStream stream = new KeywordAnalyzer().tokenStream("field", new StringReader("abcd"));
    OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
    assertTrue(stream.incrementToken());
    assertEquals(0, offsetAtt.startOffset());
    assertEquals(4, offsetAtt.endOffset());
  }
}

Examples of org.apache.lucene.analysis.tokenattributes.OffsetAttribute.startOffset()

      int lastEndOffset = 0;
      textFragmenter.start(text, tokenStream);

      TokenGroup tokenGroup = new TokenGroup(tokenStream);

      for (boolean next = tokenStream.incrementToken();
           next && (offsetAtt.startOffset() < maxDocCharsToAnalyze);
           next = tokenStream.incrementToken())
      {
        if ((offsetAtt.endOffset() > text.length())
            || (offsetAtt.startOffset() > text.length())

Examples of org.apache.lucene.analysis.tokenattributes.OffsetAttribute.startOffset()

      for (boolean next = tokenStream.incrementToken();
           next && (offsetAtt.startOffset() < maxDocCharsToAnalyze);
           next = tokenStream.incrementToken())
      {
        if ((offsetAtt.endOffset() > text.length())
            || (offsetAtt.startOffset() > text.length()))
        {
          throw new InvalidTokenOffsetsException("Token " + termAtt.term()
              + " exceeds length of provided text sized " + text.length());
        }
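The two excerpts above appear to come from Lucene's highlighter, which walks the token stream and compares startOffset() and endOffset() against the length of the text being highlighted, throwing InvalidTokenOffsetsException when a token points past the end of that text. A hedged sketch of the calling side follows; the query, field name, analyzer, and sample text are illustrative only, and it assumes the lucene-highlighter module is on the classpath.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;

// Illustrative demo, not taken from the indexed projects.
public class HighlightOffsetGuardDemo {
  public static void main(String[] args) throws Exception {
    String text = "the quick brown fox";
    Analyzer analyzer = new WhitespaceAnalyzer();
    Highlighter highlighter =
        new Highlighter(new QueryScorer(new TermQuery(new Term("body", "fox"))));
    try {
      // Internally the highlighter runs a loop like the excerpt above,
      // checking each token's offsets against text.length().
      System.out.println(highlighter.getBestFragment(analyzer, "body", text));
    } catch (InvalidTokenOffsetsException e) {
      // Raised when a token's reported offsets fall outside the provided text,
      // e.g. when an analyzer rewrites input without correcting offsets.
      e.printStackTrace();
    }
  }
}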

Examples of org.apache.lucene.analysis.tokenattributes.OffsetAttribute.startOffset()

    final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
    tk.reset();
    for (int start = 0; start < codePointCount; ++start) {
      for (int end = start + minGram; end <= Math.min(codePointCount, start + maxGram); ++end) {
        assertTrue(tk.incrementToken());
        assertEquals(0, offsetAtt.startOffset());
        assertEquals(s.length(), offsetAtt.endOffset());
        final int startIndex = Character.offsetByCodePoints(s, 0, start);
        final int endIndex = Character.offsetByCodePoints(s, 0, end);
        assertEquals(s.substring(startIndex, endIndex), termAtt.toString());
      }

Examples of org.apache.lucene.analysis.tokenattributes.OffsetAttribute.startOffset()

        }
        assertTrue(grams.incrementToken());
        assertArrayEquals(Arrays.copyOfRange(codePoints, start, end), toCodePoints(termAtt));
        assertEquals(1, posIncAtt.getPositionIncrement());
        assertEquals(1, posLenAtt.getPositionLength());
        assertEquals(offsets[start], offsetAtt.startOffset());
        assertEquals(offsets[end], offsetAtt.endOffset());
      }
    }
    assertFalse(grams.incrementToken());
    grams.end();

Examples of org.apache.lucene.analysis.tokenattributes.OffsetAttribute.startOffset()

        assertEquals(offsets[end], offsetAtt.endOffset());
      }
    }
    assertFalse(grams.incrementToken());
    grams.end();
    assertEquals(s.length(), offsetAtt.startOffset());
    assertEquals(s.length(), offsetAtt.endOffset());
  }

  public void testLargeInput() throws IOException {
    // test sliding
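The assertions after grams.end() in the excerpt above rely on the final-offset contract: once a token stream is exhausted, end() stores the final offset in the OffsetAttribute, so startOffset() and endOffset() both report the length of the consumed input. A minimal sketch of that behavior follows; the analyzer, field name, and sample string are illustrative, and a no-argument WhitespaceAnalyzer constructor (Lucene 5.x or later) is assumed.

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

// Illustrative demo, not taken from the indexed projects.
public class FinalOffsetDemo {
  public static void main(String[] args) throws IOException {
    String s = "one two";
    try (TokenStream ts = new WhitespaceAnalyzer().tokenStream("f", s)) {
      OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        // drain the stream; per-token offsets are available here
      }
      ts.end();
      // After end(), both offsets hold the final offset of the input.
      System.out.println(offsetAtt.startOffset() + " " + offsetAtt.endOffset()); // prints "7 7"
    }
  }
}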

Examples of org.apache.lucene.analysis.tokenattributes.OffsetAttribute.startOffset()

      TokenStream ts = analyzer.tokenStream("foo", s);
      try {
        ts.reset();
        OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        while (ts.incrementToken()) {
          String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset());
          for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) {
            cp = highlightedText.codePointAt(j);
            assertTrue("non-letter:" + Integer.toHexString(cp), Character.isLetter(cp));
          }
        }