Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenFilter


  public void testTokenReuse() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new TokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)) {
          boolean first=true;
          AttributeSource.State state;

          @Override
          public boolean incrementToken() throws IOException {
View Full Code Here


      throw new IllegalArgumentException("logStream must not be null");

    return new Analyzer() {
      @Override
      public TokenStream tokenStream(final String fieldName, Reader reader) {
        return new TokenFilter(child.tokenStream(fieldName, reader)) {
          private int position = -1;
          private TermAttribute termAtt = addAttribute(TermAttribute.class);
          private PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
          private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
          private TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
View Full Code Here

      return child; // no need to wrap
 
    return new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new TokenFilter(child.tokenStream(fieldName, reader)) {
          private int todo = maxTokens;
         
          @Override
          public boolean incrementToken() throws IOException {
            return --todo >= 0 ? input.incrementToken() : false;
View Full Code Here

      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        final ArrayList<AttributeSource.State> tokens = cache.get(fieldName);
        if (tokens == null) { // not yet cached
          final ArrayList<AttributeSource.State> tokens2 = new ArrayList<AttributeSource.State>();
          TokenStream tokenStream = new TokenFilter(child.tokenStream(fieldName, reader)) {

            @Override
            public boolean incrementToken() throws IOException {
              boolean hasNext = input.incrementToken();
              if (hasNext) tokens2.add(captureState());
View Full Code Here

  public void testTokenReuse() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new TokenFilter(new WhitespaceTokenizer(reader)) {
          boolean first=true;
          AttributeSource.State state;

          @Override
          public boolean incrementToken() throws IOException {
View Full Code Here

  public void testElision() throws Exception {
    String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
    Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(test));
    CharArraySet articles = new CharArraySet(TEST_VERSION_CURRENT, asSet("l", "M"), false);
    TokenFilter filter = new ElisionFilter(tokenizer, articles);
    List<String> tas = filter(filter);
    assertEquals("embrouille", tas.get(4));
    assertEquals("O'brian", tas.get(6));
    assertEquals("enfin", tas.get(7));
  }
View Full Code Here

  public void testInvalidOffsets() throws Exception {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
        filters = new NGramTokenFilter(TEST_VERSION_CURRENT, filters, 2, 2);
        return new TokenStreamComponents(tokenizer, filters);
      }
    };
    assertAnalyzesTo(analyzer, "mosfellsbær",
View Full Code Here

  public void testExceptionFromTokenStream() throws IOException {
    RAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new Analyzer() {

      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new TokenFilter(new StandardTokenizer(reader)) {
          private int count = 0;

          public boolean incrementToken() throws IOException {
            if (count++ == 5) {
              throw new IOException();
View Full Code Here

  public void testInvalidOffsets() throws Exception {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
        filters = new EdgeNGramTokenFilter(Version.LUCENE_43, filters, EdgeNGramTokenFilter.Side.FRONT, 2, 15);
        return new TokenStreamComponents(tokenizer, filters);
      }
    };
    assertAnalyzesTo(analyzer, "mosfellsbær",
View Full Code Here

    final PositionIncrementAttribute posIncrAtt = t.getAttribute(PositionIncrementAttribute.class);

    t.setReader(new StringReader(uri));
    t.reset();

    final TokenFilter filter = new MailtoFilter(t);
    for (int i = 0; i < expectedStems.length; i++) {
        assertTrue("token " + i + " exists", filter.incrementToken());
        assertEquals(expectedStems[i], termAtt.toString());
        if (expectedTypes == null)
          assertEquals(uritype, typeAtt.type());
        else
          assertEquals(expectedTypes[i], typeAtt.type());
        if (expectedPosIncr != null)
          assertEquals(expectedPosIncr[i], posIncrAtt.getPositionIncrement());
    }
    filter.end();
    filter.close();
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.TokenFilter

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.