Package org.apache.lucene.analysis.tokenattributes

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute


  @Override
  protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
      throws ParseException {
    // get Analyzer from superclass and tokenize the term
    TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
    TermAttribute termAtt = source.addAttribute(TermAttribute.class);
    String nextToken = null;
    boolean multipleTokens = false;
   
    try {
      if (source.incrementToken()) {
        nextToken = termAtt.term();
      }
      multipleTokens = source.incrementToken();
    } catch (IOException e) {
      nextToken = null;
    }
View Full Code Here


  @Override
  protected Query getRangeQuery(String field, String part1, String part2, boolean inclusive)
      throws ParseException {
    // get Analyzer from superclass and tokenize the terms
    TokenStream source = getAnalyzer().tokenStream(field, new StringReader(part1));
    TermAttribute termAtt = source.addAttribute(TermAttribute.class);
    boolean multipleTokens = false;

    // part1
    try {
      if (source.incrementToken()) {
        part1 = termAtt.term();
      }
      multipleTokens = source.incrementToken();
    } catch (IOException e) {
      // ignore
    }
    try {
      source.close();
    } catch (IOException e) {
      // ignore
    }
    if (multipleTokens) {
      throw new ParseException("Cannot build RangeQuery with analyzer " + getAnalyzer().getClass()
          + " - tokens were added to part1");
    }

    // part2
    source = getAnalyzer().tokenStream(field, new StringReader(part2));
    termAtt = source.addAttribute(TermAttribute.class);
   
    try {
      if (source.incrementToken()) {
        part2 = termAtt.term();
      }
      multipleTokens = source.incrementToken();
    } catch (IOException e) {
      // ignore
    }
View Full Code Here

  {
    TermsFilter tf = new TermsFilter();
    String text = DOMUtils.getNonBlankTextOrFail(e);
    String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
    TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
   
    try
    {
      Term term = null;
        while (ts.incrementToken()) {
        if (term == null)
        {
          term = new Term(fieldName, termAtt.term());
        } else
        {
//           create from previous to save fieldName.intern overhead
          term = term.createTerm(termAtt.term());
        }
        tf.addTerm(term);
      }
    }
    catch (IOException ioe)
View Full Code Here

        new String[]{"he", "abhorred", "accents"});
  }
 
  public void testFilterTokens() throws Exception {
    SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
    TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
    OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class);
    FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class);
   
    filter.incrementToken();

    assertEquals("accent", termAtt.term());
    assertEquals(2, offsetAtt.startOffset());
    assertEquals(7, offsetAtt.endOffset());
    assertEquals("wrd", typeAtt.type());
    assertEquals(3, posIncAtt.getPositionIncrement());
    assertEquals(77, flagsAtt.getFlags());
View Full Code Here

    {
        stopWordsSet=new HashSet<String>();
        for (int i = 0; i < fields.length; i++)
            {
                TokenStream ts = analyzer.tokenStream(fields[i],new StringReader(stopWords));
                TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
                try
                {
                  while(ts.incrementToken()) {
                      stopWordsSet.add(termAtt.term());
                  }
                }
                catch(IOException ioe)
                {
                    throw new ParserException("IoException parsing stop words list in "
View Full Code Here

public class TestReverseStringFilter extends BaseTokenStreamTestCase {
  public void testFilter() throws Exception {
    TokenStream stream = new WhitespaceTokenizer(
        new StringReader("Do have a nice day"));     // 1-4 length string
    ReverseStringFilter filter = new ReverseStringFilter(stream);
    TermAttribute text = filter.getAttribute(TermAttribute.class);
    assertTrue(filter.incrementToken());
    assertEquals("oD", text.term());
    assertTrue(filter.incrementToken());
    assertEquals("evah", text.term());
    assertTrue(filter.incrementToken());
    assertEquals("a", text.term());
    assertTrue(filter.incrementToken());
    assertEquals("ecin", text.term());
    assertTrue(filter.incrementToken());
    assertEquals("yad", text.term());
    assertFalse(filter.incrementToken());
  }
View Full Code Here

 
  public void testFilterWithMark() throws Exception {
    TokenStream stream = new WhitespaceTokenizer(new StringReader(
        "Do have a nice day")); // 1-4 length string
    ReverseStringFilter filter = new ReverseStringFilter(stream, '\u0001');
    TermAttribute text = filter
        .getAttribute(TermAttribute.class);
    assertTrue(filter.incrementToken());
    assertEquals("\u0001oD", text.term());
    assertTrue(filter.incrementToken());
    assertEquals("\u0001evah", text.term());
    assertTrue(filter.incrementToken());
    assertEquals("\u0001a", text.term());
    assertTrue(filter.incrementToken());
    assertEquals("\u0001ecin", text.term());
    assertTrue(filter.incrementToken());
    assertEquals("\u0001yad", text.term());
    assertFalse(filter.incrementToken());
  }
View Full Code Here

   
    try
    {
      ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>();
      TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value));
      TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
     
      while (ts.incrementToken()) {
          SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, termAtt.term()));
          clausesList.add(stq);
      }
      SpanOrQuery soq=new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
      soq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
      return soq;
View Full Code Here

    TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))));
    SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter);
   
    boolean seenDogs = false;

    TermAttribute termAtt = ttf.addAttribute(TermAttribute.class);
    TypeAttribute typeAtt = ttf.addAttribute(TypeAttribute.class);
    ttf.reset();
    while (ttf.incrementToken()) {
      if (termAtt.term().equals("dogs")) {
        seenDogs = true;
        assertTrue(typeAtt.type() + " is not equal to " + "D", typeAtt.type().equals("D") == true);
      } else {
        assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals("word"));
      }
View Full Code Here

public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {

  public void testPayloads() throws Exception {
    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
    TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
    PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
    assertTermEquals("The", filter, termAtt, payAtt, null);
    assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
    assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
    assertTermEquals("fox", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.tokenattributes.TermAttribute

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.