Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenStream.addAttribute()


    final String field = ( f == null) ? "contents" : f;
    if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // [1] Parse query into separate words so that when we expand we can avoid dups
    TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      String word = termAtt.toString();
      if ( already.add( word))
        top.add( word);
View Full Code Here


    final Set<String> already = new HashSet<String>(); // avoid dups   
    List<String> top = new LinkedList<String>(); // needs to be separately listed..

    // [1] Parse query into separate words so that when we expand we can avoid dups
    TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
   
    while (ts.incrementToken()) {
      String word = termAtt.toString();
      if ( already.add( word))
        top.add( word);
View Full Code Here

    BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false));
    bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0));
    try
    {
      TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      Term term = null;
      ts.reset();
      while (ts.incrementToken()) {
        if (term == null)
        {
View Full Code Here

    String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
   
    try
    {
      TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      Term term = null;
      ts.reset();
        while (ts.incrementToken()) {
        if (term == null)
        {
View Full Code Here

        for (int i = 0; i < fields.length; i++)
            {
                try
                {
                  TokenStream ts = analyzer.reusableTokenStream(fields[i],new StringReader(stopWords));
                  CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                  ts.reset();
                  while(ts.incrementToken()) {
                      stopWordsSet.add(termAtt.toString());
                  }
                  ts.end();
View Full Code Here

   
    private void addTerms(IndexReader reader,FieldVals f) throws IOException
    {
        if(f.queryString==null) return;
        TokenStream ts=analyzer.reusableTokenStream(f.fieldName,new StringReader(f.queryString));
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
       
        int corpusNumDocs=reader.numDocs();
        Term internSavingTemplateTerm =new Term(f.fieldName); //optimization to avoid constructing new Term() objects
        HashSet<String> processedTerms=new HashSet<String>();
        ts.reset();
View Full Code Here

   
    try
    {
      ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>();
      TokenStream ts=analyzer.reusableTokenStream(fieldName,new StringReader(value));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
     
      ts.reset();
      while (ts.incrementToken()) {
          SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, termAtt.toString()));
          clausesList.add(stq);
View Full Code Here

    throws IOException
  {
       TokenStream ts = analyzer.reusableTokenStream(fieldName, r);
      int tokenCount=0;
      // for every token
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        String word = termAtt.toString();
        tokenCount++;
        if(tokenCount>maxNumTokensParsed)
View Full Code Here

                      String field,
                      Set<?> stop)
                      throws IOException
 
    TokenStream ts = a.reusableTokenStream( field, new StringReader( body));
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
   
    BooleanQuery tmp = new BooleanQuery();
    Set<String> already = new HashSet<String>(); // ignore dups
    while (ts.incrementToken()) {
      String word = termAtt.toString();
View Full Code Here

        List<String> terms = new ArrayList<String>();
        try {
          boolean hasMoreTokens = false;
         
          stream.reset();
          final CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);

          hasMoreTokens = stream.incrementToken();
          while (hasMoreTokens) {
            terms.add(termAtt.toString());
            hasMoreTokens = stream.incrementToken();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.