Package org.apache.lucene.analysis.snowball

Examples of org.apache.lucene.analysis.snowball.SnowballAnalyzer.tokenStream()


                                        org.apache.lucene.util.Version.LUCENE_CURRENT,
                                        "Spanish",
                                        SPANISH_STOP_WORDS);

                       
                        TokenStream tokenStream = analyzer.tokenStream(
                                        "content",
                                        new StringReader(indexCleanedOfHTMLTags));
                       
                        Token token = new Token();
View Full Code Here


      TokenStream ts ;
      TermAttribute termAtt;
     
      // Query data
      doc = (Document)ir.document(testDocId);
      ts = analyzer.tokenStream("title", new StringReader(doc.get("title")));
      termAtt = ts.addAttribute(TermAttribute.class)
      while (ts.incrementToken())
      {
        rTitle.add(termAtt.term());
      }
View Full Code Here

      while (ts.incrementToken())
      {
        rTitle.add(termAtt.term());
      }
     
      ts = analyzer.tokenStream("content", new StringReader(doc.get("content")));
      termAtt = ts.addAttribute(TermAttribute.class)
      while (ts.incrementToken())
      {
        rContent.add(termAtt.term());
      }
View Full Code Here

      while (ts.incrementToken())
      {
        rContent.add(termAtt.term());
      }
     
      ts = analyzer.tokenStream("desc", new StringReader(doc.get("desc")));
      termAtt = ts.addAttribute(TermAttribute.class)
      while (ts.incrementToken())
      {
        rDesc.add(termAtt.term());
      }
View Full Code Here

      {
        int docId = (Integer)trainIterator.next();
        doc = (Document)ir.document(docId);
               
        //********************** Title Similarity Score ***************
        ts = analyzer.tokenStream("title", new StringReader(doc.get("title")));
        termAtt = ts.addAttribute(TermAttribute.class);   
       
        // Construct a HashMap of Train record title
        titleMap = new HashMap<String , Integer>();
        titleNI = 0;
View Full Code Here

              titleSimScore += ((double)titleMap.get(tempToken) + (100 * titleVocabMap.get(tempToken)/titleLength))/(titleNI + 100);
          }
        }
       
        //********************** Description Similarity Score ****************
        ts = analyzer.tokenStream("desc", new StringReader(doc.get("desc")));
        termAtt = ts.addAttribute(TermAttribute.class)
               
        // Construct a HashMap of Train record description
        descMap = new HashMap<String , Integer>();
        descNI = 0;
View Full Code Here

            descSimScore += ((double)descMap.get(tempToken) + (100 * descVocabMap.get(tempToken))/descLength)/(descNI + 100);
          }
        }
       
        //********************** Content Similarity Score ****************
        ts = analyzer.tokenStream("content", new StringReader(doc.get("content")));
        termAtt = ts.addAttribute(TermAttribute.class)
               
        // Construct a HashMap of Train record content
        contentMap = new HashMap<String , Integer>();
        contentNI = 0;
View Full Code Here

   
    Iterator trainIterator = trainDocIds.iterator();
    while (trainIterator.hasNext())
    {
      doc = (Document)ir.document((Integer)trainIterator.next());
      ts = analyzer.tokenStream("title", new StringReader(doc.get("title")));
      termAtt = ts.addAttribute(TermAttribute.class);   
     
      while(ts.incrementToken())
      {
        tempToken = termAtt.term();
View Full Code Here

          else
            titleVocabMap.put(tempToken, 1);
        }
      }
     
      ts = analyzer.tokenStream("content", new StringReader(doc.get("content")));
      termAtt = ts.addAttribute(TermAttribute.class);   
     
      while(ts.incrementToken())
      {
        tempToken = termAtt.term();
View Full Code Here

          else
            contentVocabMap.put(tempToken, 1);
        }
      }
     
      ts = analyzer.tokenStream("desc", new StringReader(doc.get("desc")));
      termAtt = ts.addAttribute(TermAttribute.class);   
     
      while(ts.incrementToken())
      {
        tempToken = termAtt.term();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.