Package org.apache.lucene.analysis.tokenattributes

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute


    BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false));
    bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0));
    try
    {
      TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      Term term = null;
      ts.reset();
      while (ts.incrementToken()) {
        if (term == null)
        {
          term = new Term(fieldName, termAtt.toString());
        } else
        {
//           create from previous to save fieldName.intern overhead
          term = term.createTerm(termAtt.toString());
        }
        bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD));
      }
      ts.end();
      ts.close();
View Full Code Here


    String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
   
    try
    {
      TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      Term term = null;
      ts.reset();
        while (ts.incrementToken()) {
        if (term == null)
        {
          term = new Term(fieldName, termAtt.toString());
        } else
        {
//           create from previous to save fieldName.intern overhead
          term = term.createTerm(termAtt.toString());
        }
        tf.addTerm(term);
      }
      ts.end();
      ts.close();
View Full Code Here

        for (int i = 0; i < fields.length; i++)
            {
                try
                {
                  TokenStream ts = analyzer.reusableTokenStream(fields[i],new StringReader(stopWords));
                  CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                  ts.reset();
                  while(ts.incrementToken()) {
                      stopWordsSet.add(termAtt.toString());
                  }
                  ts.end();
                  ts.close();
                }
                catch(IOException ioe)
View Full Code Here

   
    private void addTerms(IndexReader reader,FieldVals f) throws IOException
    {
        if(f.queryString==null) return;
        TokenStream ts=analyzer.reusableTokenStream(f.fieldName,new StringReader(f.queryString));
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
       
        int corpusNumDocs=reader.numDocs();
        Term internSavingTemplateTerm =new Term(f.fieldName); //optimization to avoid constructing new Term() objects
        HashSet<String> processedTerms=new HashSet<String>();
        ts.reset();
        while (ts.incrementToken())
        {
                String term = termAtt.toString();
          if(!processedTerms.contains(term))
          {
            processedTerms.add(term);
                ScoreTermQueue variantsQ=new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
                float minScore=0;
View Full Code Here

   
    try
    {
      ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>();
      TokenStream ts=analyzer.reusableTokenStream(fieldName,new StringReader(value));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
     
      ts.reset();
      while (ts.incrementToken()) {
          SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, termAtt.toString()));
          clausesList.add(stq);
      }
      ts.end();
      ts.close();
      SpanOrQuery soq=new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
View Full Code Here

  }
 
  private void doTestStopPositons(StopFilter stpf, boolean enableIcrements) throws IOException {
    log("---> test with enable-increments-"+(enableIcrements?"enabled":"disabled"));
    stpf.setEnablePositionIncrements(enableIcrements);
    CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
    stpf.reset();
    for (int i=0; i<20; i+=3) {
      assertTrue(stpf.incrementToken());
      log("Token "+i+": "+stpf);
      String w = English.intToEnglish(i).trim();
      assertEquals("expecting token "+i+" to be "+w,w,termAtt.toString());
      assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement());
    }
    assertFalse(stpf.incrementToken());
    stpf.end();
    stpf.close();
View Full Code Here

    throws IOException
  {
       TokenStream ts = analyzer.reusableTokenStream(fieldName, r);
      int tokenCount=0;
      // for every token
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        String word = termAtt.toString();
        tokenCount++;
        if(tokenCount>maxNumTokensParsed)
        {
          break;
        }
View Full Code Here

                      String field,
                      Set<?> stop)
                      throws IOException
 
    TokenStream ts = a.reusableTokenStream( field, new StringReader( body));
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
   
    BooleanQuery tmp = new BooleanQuery();
    Set<String> already = new HashSet<String>(); // ignore dups
    while (ts.incrementToken()) {
      String word = termAtt.toString();
      // ignore opt stop words
      if ( stop != null &&
         stop.contains( word)) continue;
      // ignore dups
      if ( ! already.add( word)) continue;
View Full Code Here

        List<String> terms = new ArrayList<String>();
        try {
          boolean hasMoreTokens = false;
         
          stream.reset();
          final CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);

          hasMoreTokens = stream.incrementToken();
          while (hasMoreTokens) {
            terms.add(termAtt.toString());
            hasMoreTokens = stream.incrementToken();
          }
          processTerms(terms.toArray(new String[terms.size()]));
        } catch (IOException e) {
        }
View Full Code Here

      HashMap<String,ArrayIntList> terms = new HashMap<String,ArrayIntList>();
      int numTokens = 0;
      int numOverlapTokens = 0;
      int pos = -1;
     
      CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        String term = termAtt.toString();
        if (term.length() == 0) continue; // nothing to do
//        if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0)
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.tokenattributes.CharTermAttribute

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.