Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Token


    assertTrue("Iterator should have 1 attributes left", it.hasNext());
    assertSame("Second AttributeImpl from iterator should be typeAtt", typeAtt, it.next());
    assertFalse("Iterator should have 0 attributes left", it.hasNext());

    src = new AttributeSource();
    src.addAttributeImpl(new Token());
    // this should not add a new attribute as Token implements TermAttribute, too
    termAtt = (TermAttribute) src.addAttribute(TermAttribute.class);
    assertTrue("TermAttribute should be implemented by Token", termAtt instanceof Token);
    // get the Token attribute and check, that it is the only one
    it = src.getAttributeImplsIterator();
    Token tok = (Token) it.next();
    assertFalse("There should be only one attribute implementation instance", it.hasNext());
   
    termAtt.setTermBuffer("TestTerm");
    assertEquals("Token should only printed once", "("+tok.toString()+")", src.toString());
  }
View Full Code Here


      if (expansions == null) {
        return true;
      }
      st = new StringTokenizer(expansions, ",");
      if (st.hasMoreTokens()) {
        currentRealToken = new Token(realOffsetAtt.startOffset(), realOffsetAtt.endOffset());
        currentRealToken.setTermBuffer(realTermAtt.term());
      }
     
      return true;
    } else {
View Full Code Here

            Field f = new Field("e", i + " Heres Johnny!", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            f.setOmitNorms(true);
            document.add(f);
            if (i > 4) {
              final List<Token> tokens = new ArrayList<Token>(2);
              Token t = createToken("the", 0, 2, "text");
              t.setPayload(new Payload(new byte[]{1, 2, 3}));
              tokens.add(t);
              t = createToken("end", 3, 5, "text");
              t.setPayload(new Payload(new byte[]{2}));
              tokens.add(t);
              tokens.add(createToken("fin", 7, 9));
              TokenStream ts = new TokenStream(Token.TOKEN_ATTRIBUTE_FACTORY) {
                final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
                Iterator<Token> it = tokens.iterator();
View Full Code Here

    testReader.close();
  }

  private static Token createToken(String term, int start, int offset)
  {
    return new Token(term, start, offset);
  }
View Full Code Here

    return new Token(term, start, offset);
  }

  private static Token createToken(String term, int start, int offset, String type)
  {
    return new Token(term, start, offset, type);
  }
View Full Code Here

     * @throws Exception DOCUMENT ME!
     */
    public String reTokenize(File file) throws Exception {
        TokenStream ts = new StandardAnalyzer().tokenStream(new HTMLParser(file).getReader());

        Token token = null;

        while ((token = ts.next()) != null) {
            System.out.println("ReTokenizeFile.reTokenize(File): " + token.termText() + " " +
                token.startOffset() + " " + token.endOffset() + " " + token.type());
        }

        return file.getAbsolutePath();
    }
View Full Code Here

                     Document aDoc = createDocument(parent, getNamespaceMappings(), getIndex().getIndexFormatVersion());
                     try
                     {
                        // find the right fields to transfer
                        Fieldable[] fields = aDoc.getFieldables(FieldNames.PROPERTIES);
                        Token t = new Token();
                        for (int k = 0; k < fields.length; k++)
                        {
                           Fieldable field = fields[k];
                           // assume properties fields use
                           // SingleTokenStream
                           t = field.tokenStreamValue().next(t);
                           String value = new String(t.termBuffer(), 0, t.termLength());
                           if (value.startsWith(namePrefix))
                           {
                              // extract value
                              value = value.substring(namePrefix.length());
                              // create new named value
                              QPath p = getRelativePath(state, propState);
                              String path = getNamespaceMappings().translatePath(p);
                              value = FieldNames.createNamedValue(path, value);
                              t.setTermBuffer(value);
                              doc.add(new Field(field.name(), new SingletonTokenStream(t)));
                              doc.add(new Field(FieldNames.AGGREGATED_NODE_UUID, parent.getIdentifier(),
                                 Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
                           }
                        }
View Full Code Here

                     Document aDoc = createDocument(parent, getNamespaceMappings(), getIndex().getIndexFormatVersion());
                     try
                     {
                        // find the right fields to transfer
                        Fieldable[] fields = aDoc.getFieldables(FieldNames.PROPERTIES);
                        Token t = new Token();
                        for (int k = 0; k < fields.length; k++)
                        {
                           Fieldable field = fields[k];
                           // assume properties fields use
                           // SingleTokenStream
                           t = field.tokenStreamValue().next(t);
                           String value = new String(t.termBuffer(), 0, t.termLength());
                           if (value.startsWith(namePrefix))
                           {
                              // extract value
                              value = value.substring(namePrefix.length());
                              // create new named value
                              QPath p = getRelativePath(state, propState);
                              String path = getNamespaceMappings().translatePath(p);
                              value = FieldNames.createNamedValue(path, value);
                              t.setTermBuffer(value);
                              doc.add(new Field(field.name(), new SingletonTokenStream(t)));
                              doc.add(new Field(FieldNames.AGGREGATED_NODE_UUID, parent.getIdentifier(),
                                 Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
                           }
                        }
View Full Code Here

    private void addTermFrequencies(Reader r, Map termFreqMap, String fieldName)
            throws IOException {
        TokenStream ts = analyzer.tokenStream(fieldName, r);
        int tokenCount = 0;
        // for every token
        final Token reusableToken = new Token();
        for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) {
            String word = nextToken.term();
            tokenCount++;
            if (tokenCount > maxNumTokensParsed) {
                break;
View Full Code Here

   {
      // term -> TermVectorOffsetInfo[]
      final SortedMap termMap = new TreeMap();
      Reader r = new StringReader(text);
      TokenStream ts = index.getTextAnalyzer().tokenStream("", r);
      Token t = new Token();
      try
      {
         while ((t = ts.next(t)) != null)
         {
            String termText = t.term();
            TermVectorOffsetInfo[] info = (TermVectorOffsetInfo[])termMap.get(termText);
            if (info == null)
            {
               info = new TermVectorOffsetInfo[1];
            }
            else
            {
               TermVectorOffsetInfo[] tmp = info;
               info = new TermVectorOffsetInfo[tmp.length + 1];
               System.arraycopy(tmp, 0, info, 0, tmp.length);
            }
            info[info.length - 1] = new TermVectorOffsetInfo(t.startOffset(), t.endOffset());
            termMap.put(termText, info);
         }
      }
      catch (IOException e)
      {
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.Token

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.