Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenStream.addAttribute()


    try {
      UIMATypeAwareAnalyzer analyzer = new UIMATypeAwareAnalyzer("/HmmTaggerAggregate.xml",
              "org.apache.uima.TokenAnnotation", "posTag");
      TokenStream ts = analyzer.tokenStream("text", new StringReader("the big brown fox jumped on the wood"));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      TypeAttribute typeAttr = ts.addAttribute(TypeAttribute.class);
      PositionIncrementAttribute posAtt = ts.addAttribute(PositionIncrementAttribute.class);
      while (ts.incrementToken()) {
        assertNotNull(offsetAtt);
        assertNotNull(termAtt);
View Full Code Here


      UIMATypeAwareAnalyzer analyzer = new UIMATypeAwareAnalyzer("/HmmTaggerAggregate.xml",
              "org.apache.uima.TokenAnnotation", "posTag");
      TokenStream ts = analyzer.tokenStream("text", new StringReader("the big brown fox jumped on the wood"));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      TypeAttribute typeAttr = ts.addAttribute(TypeAttribute.class);
      PositionIncrementAttribute posAtt = ts.addAttribute(PositionIncrementAttribute.class);
      while (ts.incrementToken()) {
        assertNotNull(offsetAtt);
        assertNotNull(termAtt);
        assertNotNull(posAtt);
View Full Code Here

              "org.apache.uima.TokenAnnotation", "posTag");
      TokenStream ts = analyzer.tokenStream("text", new StringReader("the big brown fox jumped on the wood"));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      TypeAttribute typeAttr = ts.addAttribute(TypeAttribute.class);
      PositionIncrementAttribute posAtt = ts.addAttribute(PositionIncrementAttribute.class);
      while (ts.incrementToken()) {
        assertNotNull(offsetAtt);
        assertNotNull(termAtt);
        assertNotNull(posAtt);
        assertNotNull(typeAttr);
View Full Code Here

  @Test
  public void baseUIMAPayloadsAnalyzerStreamTest() {
    try {
      TokenStream ts = analyzer.tokenStream("text", new StringReader("the big brown fox jumped on the wood"));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      PayloadAttribute payloadAttribute = ts.addAttribute(PayloadAttribute.class);
      while (ts.incrementToken()) {
        assertNotNull(termAtt);
        assertNotNull(payloadAttribute);
        System.out.println("token '" + termAtt.toString() + "' has payload " + new String(payloadAttribute.getPayload().getData()));
View Full Code Here

  @Test
  public void baseUIMAPayloadsAnalyzerStreamTest() {
    try {
      TokenStream ts = analyzer.tokenStream("text", new StringReader("the big brown fox jumped on the wood"));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      PayloadAttribute payloadAttribute = ts.addAttribute(PayloadAttribute.class);
      while (ts.incrementToken()) {
        assertNotNull(termAtt);
        assertNotNull(payloadAttribute);
        System.out.println("token '" + termAtt.toString() + "' has payload " + new String(payloadAttribute.getPayload().getData()));
      }
View Full Code Here

    public List<String> getTermList(String contentText) {
        List<String> result = new ArrayList<String>(contentText.length() / 10);
       
        try {
            TokenStream stream = _analyzer.tokenStream("content", new StringReader(contentText));
            CharTermAttribute termAtt = (CharTermAttribute) stream.addAttribute(CharTermAttribute.class);

            stream.reset();
            while (stream.incrementToken()) {
                if (termAtt.length() > 0) {
                    String term = termAtt.toString();
View Full Code Here

  private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory){
    StringReader reader = new StringReader( source );
    TokenStream ts = loadTokenizer(tokFactory, reader);
    List<String> tokList = new ArrayList<String>();
    try {
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      while (ts.incrementToken()){
        if( termAtt.length() > 0 )
          tokList.add( termAtt.toString() );
      }
    } catch (IOException e) {
View Full Code Here

   * @param analyzer The analyzer to use.
   */
  protected Set<String> getQueryTokenSet(String query, Analyzer analyzer) {
    final Set<String> tokens = new HashSet<String>();
    final TokenStream tokenStream = analyzer.tokenStream("", new StringReader(query));
    final CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
    try {
      tokenStream.reset();
      while (tokenStream.incrementToken()) {
        tokens.add(termAtt.toString());
      }
View Full Code Here

      String word = matcher.group(0);
      if (word.equals("AND") == false && word.equals("OR") == false) {
        try {
          stream = analyzer.reusableTokenStream("", new StringReader(word));
          // TODO: support custom attributes
          CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
          FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class);
          TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
          PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
          PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
          stream.reset();
View Full Code Here

      if (word.equals("AND") == false && word.equals("OR") == false) {
        try {
          stream = analyzer.reusableTokenStream("", new StringReader(word));
          // TODO: support custom attributes
          CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
          FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class);
          TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
          PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
          PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
          stream.reset();
          while (stream.incrementToken()) {
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.