Package org.sf.mustru.test

Source Code of org.sf.mustru.test.TestAnalyzers

package org.sf.mustru.test;

//import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
//import org.apache.lucene.analysis.standard.StandardAnalyzer;
//import org.apache.lucene.analysis.StopAnalyzer;
//import org.apache.lucene.analysis.WhitespaceAnalyzer;
//import org.apache.lucene.analysis.standard.StandardAnalyzer;
//import org.sf.mustru.utils.StandardGapAnalyzer;
import org.sf.mustru.utils.StandardBgramAnalyzer;

//import com.aliasi.util.Files;

/**
* A Class similar to the AnalyzerUtils in Lucene in Action
*/

public class TestAnalyzers
{
  public static void main(String[] args
  {
  try
  {
   System.out.println("Started");
//  String[] stopwords = {};
// WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
//  StandardAnalyzer analyzer = new StandardAnalyzer();
    StandardBgramAnalyzer analyzer = new StandardBgramAnalyzer();
    String str = "Alice was 1987 and 233,999.145$ beginning to get very tired of sitting-by-her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversation?'";
    str = "We were on the east flank of Mt. Kilimanjaro -- at 19,342 feet the highest mountain in Africa. In the pre-dawn hours I locked my eyes onto the";;
//  String dirname = "/home/manuk/mustru_bak/test/data/tcat_testing/coffee";
  //String filename = "/home/manuk/html/junk/LA122489-0101.txt";
   // String str = Files.readFromFile( new File(filename) );
//    String str = "The quick brown fox ...";
//    String str = "This is a test of the emergency broadcast system";

    System.out.println("STRING: " + str);
    displayTokensWithDetails(analyzer, str);
    }
    catch (IOException ie) { System.out.println("IO Error " + ie.getMessage()); }
    System.out.println("Ended");
  }

  private static void displayTokensWithDetails(Analyzer analyzer, String text) throws IOException
  {
  //*-- get the list of tokens using the passed analyzer
    Token[] tokens = tokensFromAnalysis(analyzer, text);

    int position = 0;
    for (int i = 0; i < tokens.length; i++)
     {
      Token token = tokens[i];
      int increment = token.getPositionIncrement();
      if (increment > 0)
      { position = position + increment;
        System.out.println();
        System.out.print(position + ": ");
      }
      System.out.print("\t [" + token.termText() + ": " + token.type() + "] " + token.startOffset() + ":" + token.endOffset());
     
     } //*-- end of for
    System.out.println("");
  }

  /**
   * Use the passed analyzer to get a list of tokens from the text
   */
  private static Token[] tokensFromAnalysis(Analyzer analyzer, String text) throws IOException
  {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    ArrayList<Token> tokenList = new ArrayList<Token>(); Token token = null;
    while ( (token = stream.next()) != null) tokenList.add(token);
    Token[] tokens = new Token[tokenList.size()];
    for (int i = 0; i < tokens.length; i++) tokens[i] = tokenList.get(i);
    return (tokens);
    }

}
TOP

Related Classes of org.sf.mustru.test.TestAnalyzers

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.