Examples of opennlp.tools.tokenize.TokenizerME.tokenize()

Class opennlp.tools.tokenize.TokenizerME

Examples of opennlp.tools.tokenize.TokenizerME.tokenize()

opennlp.tools.tokenize.TokenizerME.tokenize()
Tokenize a String. @param s The string to be tokenized. @return A string array containing individual tokens as elements.

   */
  public SentencesToTree(String text, TokenizerModel model){
    /* Configure the tokenizer with preloaded model */
    Tokenizer tokenizer = new TokenizerME(model);
    /* tokens has an array of strings, where each string is a token */
    String s = spaces(tokenizer.tokenize(text));
    this.text = this.upperCase(s);
  }
  
  /**
   *

View Full Code Here


  // non-private for test cases
  String[] tokenize(String sentence) {
    TokenizerME tokenizer = new TokenizerME(tokenModel);
    String cleanString = sentence.replace('’', '\'');  // this is the type of apostrophe that OpenNLP expects
    return tokenizer.tokenize(cleanString);
  }


  private String[] posTag(String[] tokens) {
    POSTaggerME posTagger = new POSTaggerME(posModel);
    return posTagger.tag(tokens);

View Full Code Here

    // this makes sure it doesn't get confused with output from the parser
    sent = untokenizedParenPattern1.matcher(sent).replaceAll("$1 $2");
    sent = untokenizedParenPattern2.matcher(sent).replaceAll("$1 $2");


    // get the tokenizer to break apart the sentence
    String[] tokens = tokenizer.tokenize(sent);


    // build a string to parse as well as a list of tokens
    StringBuffer sb = new StringBuffer();
    List<String> tokenList = new ArrayList<String>();
    for (int j = 0; j < tokens.length; j++) {

View Full Code Here

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.