Package opennlp.tools.tokenize

Examples of opennlp.tools.tokenize.TokenizerME.tokenize()


   */
  public SentencesToTree(String text, TokenizerModel model){
    /* Configure the tokenizer with preloaded model */
    Tokenizer tokenizer = new TokenizerME(model);
    /* tokens has an array of strings, where each string is a token */
    String s = spaces(tokenizer.tokenize(text));
    this.text = this.upperCase(s);
  }
 
  /**
   *
 
View Full Code Here


  // non-private for test cases
  String[] tokenize(String sentence) {
    TokenizerME tokenizer = new TokenizerME(tokenModel);
    String cleanString = sentence.replace('’', '\'')// this is the type of apostrophe that OpenNLP expects
    return tokenizer.tokenize(cleanString);
  }

  private String[] posTag(String[] tokens) {
    POSTaggerME posTagger = new POSTaggerME(posModel);
    return posTagger.tag(tokens);
View Full Code Here

    // this makes sure it doesn't get confused with output from the parser
    sent = untokenizedParenPattern1.matcher(sent).replaceAll("$1 $2");
    sent = untokenizedParenPattern2.matcher(sent).replaceAll("$1 $2");

    // get the tokenizer to break apart the sentence
    String[] tokens = tokenizer.tokenize(sent);

    // build a string to parse as well as a list of tokens
    StringBuffer sb = new StringBuffer();
    List<String> tokenList = new ArrayList<String>();
    for (int j = 0; j < tokens.length; j++) {
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.