Package edu.stanford.nlp.ling

Examples of edu.stanford.nlp.ling.Word


    }
    String cat = t.label().value();
    String baseParentStr = tlpParams.treebankLanguagePack().basicCategory(parentStr);
    String baseGrandParentStr = tlpParams.treebankLanguagePack().basicCategory(grandParentStr);
    if (t.isLeaf()) {
      return tf.newLeaf(new Word(t.label().value()));
    }
    String word = t.headTerminal(hf).value();
    if (t.isPreTerminal()) {
      nonTerms.incrementCount(t.label().value());
    } else {
View Full Code Here


    assert(ptbInputs.length == ptbGold.length);
    for (int sent = 0; sent < ptbInputs.length; sent++) {
      PTBTokenizer<Word> ptbTokenizer = PTBTokenizer.newPTBTokenizer(new StringReader(ptbInputs[sent]));
      int i = 0;
      while (ptbTokenizer.hasNext()) {
        Word w = ptbTokenizer.next();
        try {
          assertEquals("PTBTokenizer problem", ptbGold[sent][i], w.value());
        } catch (ArrayIndexOutOfBoundsException aioobe) {
          // the assertion below outside the loop will fail
        }
        i++;
      }
View Full Code Here

    assert(mtInputs.length == mtGold.length);
    for (int sent = 0; sent < mtInputs.length; sent++) {
      PTBTokenizer<Word> ptbTokenizer = PTBTokenizer.newPTBTokenizer(new StringReader(mtInputs[sent]));
      int i = 0;
      while (ptbTokenizer.hasNext()) {
        Word w = ptbTokenizer.next();
        try {
          assertEquals("PTBTokenizer problem on string " + sent + " token " + i, mtGold[sent][i], w.value());
        } catch (ArrayIndexOutOfBoundsException aioobe) {
          // the assertion below outside the loop will fail
        }
        i++;
      }
View Full Code Here

   *
   * @return the next token in the token stream, or null if none exists.
   */
  @Override
  protected Word getNext() {
    Word token = null;
    if (lexer == null) {
      return token;
    }
    try {
      token = lexer.next();
View Full Code Here

      return;
    }
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(new InputStreamReader(new FileInputStream(args[args.length - 1]), "UTF-8"), args[0].equals("-cr"));
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"), true);
    while (tokenizer.hasNext()) {
      Word w = tokenizer.next();
      if (w == WhitespaceLexer.crValue) {
        pw.println("***CR***");
      } else {
        pw.println(w);
      }
View Full Code Here

          }

          // looks up tag name in list of known block-level tags
          String tagName = ws.substring(tagStartIndex, tagEndIndex + 1).toLowerCase();
          if (blockTags.contains(tagName)) {
            out.add(new Word("\n")); // mark newline for block-level tags
            justInsertedNewline = true;
          }
        }
      } else {
        out.add(w); // normal word
View Full Code Here

      // store back cached position
      zzMarkedPos = zzMarkedPosL;

      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
        case 1:
          { return new Word(yytext());
          }
        case 4: break;
        case 2:
          { return crValue;
          }
View Full Code Here

* @author Jenny Finkel
*/
public class WordTokenFactory implements LexedTokenFactory<Word> {

  public Word makeToken(String str, int begin, int length) {
    return new Word(str, begin, begin+length);
  }
View Full Code Here

  public Word next() throws IOException {
    String nx = lexer.next();
    if (nx == null) {
      return null;
    } else {
      return new Word(nx);
    }
  }
View Full Code Here

  public Word stem(Word w) {
    try {
      lexer.yyreset(new StringReader(w.value()));
      lexer.yybegin(Morpha.any);
      String wordRes = lexer.next();
      return new Word(wordRes);
    } catch (Exception e) {
      e.printStackTrace();
    }
    return w;
  }
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.ling.Word

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.