Examples of edu.stanford.nlp.ling.HasWord

edu.stanford.nlp.ling.HasWord
Something that implements the HasWord interface knows about words. @author Christopher Manning

  }


  @SuppressWarnings("OverlyStrongTypeCast")
  private String getString(IN o) {
    if (o instanceof HasWord) {
      HasWord h = (HasWord) o;
      return h.word();
    } else if (o instanceof String) {
      return (String) o;
    } else if (o instanceof CoreMap) {
      return ((CoreMap) o).get(CoreAnnotations.TextAnnotation.class);
    } else {

View Full Code Here

    taggedWordList = new List[length];
    int terminalCount = 0;
    originalLabels = new CoreLabel[wordList.size()];
    for (int i = 0; i < length; i++) {
      taggedWordList[i] = new ArrayList<IntTaggedWord>(numTags);
      HasWord wordObject = wordList.get(i);
      if (wordObject instanceof CoreLabel) {
        originalLabels[i] = (CoreLabel) wordObject;
      }
      String wordStr = wordObject.word();


      //Word context (e.g., morphosyntactic info)
      String wordContextStr = null;
      if(wordObject instanceof HasContext) {
        wordContextStr = ((HasContext) wordObject).originalText();

View Full Code Here

      return;
    }
    List<? extends HasWord> tokens = parser.tokenize(arg);
    OutputStreamWriter osw = new OutputStreamWriter(outStream, "utf-8");
    for (int i = 0; i < tokens.size(); ++i) {
      HasWord word = tokens.get(i);
      if (i > 0) {
        osw.write(" ");
      }
      osw.write(word.toString());
    }
    osw.write("\n");
    osw.flush();
  }

View Full Code Here

      nextSentCarryover.clear();
      boolean seenBoundary = false;


      while (tokenizer.hasNext()) {


        HasWord token = tokenizer.next();
        if (splitTag != null) {
          String[] toks = splitTag.apply(token.word());
          token.setWord(toks[0]);
          if(toks.length == 2 && token instanceof HasTag) {
            //wsg2011: Some of the underlying tokenizers return old
            //JavaNLP labels.  We could convert to CoreLabel here, but
            //we choose a conservative implementation....
            ((HasTag) token).setTag(toks[1]);
          }
        }


        if (sentDelims.contains(token.word())) {
          seenBoundary = true;
        } else if (seenBoundary && !delimFollowers.contains(token.word())) {
          nextSentCarryover.add(token);
          break;
        }


        if ( ! (token.word().matches("\\s+") //|| 
                /*token.word().equals(PTBLexer.NEWLINE_TOKEN)*/)) {
          nextSent.add(token);
        }


        // If there are no words that can follow a sentence delimiter,

View Full Code Here

    List<IN> lastSentence = null;
    boolean insideRegion = false;
    for (IN o: words) {
      String word;
      if (o instanceof HasWord) {
        HasWord h = (HasWord) o;
        word = h.word();
      } else if (o instanceof String) {
        word = (String) o;
      } else if (o instanceof CoreMap) {
        word = ((CoreMap)o).get(CoreAnnotations.WordAnnotation.class);
      } else {

View Full Code Here

   * @return The sentence with tokens space separated.
   */
  private static String glueSentence(List<HasWord> sentence) {
    StringBuilder result = new StringBuilder();
    if ( ! sentence.isEmpty()) {
      HasWord word = sentence.get(0);
      String s = word.word();
      result.append(s);
      for (int i = 1, sz = sentence.size(); i < sz; i++) {
        word = sentence.get(i);
        s = word.word();
        result.append(" ").append(s);
      }
    }
    return result.toString();
  }

View Full Code Here

    // see if there is a quote at the end
    if (input.get(inputSize - 1).word().equals("\"")) {
      // alternate from the end
      begin = false;
      for (int i = inputSize - 1; i >= 0; i--) {
        HasWord hw = input.get(i);
        String tok = hw.word();
        if (tok.equals("\"")) {
          if (begin) {
            hw.setWord("``");
            begin = false;
          } else {
            hw.setWord("\'\'");
            begin = true;
          }
        } // otherwise leave it alone
        result.addFirst(hw);
      } // end loop
    } else {
      // alternate from the beginning
      begin = true;
      for (int i = 0; i < inputSize; i++) {
        HasWord hw = input.get(i);
        String tok = hw.word();
        if (tok.equals("\"")) {
          if (begin) {
            hw.setWord("``");
            begin = false;
          } else {
            hw.setWord("\'\'");
            begin = true;
          }
        } // otherwise leave it alone
        result.addLast(hw);
      } // end loop

View Full Code Here

   */
  public void percolateHeads(HeadFinder hf) {
    Label cwt = label();
    if (isLeaf()) {
      if (cwt instanceof HasWord) {
        HasWord w = (HasWord) cwt;
        if (w.word() == null) {
          w.setWord(cwt.value());
        }
      }
    } else {
      Tree[] kids = children();
      for (int i = 0; i < kids.length; i++) {

View Full Code Here

0 1

TOP

Related Classes of edu.stanford.nlp.ling.HasWord

cc.util.SentenceTokeniser$PlainTextIterator

edu.stanford.nlp.parser.lexparser.BiLexPCFGParser

edu.stanford.nlp.parser.lexparser.ExhaustivePCFGParser

edu.stanford.nlp.parser.lexparser.LexicalizedParserQuery

edu.stanford.nlp.parser.server.LexicalizedParserServer

edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser

edu.stanford.nlp.process.DocumentPreprocessor

edu.stanford.nlp.process.DocumentPreprocessor$PlainTextIterator

edu.stanford.nlp.process.PTBEscapingProcessor

edu.stanford.nlp.process.WordToSentenceProcessor

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.