Package edu.stanford.nlp.ling

Examples of edu.stanford.nlp.ling.HasWord


        for (int end = start + 1; (end < length - 1 && end - start <= op.testOptions.maxSpanForTags) || (start + 1 == end); end++) {
          StringBuilder word = new StringBuilder();
          //wsg: Feb 2010 - Appears to support character-level parsing
          for (int i = start; i < end; i++) {
            if (sentence.get(i) instanceof HasWord) {
              HasWord cl = sentence.get(i);
              word.append(cl.word());
            } else {
              word.append(sentence.get(i).toString());
            }
          }
          for (int state = 0; state < numStates; state++) {
View Full Code Here


        Tree wordNode = null;
        if (sentence != null) {
          StringBuilder word = new StringBuilder();
          for (int i = start; i < end; i++) {
            if (sentence.get(i) instanceof HasWord) {
              HasWord cl = (HasWord) sentence.get(i);
              word.append(cl.word());
            } else {
              word.append(sentence.get(i).toString());
            }
          }
          wordNode = tf.newLeaf(word.toString());

        } else if (lr != null) {
          List<LatticeEdge> latticeEdges = lr.getEdgesOverSpan(start, end);
          for (LatticeEdge edge : latticeEdges) {
            IntTaggedWord itw = new IntTaggedWord(edge.word, stateIndex.get(goal), wordIndex, tagIndex);

            float tagScore = (floodTags) ? -1000.0f : lex.score(itw, start, edge.word, null);
            if (matches(bestScore, tagScore + (float) edge.weight)) {
              wordNode = tf.newLeaf(edge.word);
              if(wordNode.label() instanceof CoreLabel) {
                CoreLabel cl = (CoreLabel) wordNode.label();
                cl.setBeginPosition(start);
                cl.setEndPosition(end);
              }
              break;
            }
          }
          if (wordNode == null) {
View Full Code Here

          if (leaf.label() instanceof HasIndex) {
            HasIndex hi = (HasIndex) leaf.label();
            hi.setIndex(wordIndex);
          }
          if (leaf.label() instanceof HasWord) {
            HasWord hw = (HasWord) leaf.label();
            hw.setWord(leaf.label().value());
          }
          wordIndex++;

          currentTree.addChild(leaf);
          // cdm: Note: this implementation just isn't as efficient as the old recursive descent parser (see 2008 code), where all the daughters are gathered before the tree is made....
View Full Code Here

  private boolean addSentenceFinalPunctIfNeeded(List<HasWord> sentence, int length) {
    int start = length - 3;
    if (start < 0) start = 0;
    TreebankLanguagePack tlp = op.tlpParams.treebankLanguagePack();
    for (int i = length - 1; i >= start; i--) {
      HasWord item = sentence.get(i);
      // An object (e.g., CoreLabel) can implement HasTag but not actually store
      // a tag so we need to check that there is something there for this case.
      // If there is, use only it, since word tokens can be ambiguous.
      String tag = null;
      if (item instanceof HasTag) {
        tag = ((HasTag) item).tag();
      }
      if (tag != null && ! tag.isEmpty()) {
        if (tlp.isSentenceFinalPunctuationTag(tag)) {
          return false;
        }
      } else {
        String str = item.word();
        if (tlp.isPunctuationWord(str)) {
          return false;
        }
      }
    }
View Full Code Here

  public void percolateHeads(HeadFinder hf) {
    Label nodeLabel = label();
    if (isLeaf()) {
      // Sanity check: word() is usually set by the TreeReader.
      if (nodeLabel instanceof HasWord) {
        HasWord w = (HasWord) nodeLabel;
        if (w.word() == null) {
          w.setWord(nodeLabel.value());
        }
      }

    } else {
      for (Tree kid : children()) {
View Full Code Here

   */
  @SuppressWarnings("unchecked")
  public <T> List<T> yield(List<T> y) {
    if (isLeaf()) {
      if(label() instanceof HasWord) {
        HasWord hw = (HasWord) label();
        hw.setWord(label().value());
      }
      y.add((T) label());

    } else {
      Tree[] kids = children();
View Full Code Here

  }

  public static State initialStateFromTaggedSentence(List<? extends HasWord> words) {
    List<Tree> preterminals = Generics.newArrayList();
    for (int index = 0; index < words.size(); ++index) {
      HasWord hw = words.get(index);

      CoreLabel wordLabel;
      String tag;
      if (hw instanceof CoreLabel) {
        wordLabel = (CoreLabel) hw;
        tag = wordLabel.tag();
        CoreLabel cl = (CoreLabel) hw;
      } else {
        wordLabel = new CoreLabel();
        wordLabel.setValue(hw.word());
        wordLabel.setWord(hw.word());
        if (!(hw instanceof HasTag)) {
          throw new IllegalArgumentException("Expected tagged words");
        }
        tag = ((HasTag) hw).tag();
        wordLabel.setTag(tag);
View Full Code Here

    return newPosProb.second();
  }

  public void printSamples(List samples, PrintStream out) {
    for (int i = 0; i < document.size(); i++) {
      HasWord word = (HasWord) document.get(i);
      String s = "null";
      if (word!=null) {
        s = word.word();
      }
      out.print(StringUtils.padOrTrim(s, 10));
      for (int j = 0; j < samples.size(); j++) {
        int[] sequence = (int[]) samples.get(j);
        out.print(" " + StringUtils.padLeft(sequence[i], 2));
View Full Code Here

        nextSent = null;
        return;
      }

      do {
        HasWord token = tokenizer.next();
        if (splitTag != null) {
          String[] toks = splitTag.apply(token.word());
          token.setWord(toks[0]);
          if (token instanceof Label) {
            ((Label) token).setValue(toks[0]);
          }
          if(toks.length == 2 && token instanceof HasTag) {
            //wsg2011: Some of the underlying tokenizers return old
            //JavaNLP labels.  We could convert to CoreLabel here, but
            //we choose a conservative implementation....
            ((HasTag) token).setTag(toks[1]);
          }
        }

        if (sentDelims.contains(token.word())) {
          seenBoundary = true;
        } else if (seenBoundary && !delimFollowers.contains(token.word())) {
          nextSentCarryover.add(token);
          break;
        }

        if ( ! (wsPattern.matcher(token.word()).matches() ||
                token.word().equals(PTBLexer.NEWLINE_TOKEN))) {
          nextSent.add(token);
        }

        // If there are no words that can follow a sentence delimiter,
        // then there are two cases.  In one case, we already have a
View Full Code Here

    // see if there is a quote at the end
    if (input.get(inputSize - 1).word().equals("\"")) {
      // alternate from the end
      begin = false;
      for (int i = inputSize - 1; i >= 0; i--) {
        HasWord hw = input.get(i);
        String tok = hw.word();
        if (tok.equals("\"")) {
          if (begin) {
            hw.setWord("``");
            begin = false;
          } else {
            hw.setWord("\'\'");
            begin = true;
          }
        } // otherwise leave it alone
        result.addFirst(hw);
      } // end loop
    } else {
      // alternate from the beginning
      begin = true;
      for (int i = 0; i < inputSize; i++) {
        HasWord hw = input.get(i);
        String tok = hw.word();
        if (tok.equals("\"")) {
          if (begin) {
            hw.setWord("``");
            begin = false;
          } else {
            hw.setWord("\'\'");
            begin = true;
          }
        } // otherwise leave it alone
        result.addLast(hw);
      } // end loop
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.ling.HasWord

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.