Package edu.stanford.nlp.ling

Examples of edu.stanford.nlp.ling.CoreLabel


    List<CoreLabel> words = new ArrayList<CoreLabel>();
    String[] testWords = {"I", "think", "I", "'ll",
                          "go", "to", "Boston", "."};

    for (String word : testWords) {
      CoreLabel label = new CoreLabel(new Word(word));
      label.setWord(label.value());
      words.add(label);
    }

    tagger.tagCoreLabels(words);
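The snippet above leaves the tagger unspecified. As a minimal sketch (the model path below is only an assumption; any MaxentTagger model file will do), the tagger can be loaded up front and the tags read back from each CoreLabel after tagCoreLabels has filled them in:

    import java.util.ArrayList;
    import java.util.List;

    import edu.stanford.nlp.ling.CoreLabel;
    import edu.stanford.nlp.ling.Word;
    import edu.stanford.nlp.tagger.maxent.MaxentTagger;

    public class TagCoreLabelsSketch {
      public static void main(String[] args) {
        // Assumed model path; substitute whichever tagger model you have locally.
        MaxentTagger tagger = new MaxentTagger("models/english-left3words-distsim.tagger");

        List<CoreLabel> words = new ArrayList<>();
        for (String w : new String[] {"I", "think", "I", "'ll", "go", "to", "Boston", "."}) {
          CoreLabel label = new CoreLabel(new Word(w));
          label.setWord(label.value());
          words.add(label);
        }

        tagger.tagCoreLabels(words);  // tags the CoreLabels in place

        for (CoreLabel label : words) {
          System.out.println(label.word() + "/" + label.tag());  // e.g. Boston/NNP
        }
      }
    }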


        case MATCH_ONE_TOKEN_PHRASE_ONLY:
          if (tokens.size() > 1) return true;
          // fall through
        case MATCH_AT_LEAST_ONE_TOKEN:
          for (int i = start; i < end; i++) {
            CoreLabel token = tokens.get(i);
            String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
            if (pos != null && validPosPattern.matcher(pos).matches()) {
              return true;
            }
          }
          return false;
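The case above reads the part-of-speech tag through the typed annotation map rather than the tag() convenience accessor. Both go to the same underlying key; a minimal, self-contained sketch (the token and tag values are only illustrative):

    import edu.stanford.nlp.ling.CoreAnnotations;
    import edu.stanford.nlp.ling.CoreLabel;

    public class PosAnnotationSketch {
      public static void main(String[] args) {
        CoreLabel token = new CoreLabel();
        token.setWord("Boston");
        token.setValue("Boston");

        // Writing through the annotation map ...
        token.set(CoreAnnotations.PartOfSpeechAnnotation.class, "NNP");

        // ... is equivalent to using the setTag()/tag() convenience methods.
        String viaMap = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        String viaGetter = token.tag();
        System.out.println(viaMap + " " + viaGetter);  // prints: NNP NNP
      }
    }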

   * also have tag annotations (the parser requires part-of-speech tags).
   *
   * @see #predict(edu.stanford.nlp.util.CoreMap)
   */
  public GrammaticalStructure predict(List<? extends HasWord> sentence) {
    CoreLabel sentenceLabel = new CoreLabel();
    List<CoreLabel> tokens = new ArrayList<>();

    int i = 1;
    for (HasWord wd : sentence) {
      CoreLabel label;
      if (wd instanceof CoreLabel) {
        label = (CoreLabel) wd;
        if (label.tag() == null)
          throw new IllegalArgumentException("Parser requires words " +
              "with part-of-speech tag annotations");
      } else {
        label = new CoreLabel();
        label.setValue(wd.word());
        label.setWord(wd.word());

        if (!(wd instanceof HasTag))
          throw new IllegalArgumentException("Parser requires words " +
              "with part-of-speech tag annotations");

        label.setTag(((HasTag) wd).tag());
      }

      label.setIndex(i);
      i++;

      tokens.add(label);
    }

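A hedged usage sketch for the method above, assuming the surrounding class is the neural dependency parser (edu.stanford.nlp.parser.nndep.DependencyParser); that assumption, along with the toy sentence and tags, is not part of the original snippet. The point is that every CoreLabel passed in must carry both a word and a part-of-speech tag, exactly as the loop checks:

    import java.util.ArrayList;
    import java.util.List;

    import edu.stanford.nlp.ling.CoreLabel;
    import edu.stanford.nlp.parser.nndep.DependencyParser;
    import edu.stanford.nlp.trees.GrammaticalStructure;

    public class PredictSketch {
      public static void main(String[] args) {
        // Assumption: predict(...) lives on the neural dependency parser.
        DependencyParser parser = DependencyParser.loadFromModelFile(DependencyParser.DEFAULT_MODEL);

        String[][] taggedWords = { {"Dogs", "NNS"}, {"bark", "VBP"}, {".", "."} };
        List<CoreLabel> sentence = new ArrayList<>();
        for (String[] wt : taggedWords) {
          CoreLabel label = new CoreLabel();
          label.setWord(wt[0]);
          label.setValue(wt[0]);
          label.setTag(wt[1]);  // required: untagged words trigger the IllegalArgumentException above
          sentence.add(label);
        }

        GrammaticalStructure gs = parser.predict(sentence);
        System.out.println(gs.typedDependencies());
      }
    }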

   */
  protected List<ParserConstraint> constraints = null;

  private CoreLabel getCoreLabel(int labelIndex) {
    if (originalCoreLabels[labelIndex] != null) {
      CoreLabel terminalLabel = originalCoreLabels[labelIndex];
      if (terminalLabel.value() == null && terminalLabel.word() != null) {
        terminalLabel.setValue(terminalLabel.word());
      }
      return terminalLabel;
    }

    String wordStr = wordIndex.get(words[labelIndex]);
    CoreLabel terminalLabel = new CoreLabel();
    terminalLabel.setValue(wordStr);
    terminalLabel.setWord(wordStr);
    terminalLabel.setBeginPosition(beginOffsets[labelIndex]);
    terminalLabel.setEndPosition(endOffsets[labelIndex]);
    if (originalTags[labelIndex] != null) {
      terminalLabel.setTag(originalTags[labelIndex].tag());
    }
    return terminalLabel;
  }
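getCoreLabel above carries character offsets alongside the word and tag. A minimal sketch of the offset accessors it relies on (the numbers are made-up offsets into some original text):

    import edu.stanford.nlp.ling.CoreLabel;

    public class OffsetSketch {
      public static void main(String[] args) {
        CoreLabel label = new CoreLabel();
        label.setWord("Boston");
        label.setValue("Boston");
        label.setBeginPosition(11);  // character offset of the first character (illustrative)
        label.setEndPosition(17);    // one past the last character (illustrative)

        System.out.println(label.beginPosition() + "-" + label.endPosition());  // prints: 11-17
      }
    }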

            float tagScore = (floodTags) ? -1000.0f : lex.score(itw, start, edge.word, null);
            if (matches(bestScore, tagScore + (float) edge.weight)) {
              wordNode = tf.newLeaf(edge.word);
              if(wordNode.label() instanceof CoreLabel) {
                CoreLabel cl = (CoreLabel) wordNode.label();
                cl.setBeginPosition(start);
                cl.setEndPosition(end);
              }
              break;
            }
          }
          if (wordNode == null) {
            throw new RuntimeException("could not find matching word from lattice in parse reconstruction");
          }

        } else {
          throw new RuntimeException("attempt to get word when sentence and lattice are null!");
        }
        Tree tagNode = tf.newTreeNode(goalStr, Collections.singletonList(wordNode));
        tagNode.setScore(bestScore);
        if (originalTags[start] != null) {
          tagNode.label().setValue(originalTags[start].tag());
        }
        return tagNode;
      } else { // normal lexicon is single words case
        IntTaggedWord tagging = new IntTaggedWord(words[start], tagIndex.indexOf(goalStr));
        String contextStr = getCoreLabel(start).originalText();
        float tagScore = lex.score(tagging, start, wordIndex.get(words[start]), contextStr);
        if (tagScore > Float.NEGATIVE_INFINITY || floodTags) {
          // return a pre-terminal tree
          CoreLabel terminalLabel = getCoreLabel(start);

          Tree wordNode = tf.newLeaf(terminalLabel);
          Tree tagNode = tf.newTreeNode(goalStr, Collections.singletonList(wordNode));
          tagNode.setScore(bestScore);
          if (terminalLabel.tag() != null) {
            tagNode.label().setValue(terminalLabel.tag());
          }
          if (tagNode.label() instanceof HasTag) {
            ((HasTag) tagNode.label()).setTag(tagNode.label().value());
          }
          return tagNode;

        sentence.set(CoreAnnotations.LineNumberAnnotation.class, lineNumber);
      }

      // Annotate sentence with section information
      // Assume section start and end appear as first and last tokens of sentence
      CoreLabel sentenceStartToken = sentenceTokens.get(0);
      CoreLabel sentenceEndToken = sentenceTokens.get(sentenceTokens.size()-1);

      CoreMap sectionStart = sentenceStartToken.get(CoreAnnotations.SectionStartAnnotation.class);
      if (sectionStart != null) {
        // Section is started
        sectionAnnotations = sectionStart;
      }
      if (sectionAnnotations != null) {
        // transfer annotations over to sentence
        ChunkAnnotationUtils.copyUnsetAnnotations(sectionAnnotations, sentence);
      }
      String sectionEnd = sentenceEndToken.get(CoreAnnotations.SectionEndAnnotation.class);
      if (sectionEnd != null) {
        sectionAnnotations = null;
      }
     
      if (docID != null) {
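The loop above expects a SectionStartAnnotation (a CoreMap of section-level annotations) on the first token of a sentence and a SectionEndAnnotation (a String marker) on the last. A hedged sketch of attaching such markers to tokens; the metadata key and values chosen here are only illustrations:

    import edu.stanford.nlp.ling.CoreAnnotations;
    import edu.stanford.nlp.ling.CoreLabel;
    import edu.stanford.nlp.util.ArrayCoreMap;
    import edu.stanford.nlp.util.CoreMap;

    public class SectionMarkerSketch {
      public static void main(String[] args) {
        // Section-level annotations to be copied onto every sentence in the section.
        CoreMap sectionInfo = new ArrayCoreMap();
        sectionInfo.set(CoreAnnotations.DocTypeAnnotation.class, "story");  // illustrative key/value

        CoreLabel firstToken = new CoreLabel();
        firstToken.setWord("Once");
        firstToken.set(CoreAnnotations.SectionStartAnnotation.class, sectionInfo);

        CoreLabel lastToken = new CoreLabel();
        lastToken.setWord(".");
        lastToken.set(CoreAnnotations.SectionEndAnnotation.class, "story");

        System.out.println(firstToken.get(CoreAnnotations.SectionStartAnnotation.class));
      }
    }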

      IntTaggedWord tagging = new IntTaggedWord(words[start], tagIndex.indexOf(goalStr));
      String contextStr = getCoreLabel(start).originalText();
      float tagScore = lex.score(tagging, start, wordIndex.get(words[start]), contextStr);
      if (tagScore > Float.NEGATIVE_INFINITY || floodTags) {
        // return a pre-terminal tree
        CoreLabel terminalLabel = getCoreLabel(start);

        Tree wordNode = tf.newLeaf(terminalLabel);
        Tree tagNode = tf.newTreeNode(goalStr, Collections.singletonList(wordNode));
        if (originalTags[start] != null) {
          tagNode.label().setValue(originalTags[start].tag());

            System.err.println(toks.length);
            System.err.println(line);
            System.err.println(lineId);
            throw new RuntimeException(String.format("line %d: Morfette format is |word lemma tag|: |%s|", lineId, line));
          }
          CoreLabel cl = new CoreLabel();
          String word = toks[0];
          String lemma = toks[1];
          String tag = toks[2];
          cl.setWord(word);
          cl.setValue(word);
          cl.setLemma(lemma);
          cl.setTag(tag);
          nextList.add(cl);
        }

        // File is exhausted
        if (nextList.size() == 0) {
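The Morfette reader above starts after each line has already been split into toks. A hedged sketch of that elided step; the sample line and the whitespace split are assumptions, not the original code:

    public class MorfetteLineSketch {
      public static void main(String[] args) {
        // Morfette format: one token per line, "word lemma tag".
        String line = "chats chat NC";              // illustrative line: word lemma tag
        String[] toks = line.trim().split("\\s+");  // assumed delimiter handling
        if (toks.length != 3) {
          throw new RuntimeException("Morfette format is |word lemma tag|: |" + line + "|");
        }
        System.out.println(toks[0] + " / " + toks[1] + " / " + toks[2]);
      }
    }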

      extentTokens.add(initCoreLabel("It"));
      extentTokens.add(initCoreLabel("was"));
      final int ADDED_WORDS = 2;
      for (int i = m.startIndex; i < endIdx; i++) {
        // Add everything except separated dashes! The separated dashes mess with the parser too badly.
        CoreLabel label = tokens.get(i);
        if ( ! "-".equals(label.word())) {
          extentTokens.add(tokens.get(i));
        } else {
          approximateness++;
        }
      }
      extentTokens.add(initCoreLabel("."));

      // constrain the parse to the part we're interested in.
      // Starting from ADDED_WORDS comes from skipping "It was".
      // -1 to exclude the period.
      // We now let it be any kind of nominal constituent, since there
      // are VP and S ones
      ParserConstraint constraint = new ParserConstraint(ADDED_WORDS, extentTokens.size() - 1, Pattern.compile(".*"));
      List<ParserConstraint> constraints = Collections.singletonList(constraint);
      Tree tree = parse(extentTokens, constraints);
      convertToCoreLabels(tree);  // now unnecessary, as parser uses CoreLabels?
      tree.indexSpans(m.startIndex - ADDED_WORDS);  // remember it has ADDED_WORDS extra words at the beginning
      Tree subtree = findPartialSpan(tree, m.startIndex);
      // There was a possible problem that, with a crazy parse, extentHead could be one of the added words, not a real word!
      // Now we make sure in findPartialSpan that it can't be before the real start, and in safeHead, we disallow anything
      // past the right end (that is, just that final period).
      Tree extentHead = safeHead(subtree, endIdx);
      assert(extentHead != null);
      // extentHead is a child in the local extent parse tree. We need to find the corresponding node in the main tree.
      // Because we deleted dashes, its index will be >= the index in the extent parse tree.
      CoreLabel l = (CoreLabel) extentHead.label();
      Tree realHead = funkyFindLeafWithApproximateSpan(root, l.value(), l.get(CoreAnnotations.BeginIndexAnnotation.class), approximateness);
      assert(realHead != null);
      return realHead;
    }

    // If reparsing wasn't allowed, try to find a span in the tree
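The mention-head snippet above calls an initCoreLabel helper that is not shown; the ParserConstraint construction is visible, but the helper is elided. A minimal sketch of what such a helper plausibly looks like (the original implementation may differ):

    import edu.stanford.nlp.ling.CoreLabel;

    final class CoreLabelHelperSketch {
      /** Build a bare CoreLabel for a synthetic token such as "It", "was", or ".". */
      static CoreLabel initCoreLabel(String token) {
        CoreLabel label = new CoreLabel();
        label.setWord(token);   // TextAnnotation
        label.setValue(token);  // ValueAnnotation
        return label;
      }
    }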

    return endLeaf;
  }

  /** Find the tree that covers the portion of interest. */
  private static Tree findPartialSpan(final Tree root, final int start) {
    CoreLabel label = (CoreLabel) root.label();
    int startIndex = label.get(CoreAnnotations.BeginIndexAnnotation.class);
    if (startIndex == start) {
      return root;
    }
    for (Tree kid : root.children()) {
      CoreLabel kidLabel = (CoreLabel) kid.label();
      int kidStart = kidLabel.get(CoreAnnotations.BeginIndexAnnotation.class);
      int kidEnd = kidLabel.get(CoreAnnotations.EndIndexAnnotation.class);
      if (kidStart <= start && kidEnd > start) {
        return findPartialSpan(kid, start);
      }
    }
    throw new RuntimeException("Shouldn't happen: " + start + " " + root);
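findPartialSpan relies on BeginIndexAnnotation and EndIndexAnnotation having been filled in, which the earlier snippet does with tree.indexSpans(...). A hedged sketch of that pattern on a toy tree; reading the tree with CoreLabel labels via CoreLabel.factory() is an assumption made so the span indices have somewhere to live:

    import edu.stanford.nlp.ling.CoreAnnotations;
    import edu.stanford.nlp.ling.CoreLabel;
    import edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory;
    import edu.stanford.nlp.trees.Tree;

    public class IndexSpansSketch {
      public static void main(String[] args) {
        Tree tree = Tree.valueOf("(S (NP (NNP Boston)) (VP (VBZ rocks)))",
            new LabeledScoredTreeReaderFactory(CoreLabel.factory()));

        // Fill in BeginIndexAnnotation/EndIndexAnnotation, numbering tokens from 0.
        tree.indexSpans(0);

        for (Tree leaf : tree.getLeaves()) {
          CoreLabel label = (CoreLabel) leaf.label();
          System.out.println(label.value() + " "
              + label.get(CoreAnnotations.BeginIndexAnnotation.class) + "-"
              + label.get(CoreAnnotations.EndIndexAnnotation.class));
        }
      }
    }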
