Package edu.stanford.nlp.ling

Examples of edu.stanford.nlp.ling.CoreLabel


  private static Tree funkyFindLeafWithApproximateSpan(Tree root, String token, int index, int approximateness) {
    // System.err.println("Searching " + root + "\n  for " + token + " at position " + index + " (plus up to " + approximateness + ")");
    List<Tree> leaves = root.getLeaves();
    for (Tree leaf : leaves) {
      CoreLabel label = CoreLabel.class.cast(leaf.label());
      Integer indexInteger = label.get(CoreAnnotations.IndexAnnotation.class);
      if (indexInteger == null) continue;
      int ind = indexInteger - 1;
      if (token.equals(leaf.value()) && ind >= index && ind <= index + approximateness) {
        return leaf;
      }
View Full Code Here


    SieveCoreferenceSystem.logger.warning("RuleBasedCorefMentionFinder: Last resort: returning as head: " + leaves.get(fallback));
    return leaves.get(fallback); // last except for the added period.
  }

  private static CoreLabel initCoreLabel(String token) {
    CoreLabel label = new CoreLabel();
    label.set(CoreAnnotations.TextAnnotation.class, token);
    label.set(CoreAnnotations.ValueAnnotation.class, token);
    return label;
  }
View Full Code Here

        // make sure we annotate only valid POS tags
        if (containsValidPos(document, start, start + entry.regex.size())) {
          // annotate each matching token
          for (int i = start; i < start + entry.regex.size(); i++) {
            CoreLabel token = document.get(i);
            token.set(CoreAnnotations.AnswerAnnotation.class, entry.type);
          }
        }
        start++;
      }
    }
View Full Code Here

    for (int start = searchStart, end = document.size() - regex.size(); start <= end; start++) {
      boolean failed = false;
      for (int i = 0; i < rSize; i++) {
        Pattern pattern = regex.get(i);
        String exact = entry.exact.get(i);
        CoreLabel token = document.get(start + i);
        String NERType = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
        String currentType = token.get(CoreAnnotations.AnswerAnnotation.class);

        if (
            currentType != null ||
            (exact != null && ! (ignoreCase ? exact.equalsIgnoreCase(token.word()) : exact.equals(token.word()))) ||
            ! (entry.overwritableTypes.contains(NERType) || myLabels.contains(NERType))  ||
            ! pattern.matcher(token.word()).matches()  // last, as this is likely the expensive operation
            ) {
          failed = true;
          break;
        }
      }
View Full Code Here

  // This probably isn't needed now; everything is always a core label. But no-op.
  private static void convertToCoreLabels(Tree tree) {
    Label l = tree.label();
    if (! (l instanceof CoreLabel)) {
      CoreLabel cl = new CoreLabel();
      cl.setValue(l.value());
      tree.setLabel(cl);
    }

    for (Tree kid : tree.children()) {
      convertToCoreLabels(kid);
View Full Code Here

    Tree endLeaf = leaves.get(end - 1);
    return Trees.getLowestCommonAncestor(Arrays.asList(startLeaf, endLeaf), tree);
  }

  private static Tree findTreeWithSpan(Tree tree, int start, int end) {
    CoreLabel l = (CoreLabel) tree.label();
    if (l != null && l.has(CoreAnnotations.BeginIndexAnnotation.class) && l.has(CoreAnnotations.EndIndexAnnotation.class)) {
      int myStart = l.get(CoreAnnotations.BeginIndexAnnotation.class);
      int myEnd = l.get(CoreAnnotations.EndIndexAnnotation.class);
      if (start == myStart && end == myEnd){
        // found perfect match
        return tree;
      } else if (end < myStart) {
        return null;
View Full Code Here

     
      for (int i = 0; i < lines.length; i++) {
        if (lines[i].trim().length() < 1) {
          continue;
        }
        CoreLabel wi = new CoreLabel();
        String[] info = lines[i].split("\\s+");             
        wi.set(CoreAnnotations.AnswerAnnotation.class, info[0]);
        wi.set(CoreAnnotations.GoldAnswerAnnotation.class, info[0]);
        for (int j = 1; j < info.length; j++) {
          String[] bits = info[j].split(":");
          //wi.set(bits[0], bits[1]);
        }
//        System.err.println(wi);
View Full Code Here

        if (token.get(CoreAnnotations.NamedEntityTagAnnotation.class) == null)
          token.set(CoreAnnotations.NamedEntityTagAnnotation.class, classifier.flags.backgroundSymbol);
      }

      for (int start = 0; start < tokens.size(); start++) {
        CoreLabel token = tokens.get(start);
        String answerType = token.get(CoreAnnotations.AnswerAnnotation.class);
        if (answerType == null) continue;
        String NERType = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);

        int answerEnd = findEndOfAnswerAnnotation(tokens, start);
        int NERStart = findStartOfNERAnnotation(tokens, start);
        int NEREnd = findEndOfNERAnnotation(tokens, start);
View Full Code Here

      // first construct the actual nodes; keep them indexed by their index and copy count
      // sentences such as "I went over the river and through the woods" have
      // copys for "went" in the collapsed dependencies
      TwoDimensionalMap<Integer, Integer, IndexedWord> nodeMap = TwoDimensionalMap.hashMap();
      for (IntermediateNode in: nodes){
        CoreLabel token = sentence.get(in.index - 1); // index starts at 1!
        IndexedWord word;
        if (in.copyAnnotation > 0) {
          // TODO: if we make a copy wrapper CoreLabel, use it here instead
          word = new IndexedWord(new CoreLabel(token));
          word.setCopyCount(in.copyAnnotation);
        } else {
          word = new IndexedWord(token);
        }
       
View Full Code Here

  public static String joinWithOriginalWhiteSpace(List<CoreLabel> tokens) {
    if (tokens.isEmpty()) {
      return "";
    }

    CoreLabel lastToken = tokens.get(0);
    StringBuilder buffer = new StringBuilder(lastToken.word());

    for (int i = 1; i < tokens.size(); i++) {
      CoreLabel currentToken = tokens.get(i);
      int numSpaces = currentToken.beginPosition() - lastToken.endPosition();
      if (numSpaces < 0) {
        numSpaces = 0;
      }

      buffer.append(repeat(' ', numSpaces)).append(currentToken.word());
      lastToken = currentToken;
    }

    return buffer.toString();
  }
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.ling.CoreLabel

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.