Package edu.stanford.nlp.ling.tokensregex

Source Code of edu.stanford.nlp.ling.tokensregex.MultiCoreMapNodePattern$StringSequenceAnnotationPattern

package edu.stanford.nlp.ling.tokensregex;

import edu.stanford.nlp.pipeline.ChunkAnnotationUtils;
import edu.stanford.nlp.pipeline.CoreMapAttributeAggregator;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Interval;

import java.util.*;

/**
* Pattern for matching across multiple core maps.
*
* <p>
* This class allows for string matches across tokens.  It is not implemented efficiently
* (it basically creates a big pretend token and tries to do string match on that)
* so can be expensive to use.  Whenever possible, <code>SequencePattern</code> should be used instead.
* </p>
*
* @author Angel Chang
*/
public class MultiCoreMapNodePattern extends MultiNodePattern<CoreMap> {

  Map<Class, CoreMapAttributeAggregator> aggregators = CoreMapAttributeAggregator.getDefaultAggregators();
  NodePattern nodePattern;

  public MultiCoreMapNodePattern() {}

  public MultiCoreMapNodePattern(NodePattern nodePattern) {
    this.nodePattern = nodePattern;
  }

  public MultiCoreMapNodePattern(NodePattern nodePattern, Map<Class, CoreMapAttributeAggregator> aggregators) {
    this.nodePattern = nodePattern;
    this.aggregators = aggregators;
  }

  protected Collection<Interval<Integer>> match(List<? extends CoreMap> nodes, int start)
  {
    List<Interval<Integer>> matched = new ArrayList<Interval<Integer>>();
    int minEnd = start + minNodes;
    int maxEnd = nodes.size();
    if (maxNodes >= 0 && maxNodes + start < nodes.size()) {
      maxEnd = maxNodes + start;
    }
    for (int end = minEnd; end <= maxEnd; end++) {
      CoreMap chunk = ChunkAnnotationUtils.getMergedChunk(nodes, start, end, aggregators);
      if (nodePattern.match(chunk)) {
        matched.add(Interval.toInterval(start, end));
      }
    }
    return matched;
  }

  public static class StringSequenceAnnotationPattern extends MultiNodePattern<CoreMap> {
    Class textKey;
    PhraseTable phraseTable;

    public StringSequenceAnnotationPattern(Class textKey, Set<List<String>> targets, boolean ignoreCase) {
      this.textKey = textKey;
      phraseTable = new PhraseTable(false, ignoreCase, false);
      for (List<String> target:targets) {
        phraseTable.addPhrase(target);
        if (maxNodes < 0 || target.size() > maxNodes) maxNodes = target.size();
      }
    }

    public StringSequenceAnnotationPattern(Class textKey, Set<List<String>> targets) {
      this(textKey, targets, false);
    }

    public StringSequenceAnnotationPattern(Class textKey, Map<List<String>, Object> targets, boolean ignoreCase) {
      this.textKey = textKey;
      phraseTable = new PhraseTable(false, ignoreCase, false);
      for (List<String> target:targets.keySet()) {
        phraseTable.addPhrase(target, null, targets.get(target));
        if (maxNodes < 0 || target.size() > maxNodes) maxNodes = target.size();
      }
    }

    public StringSequenceAnnotationPattern(Class textKey, Map<List<String>, Object> targets) {
      this(textKey, targets, false);
    }

    protected Collection<Interval<Integer>> match(List<? extends CoreMap> nodes, int start) {
      PhraseTable.WordList words = new PhraseTable.TokenList(nodes, textKey);
      List<PhraseTable.PhraseMatch> matches = phraseTable.findMatches(words, start, nodes.size(), false);
      Collection<Interval<Integer>> intervals = new ArrayList<Interval<Integer>>(matches.size());
      for (PhraseTable.PhraseMatch match:matches) {
        intervals.add(match.getInterval());
      }
      return intervals;
    }

    public String toString() {
      return ":" + phraseTable;
    }
  }


}
TOP

Related Classes of edu.stanford.nlp.ling.tokensregex.MultiCoreMapNodePattern$StringSequenceAnnotationPattern

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.