Package edu.stanford.nlp.trees

Source Code of edu.stanford.nlp.trees.DateTreeTransformer

package edu.stanford.nlp.trees;

import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.trees.tregex.TregexPatternCompiler;

/**
* Flattens the following two structures:
* <br>
* (NP (NP (NNP Month) (CD Day) )
* (, ,)
* (NP (CD Year) ))
* <br>
* becomes
* <br>
* (NP (NNP Month) (CD Day) (, ,) (CD Year) )
* <br>
* (NP (NP (NNP Month) )
* (NP (CD Year) ))
* <br>
* becomes
* <br>
* (NP (NNP Month) (CD Year))
*
* @author John Bauer
*/
public class DateTreeTransformer implements TreeTransformer {
  static final String MONTH_REGEX = "January|February|March|April|May|June|July|August|September|October|November|December|Jan\\.|Feb\\.|Mar\\.|Apr\\.|Aug\\.|Sep\\.|Sept\\.|Oct\\.|Nov\\.|Dec\\.";
  static final TregexPattern tregexMonthYear = TregexPatternCompiler.defaultCompiler.compile("NP=root <1 (NP <: (NNP=month <: /" + MONTH_REGEX + "/)) <2 (NP=yearnp <: (CD=year <: __)) : =root <- =yearnp");
  static final TregexPattern tregexMonthDayYear = TregexPatternCompiler.defaultCompiler.compile("NP=root <1 (NP=monthdayroot <1 (NNP=month <: /" + MONTH_REGEX +"/) <2 (CD=day <: __)) <2 (/^,$/=comma <: /^,$/) <3 (NP=yearroot <: (CD=year <: __)) : (=root <- =yearroot) : (=monthdayroot <- =day)");

  public Tree transformTree(Tree t) {
    TregexMatcher matcher = tregexMonthYear.matcher(t);
    while (matcher.find()) {
      Tree root = matcher.getNode("root");
      Tree month = matcher.getNode("month");
      Tree year = matcher.getNode("year");
      Tree[] children = new Tree[] {month, year};
      root.setChildren(children);
      matcher = tregexMonthYear.matcher(t);
    }
    matcher = tregexMonthDayYear.matcher(t);
    while (matcher.find()) {
      Tree root = matcher.getNode("root");
      Tree month = matcher.getNode("month");
      Tree day = matcher.getNode("day");
      Tree comma = matcher.getNode("comma");
      Tree year = matcher.getNode("year");
      Tree[] children = new Tree[] {month, day, comma, year};
      root.setChildren(children);
      matcher = tregexMonthDayYear.matcher(t);
    }
    return t;
  }
}
TOP

Related Classes of edu.stanford.nlp.trees.DateTreeTransformer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.