Package edu.stanford.nlp.trees.tregex.gui

Source Code of edu.stanford.nlp.trees.tregex.gui.Tdiff

package edu.stanford.nlp.trees.tregex.gui;

import java.io.*;
import java.util.*;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.trees.Constituent;
import edu.stanford.nlp.trees.ConstituentFactory;
import edu.stanford.nlp.trees.LabeledConstituent;
import edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.IntPair;

/**
* Extracts the differences between the sets of constituents indicated by
* a pair of parse trees. This class requires trees with CoreLabels.
*
* @author Spence Green
*
*/
public class Tdiff {

  private static final ConstituentFactory cf = LabeledConstituent.factory();


  private Tdiff() {
  }


  /**
   * Marks bracketings in t2 not in t1 using the DoAnnotation field.
   * Returns a list of brackets in t1 not in t2.
   *
   * @param t1
   * @param t2
   * @return A list of brackets in t1 not in t2;
   */
  public static Set<Constituent> markDiff(Tree t1, Tree t2) {
//    if (t1 == null || t2 == null || ! t1.value().equals(t2.value())) {
//      System.err.printf("t1 value is %s; t2 value is %s; t1 is %s t2 is %s", t1.value(), t2.value(), t1, t2);
//    }
    Set<Constituent> t1Labels = (t1 == null) ? Generics.<Constituent>newHashSet() : t1.constituents(cf);
    if(t2 != null) {
      t2.setSpans();
      for(Tree subTree : t2) {
        if(subTree.isPhrasal()) {
          IntPair span = subTree.getSpan();
          Constituent c = cf.newConstituent(span.getSource(), span.getTarget(), subTree.label(), 0.0);
          if(t1Labels.contains(c)) {
            t1Labels.remove(c);
            ((CoreLabel) subTree.label()).set(CoreAnnotations.DoAnnotation.class, false);
          } else {
            ((CoreLabel) subTree.label()).set(CoreAnnotations.DoAnnotation.class, true);
          }
        }
      }
    }

    return t1Labels;
  }


  /**
   * @param args
   */
  public static void main(String[] args) {
    if(args.length != 2) {
      System.out.println("Usage: java Tdiff tree1 tree2");
      return;
    }

    File tree1Path = new File(args[0]);
    File tree2Path = new File(args[1]);

    try {
      TreeReaderFactory trf = new LabeledScoredTreeReaderFactory();
      TreeReader tR1 = trf.newTreeReader(new BufferedReader(new FileReader(tree1Path)));
      TreeReader tR2 = trf.newTreeReader(new BufferedReader(new FileReader(tree2Path)));

      Tree t1 = tR1.readTree();
      Tree t2 = tR2.readTree();

      Set<Constituent> t1Diff = markDiff(t1,t2);
      System.out.println(t2.pennString());
      System.out.println();
      for(Constituent c : t1Diff)
        System.out.println(c);


    } catch (FileNotFoundException e) {
      System.err.println("File not found!");
    } catch (IOException e) {
      System.err.println("Unable to read file!");
    }
  }

}
TOP

Related Classes of edu.stanford.nlp.trees.tregex.gui.Tdiff

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.