Package com.clearnlp.run

Source Code of com.clearnlp.run.PBPostProcess

/**
* Copyright (c) 2009/09-2012/08, Regents of the University of Colorado
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
*    list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
*    this list of conditions and the following disclaimer in the documentation
*    and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* Copyright 2012/09-2013/04, 2013/11-Present, University of Massachusetts Amherst
* Copyright 2013/05-2013/10, IPSoft Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearnlp.run;

import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.kohsuke.args4j.Option;

import com.carrotsearch.hppc.IntOpenHashSet;
import com.clearnlp.constituent.CTLib;
import com.clearnlp.constituent.CTLibEn;
import com.clearnlp.constituent.CTNode;
import com.clearnlp.constituent.CTTree;
import com.clearnlp.propbank.PBArg;
import com.clearnlp.propbank.PBInstance;
import com.clearnlp.propbank.PBLib;
import com.clearnlp.propbank.PBLoc;
import com.clearnlp.reader.AbstractReader;
import com.clearnlp.util.UTHppc;
import com.clearnlp.util.UTOutput;
import com.clearnlp.util.list.SortedIntArrayList;
import com.clearnlp.util.pair.Pair;


public class PBPostProcess extends AbstractRun
{
  /** The error code for mis-aligned arguments. */ 
  static final public String ERR_ALIGN   = "A";
  /** The error code for cyclic relations. */ 
  static final public String ERR_CYCLIC  = "C";
  /** The error code for overlapping arguments. */ 
  static final public String ERR_OVERLAP = "O";
  /** The error code for no counterpart of light-verb. */ 
  static final public String ERR_LV      = "L";
 
  // Command-line options.
  // NOTE(review): presumably populated by initArgs(args) through the args4j annotations — confirm in AbstractRun.
  /** Option {@code -i}: the PropBank file to be post-processed. */
  @Option(name="-i", usage="the PropBank file to be post-processed (required)", required=true, metaVar="<filename>")
  private String s_propFile;
  /** Option {@code -o}: the output file; when {@code null}, output goes to per-tree {@code .prop} files (see printInstances). */
  @Option(name="-o", usage="the post-processed PropBank file (default: null)", required=false, metaVar="<filename>")
  private String s_postFile = null;
  /** Option {@code -t}: the directory containing the Treebank files. */
  @Option(name="-t", usage="the directory path to Treebank files (required)", required=true, metaVar="<dirpath>")
  private String s_treeDir;
  /** Option {@code -n}: whether co-indices of constituent trees are normalized. */
  @Option(name="-n", usage="if set, normalize co-indices of constituent trees", required=false)
  private boolean b_norm;
  /** Option {@code -l}: the language; defaults to {@link AbstractReader#LANG_EN}. */
  @Option(name="-l", usage="language (default: "+AbstractReader.LANG_EN+")", required=false, metaVar="<language>")
  private String s_language = AbstractReader.LANG_EN;
 
  public PBPostProcess(String[] args)
  {
    initArgs(args);
    postProcess(s_propFile, s_postFile, s_treeDir, b_norm, s_language);
  }
 
  public void postProcess(String propFile, String postFile, String treeDir, boolean norm, String language)
  {
    List<PBInstance> instances = PBLib.getPBInstanceList(propFile, treeDir, norm);
    List<PBInstance> remove = new ArrayList<PBInstance>();
    mergeLightVerbs(instances);
    CTTree tree;
    PBArg  aDSP;
   
    for (PBInstance instance : instances)
    {
      System.out.println(instance.getKey());
      tree = instance.getTree();
     
      // LINK-SLC, LINK-PSV are found here
      if (language.equals(AbstractReader.LANG_EN))
        CTLibEn.preprocessTree(tree);
      else if (language.equals(AbstractReader.LANG_AR))
        ;
     
      // removes instances that do not align with the constiteunt tree
      if (isSkip(instance, tree))    // varies by languages
      {
        remove.add(instance);
        continue;
      }
     
      // sorts by arguments' terminal IDs
      instance.sortArgs();
     
      joinConcatenations(instance);
      fixCyclicLocs(instance);
      removeRedundantLocs(instance);
      // annotating NP(PRO) under S following the verb
      if (instance.isVerbPredicate())        // English only
        fixIllegalPROs(instance);
      aDSP = getArgDSP(instance);          // English only
      getLinks(instance);
      normalizeLinks(instance);          // varies by languages
      instance.sortArgs();
      removeRedundantLocs(instance);
      findOverlappingArguments(instance);
      addLinks(instance);
      raiseEmptyArguments(instance);        // English only
      if (aDSP != nullinstance.addArg(aDSP)// English only
    }
   
    instances.removeAll(remove);
   
    if (postFile == null)
      printInstances(instances, treeDir);
    else
      PBLib.printPBInstances(instances, postFile);
  }
 
 
  /**
   * Returns {@code true} if the specific PropBank instance is valid.
   * @param instance a PropBank instance
   * @param tree a constiteunt tree associated with the PropBank instance.
   * @return {@code true} if the specific PropBank instance is valid.
   */
  private boolean isSkip(PBInstance instance, CTTree tree)
  {
    if (PBLib.ILLEGAL_ROLESET.matcher(instance.roleset).find())
      return true;
   
    if (findMisalignedArgs(instance))
      return true;
   
    if (instance.isVerbPredicate() && tree.getTerminal(instance.predId).getParent().isPTag(CTLibEn.PTAG_PP))
      return true;
   
    return false;
  }
 
  private void mergeLightVerbs(List<PBInstance> instances)
  {
    Map<String,PBInstance> mNouns = new HashMap<String,PBInstance>();
    List<PBInstance> lVerbs = new ArrayList<PBInstance>();
    List<PBInstance> remove = new ArrayList<PBInstance>();
    PBInstance nInst;
    List<PBArg> args;
    PBArg rel;
   
    for (PBInstance instance : instances)
    {
      if (instance.isVerbPredicate())
      {
        if (instance.roleset.endsWith("LV"))
          lVerbs.add(instance);
      }
      else
        mNouns.put(instance.getKey(), instance);
    }
   
    for (PBInstance instance : lVerbs)
    {
      nInst = null;
      args  = new ArrayList<PBArg>();
     
      for (PBArg arg : instance.getArgs())
      {
        if (arg.label.endsWith("PRR"))
          nInst = mNouns.get(instance.getKey(arg.getLoc(0).terminalId));
        else if (arg.label.startsWith("LINK") || arg.isLabel(PBLib.PB_ARG0))
          args.add(arg);
      }
     
      if (nInst == null)
      {
        StringBuilder build = new StringBuilder();
       
        build.append(ERR_LV);
        build.append(":");
        build.append(" ");
        build.append(instance.toString());
       
        System.err.println(build.toString());
        remove.add(instance);
      }
      else
      {
        nInst.addArgs(args);
        rel = nInst.getFirstArg(PBLib.PB_REL);
        rel.addLoc(new PBLoc(instance.predId, 0, ","));
       
        args.clear();
       
        for (PBArg arg : instance.getArgs())
        {
          if (!arg.isLabel(PBLib.PB_REL) && !arg.label.endsWith("PRR"))
            args.add(arg);
        }
       
        instance.removeArgs(args);
      }
    }
   
    instances.removeAll(remove);
  }
 
  /** Returns {@code true} if the specific instance includes arguments misaligned to the constituent tree. */
  private boolean findMisalignedArgs(PBInstance instance)
  {
    CTTree tree  = instance.getTree();
    String label = null;
   
    if (!tree.isRange(instance.predId, 0) ||
      (instance.isVerbPredicate() && !tree.getTerminal(instance.predId).pTag.startsWith("VB")) ||
      (instance.isNounPredicate() && !tree.getTerminal(instance.predId).pTag.startsWith("NN")))
    {
      label = PBLib.PB_REL;
    }
    else
    {
      outer: for (PBArg arg : instance.getArgs())
      {
        for (PBLoc loc : arg.getLocs())
        {
          if (!tree.isRange(loc))
          {
            label = arg.label;
            break outer;
          }
         
          if (loc.isType("&"))
            loc.type = "*";
        }
      }
    }
   
    if (label != null)
    {
      StringBuilder build = new StringBuilder();
     
      build.append(ERR_ALIGN);
      build.append(":");
      build.append(label);
      build.append(" ");
      build.append(instance.toString());
     
      System.err.println(build.toString());
      return true;
    }
   
    return false;
  }
 
  /**
   * Joins concatenated locations by replacing them with higher nodes.
   * For each non-rel argument whose locations are all terminal-level (height 0) and of
   * type "" or ",", the terminal IDs are regrouped under the highest ancestors that pass
   * the subset test below; the argument's locations are replaced only if that yields
   * fewer locations than before.
   * PRE: {@link PBInstance#sortArgs()} is called.
   */
  private void joinConcatenations(PBInstance instance)
  {
    SortedIntArrayList ids = new SortedIntArrayList();
    CTTree tree = instance.getTree();
    int terminalId, height;
    CTNode node, parent;
    List<PBLoc> lNew;
   
    for (PBArg arg : instance.getArgs())
    {
      if (arg.isLabel(PBLib.PB_REL))  continue;
      ids.clear();
     
      for (PBLoc loc : arg.getLocs())
      {
        // NOTE(review): these 'return's abandon the whole instance, not only this argument;
        // 'continue' to the next argument may have been intended — confirm before changing.
        if (!loc.isType("") && !loc.isType(","))  return;
        if (loc.height > 0)              return;
        ids.add(loc.terminalId);
      }
     
      lNew = new ArrayList<PBLoc>();
     
      // repeatedly takes the first remaining terminal and climbs as high as possible
      while (!ids.isEmpty())
      {
        terminalId = ids.get(0);
        height     = 0;
        node       = tree.getNode(terminalId, height);
       
        // climbs while the parent exists, is not TOP, and passes the subset test
        // (presumably: every terminal under the parent is among the remaining ids — verify UTHppc.isSubset)
        while ((parent = node.getParent()) != null && !parent.isPTag(CTLib.PTAG_TOP) && UTHppc.isSubset(ids, parent.getSubTerminalIdSet()))
        {
          node = parent;
          height++;
        }
       
        lNew.add(new PBLoc(terminalId, height, ","));
        // the terminals covered by the chosen node are consumed
        ids.removeAll(node.getSubTerminalIdSet());
      }
     
      // replaces only when joining actually reduced the number of locations
      if (lNew.size() < arg.getLocSize())
      {
        // the first location carries no concatenation type
        lNew.get(0).type = "";
        arg.replaceLocs(lNew);
      }
    }
  }
 
  /**
   * Fixes locations cyclic to its predicate, i.e., argument locations whose node
   * contains the predicate's terminal. Three cases are repaired in place; any other
   * cyclic location is only reported to stderr with {@link #ERR_CYCLIC} and left unchanged.
   * PRE: {@link PBInstance#sortArgs()} is called.
   */
  private void fixCyclicLocs(PBInstance instance)
  {
    CTTree  tree  = instance.getTree();
    int    predId = instance.predId;
    boolean isCyc = false;
    CTNode  node, tmp;
   
    // error message: the error code followed by ":label" for each unresolved argument
    StringBuilder build = new StringBuilder();
    build.append(ERR_CYCLIC);
   
    for (PBArg arg : instance.getArgs())
    {
      if (arg.isLabel(PBLib.PB_REL))  continue;
     
      for (PBLoc loc : arg.getLocs())
      {
        // the location is cyclic if its node includes the predicate
        if ((node = tree.getNode(loc)).getSubTerminalIdSet().contains(predId))
        {
          // case 1: modal argument -> lowered to the terminal itself
          if (arg.isLabel(PBLib.PB_ARGM_MOD))
            loc.height = 0;
          // case 2: LINK-SLC on an SBAR -> redirected to the first child matching "+WH.*"
          else if (arg.isLabel(PBLib.PB_LINK_SLC) && node.isPTag(CTLibEn.PTAG_SBAR) && (tmp = node.getFirstChild("+WH.*")) != null)
            loc.set(tmp.getPBLoc(), loc.type);
          // case 3: NP whose first child is an NP not containing the predicate -> lowered by one level
          else if (node.isPTag(CTLibEn.PTAG_NP) && (tmp = node.getChild(0)).isPTag(CTLibEn.PTAG_NP) && !tmp.getSubTerminalIdSet().contains(predId))
            loc.height--;
          else
          {
            // unresolved: records the label and stops scanning this argument's locations
            build.append(":");
            build.append(arg.label);
            isCyc = true;
            break;
          }
        }
      }
    }
   
    if (isCyc)
    {
      build.append(" ");
      build.append(instance.toString());
      System.err.println(build.toString());
    //  System.err.println(tree.toString(true,true));
    }
  }
 
  /**
   * Removes redundant or overlapping locations of this argument.
   * PRE: {@link PBInstance#sortArgs()} is called.
   */
  private void removeRedundantLocs(PBInstance instance)
  {
    List<PBLoc> lDel = new ArrayList<PBLoc>();
    PBLoc curr, next;
    int i, size;
   
    for (PBArg arg : instance.getArgs())
    {
      size = arg.getLocSize() - 1;
      lDel.clear();
     
      for (i=0; i<size; i++)
      {
        curr = arg.getLoc(i);
        next = arg.getLoc(i+1);
       
        if (curr.terminalId == next.terminalId)
          lDel.add(curr);
      }
     
      if (!lDel.isEmpty())
        arg.removeLocs(lDel);
    }
  }
 
  /** Fixes illegal PROs. */
  private void fixIllegalPROs(PBInstance instance)
  {
    CTTree tree = instance.getTree();
    CTNode node;
   
    for (PBArg arg : instance.getArgs())
    {
      if (arg.isLabel(PBLib.PB_REL))  continue;
     
      for (PBLoc loc : arg.getLocs())
      {
        if (loc.terminalId > instance.predId)
        {
          node = tree.getNode(loc);
         
          if (node.isEmptyCategoryRec() && node.hasFTag(CTLibEn.FTAG_SBJ) && node.getParent().isPTag(CTLibEn.PTAG_S))
            loc.height++;
        }
      }
    }
  }
 
  /**
   * Adds antecedents from manual annotation of LINK-*.
   * PRE: {@link PBInstance#sortArgs()} is called.
   */
  private void getLinks(PBInstance instance)
  {
    CTTree tree = instance.getTree();
    CTNode node, link;
    List<PBArg> lLinks = new ArrayList<PBArg>();
    PBLoc loc; int i;
   
    for (PBArg arg : instance.getArgs())
    {
      if (arg.label.startsWith("LINK"))
        lLinks.add(arg);
     
      for (i=arg.getLocSize()-1; i>0; i--)
      {
        loc  = arg.getLoc(i);
        node = tree.getNode(loc);
       
        if (node.pTag.startsWith("WH"))
        {
          link = CTLibEn.getComplementizer(node);

          if (link.getAntecedent() == null)
          {
            link.setAntecedent(tree.getNode(arg.getLoc(i-1)));
            break;
          }
        }
        else if (CTLibEn.isComplementizer(node))
        {
          if (node.getAntecedent() == null)
          {
            node.setAntecedent(tree.getNode(arg.getLoc(i-1)));
            break;
          }
        }
        else if (node.isEmptyCategoryRec() && loc.isType("*"))
        {
          link = node.getFirstTerminal();
         
          if (link.getAntecedent() == null)
            link.setAntecedent(tree.getNode(arg.getLoc(i-1)));
        }
      }
    }
   
    if (!lLinks.isEmpty())
      instance.removeArgs(lLinks);
  }
 
  /**
   * Normalizes links.
   * PRE: {@link CTTree#setPBLocs()} and {@link C} needs to be called before.
   */
  private void normalizeLinks(PBInstance instance)
  {
    List<PBLoc> lDel = new ArrayList<PBLoc>();
    CTTree tree = instance.getTree();
    CTNode curr, node, ante;
    PBLoc  cLoc; int i;
    List<CTNode> list;
    CTNode pred = tree.getTerminal(instance.predId);
   
    for (PBArg arg : instance.getArgs())
    {
      if (arg.isLabel(PBLib.PB_REL))  continue;
      lDel.clear();
     
      for (i=0; i<arg.getLocSize(); i++// size() gets changed dynamically
      {
        cLoc = arg.getLoc(i);
        curr = tree.getNode(cLoc);
       
        if (CTLibEn.isComplementizer(curr))
        {
          if ((ante = curr.getAntecedent()) != null)
            arg.addLoc(new PBLoc(ante.getPBLoc(), "*"));
         
          if ((node = getCoIndexedWHNode(curr)) != null)
            cLoc.set(node.getPBLoc(), "*");
        }
        else if (curr.pTag.startsWith("WH"))
        {
          if ((node = CTLibEn.getComplementizer(curr)) != null && (ante = node.getAntecedent()) != null)
            arg.addLoc(new PBLoc(ante.getPBLoc(), "*"));
        }
        else if (curr.isEmptyCategoryRec())    // *T*, *
        {
          cLoc.height = 0;
          node = tree.getTerminal(cLoc.terminalId);
         
          if ((ante = node.getAntecedent()) != null)
            arg.addLoc(new PBLoc(ante.getPBLoc(), "*"));
        }
        else if (!(list = curr.getIncludedEmptyCategory("\\*(ICH|RNR)\\*.*")).isEmpty())
        {
          for (CTNode ec : list)
          {
            lDel.add(new PBLoc(ec.getPBLoc(), ""));
           
            if ((ante = ec.getAntecedent()) != null)
            {
              if (ante.isDescendantOf(curr) || pred.isDescendantOf(ante))
                lDel.add(new PBLoc(ante.getPBLoc(), ""));
              else
                arg.addLoc(new PBLoc(ante.getPBLoc(), ";"));
            }
          }
        }
        else if (curr.isPTag(CTLibEn.PTAG_S) && (node = curr.getFirstChild("-"+CTLibEn.FTAG_SBJ)) != null && node.isEmptyCategoryRec() && curr.containsTags(CTLibEn.PTAG_VP))
        {
          node = node.getFirstTerminal();
         
          if (CTLibEn.RE_NULL.matcher(node.form).find() && (ante = node.getAntecedent()) != null && ante.hasFTag(CTLibEn.FTAG_SBJ) && !ante.isEmptyCategoryRec() && !existsLoc(instance, ante.getPBLoc()))
            arg.addLoc(new PBLoc(ante.getPBLoc(), "*"));
        }
      }
     
      // removes errorneous arguments
      for (PBLoc rLoc : lDel)
        arg.removeLoc(rLoc.terminalId, rLoc.height);
    }
  }
 
  /** Called by {@link PBLibEn#normalizeLinks(CTTree, PBArg, int)}. */
  private CTNode getCoIndexedWHNode(CTNode node)
  {
    CTNode parent = node.getParent();
   
    while (parent != null)
    {
      if (!parent.pTag.startsWith("WH"))
        break;
     
      if (parent.coIndex != -1)
        return parent;
     
      parent = parent.getParent();
    }
   
    return null;
  }
 
  private boolean existsLoc(PBInstance instance, PBLoc loc)
  {
    for (PBArg arg : instance.getArgs())
    {
      for (PBLoc l : arg.getLocs())
      {
        if (l.equals(loc.terminalId, loc.height))
          return true;
      }
    }
   
    return false;
  }
 
  private boolean findOverlappingArguments(PBInstance instance)
  {
    CTTree  tree = instance.getTree();
    PBArg ai, aj;
    IntOpenHashSet si, sj;
    int i, j, size = instance.getArgSize(), ni, nj;
    List<PBArg> lDel = new ArrayList<PBArg>();
   
    for (i=0; i<size; i++)
    {
      ai = instance.getArg(i);
      si = getTerminalIdSet(ai, tree);
      ni = si.size();
     
      for (j=i+1; j<size; j++)
      {
        aj = instance.getArg(j);
        sj = getTerminalIdSet(aj, tree);
        nj = sj.size();
       
        if (UTHppc.isSubset(si, sj) && ni != nj)
        {
          lDel.add(aj);
        }
        else if (UTHppc.isSubset(sj, si) && ni != nj)
        {
          lDel.add(ai);
        }
        else if (!UTHppc.intersection(si, sj).isEmpty())
        {
          StringBuilder build = new StringBuilder();
         
          build.append(ERR_OVERLAP);
          build.append(":");
          build.append(ai.label);
          build.append(":");
          build.append(aj.label);
          build.append(" ");
          build.append(instance.toString());
         
          System.err.println(build.toString());
        //  System.err.println(tree.toString(true,true));
          return true;
        }
      }
    }
   
    instance.removeArgs(lDel);
    return false;
  }
 
  /** Returns the set of terminal IDs associated with this argument. */
  private IntOpenHashSet getTerminalIdSet(PBArg arg, CTTree tree)
  {
    IntOpenHashSet set = new IntOpenHashSet();
   
    for (PBLoc loc : arg.getLocs())
    {
      if (!loc.isType(";"))
        set.addAll(tree.getNode(loc).getSubTerminalIdSet());
    }
   
    return set;
  }
 
  private void addLinks(PBInstance instance)
  {
    CTTree tree = instance.getTree();
    CTNode node, comp, ante = null;
    String label;
    List<PBArg> lAdd = new ArrayList<PBArg>();
    PBArg nArg;
   
    for (PBArg arg : instance.getArgs())
    {
      for (PBLoc loc : arg.getLocs())
      {
        node  = tree.getNode(loc);
        label = null;
       
        if (node.pTag.startsWith("WH"))
        {
          if ((comp = CTLibEn.getComplementizer(node)) != null && (ante = comp.getAntecedent()) != null)
            label = PBLib.PB_LINK_SLC;
        }
        else if (node.isEmptyCategory())
        {
          if ((ante = node.getAntecedent()) != null)
          {
            if (node.form.equals(CTLibEn.EC_NULL))
              label = PBLib.PB_LINK_PSV;
            else if (node.form.equals(CTLibEn.EC_PRO))
              label = PBLib.PB_LINK_PRO;
          }
        }
       
        if (label != null)
        {
          nArg = new PBArg();
          nArg.label = label;
          nArg.addLoc(new PBLoc(ante.getPBLoc(), ""));
          nArg.addLoc(new PBLoc(node.getPBLoc(), "*"));
         
          lAdd.add(nArg);
        }
      }
    }
   
    instance.addArgs(lAdd);
  }
 
  private void raiseEmptyArguments(PBInstance instance)
  {
    CTTree tree = instance.getTree();
    CTNode node, parent;
    int i, size;
    PBLoc loc;
   
    for (PBArg arg : instance.getArgs())
    {
      if (arg.isLabel(PBLib.PB_REL))  continue;
      size = arg.getLocSize();
     
      for (i=0; i<size; i++)
      {
        loc  = arg.getLoc(i);
        node = tree.getNode(loc);
        parent = node.getParent();
       
        if (parent != null && !parent.isPTag(CTLib.PTAG_TOP) && parent.getChildrenSize() == 1)
          node = parent;
       
        loc.set(node.getPBLoc(), loc.type);
      }
    }
  }
 
  private void printInstances(List<PBInstance> instances, String treeDir)
  {
    String treePath = "", propPath;
    PrintStream fout = null;
   
    for (PBInstance instance : instances)
    {
      if (!treePath.equals(instance.treePath))
      {
        if (fout != nullfout.close();
        treePath = instance.treePath;
        propPath = treePath.substring(0, treePath.lastIndexOf(".")) + ".prop";
       
        if (new File(propPath).exists())
          System.err.println("Warning: '"+propPath+"' already exists");
       
        fout = UTOutput.createPrintBufferedFileStream(treeDir+File.separator+propPath);
      }
     
      fout.println(instance.toString());
    }
   
    if (fout != nullfout.close();
  }
 
  private PBArg getArgDSP(PBInstance instance)
  {
    CTTree tree = instance.getTree();
    CTNode pred = tree.getTerminal(instance.predId);
    Pair<CTNode,CTNode> pair = getESMPair(pred);
    if (pair == nullreturn null;
   
    Pair<PBArg,IntOpenHashSet> max = new Pair<PBArg,IntOpenHashSet>(null, new IntOpenHashSet());
    IntOpenHashSet set;
    CTNode prn = pair.o1;
    CTNode esm = pair.o2;
   
    for (PBArg arg : instance.getArgs())
    {
      if (!PBLib.isNumberedArgument(arg) || arg.isLabel(PBLib.PB_ARG0))
        continue;
     
      set = arg.getTerminalIdSet(tree);
     
      if (set.contains(esm.getTerminalId()))
      {
        max.set(arg, set);
        break;
      }
     
      if (arg.hasType(",") && max.o2.size() < set.size())
        max.set(arg, set);
    }
   
    if (max.o1 == nullreturn null;
    CTNode dsp = esm.getAntecedent();
    if (dsp == nulldsp = prn.getNearestAncestor("+S.*");
   
    if (dsp != null)
    {
      PBArg arg = new PBArg();
      arg.addLoc(dsp.getPBLoc());
      arg.label = max.o1.label+"-"+PBLib.PB_DSP;
      instance.removeArgs(max.o1.label);
     
      return arg;
    }
   
    return null;
  }
 
  private Pair<CTNode,CTNode> getESMPair(CTNode pred)
  {
    CTNode s = pred.getNearestAncestor("+S.*");
   
    if (s != null && s.getParent().isPTag(CTLibEn.PTAG_PRN))
    {
      CTNode next = pred.getNextSibling("+S|SBAR");
     
      if (next != null)
      {
        CTNode ec = getESM(next);
        if (ec != nullreturn new Pair<CTNode,CTNode>(s.getParent(), ec);
      }
    }
   
    return null;
  }
 
  private CTNode getESM(CTNode node)
  {
    if (node.isPTag(CTLibEn.PTAG_S))
      return getESMAux(node);
    else if (node.isPTag(CTLibEn.PTAG_SBAR))
    {
      if (node.getChildrenSize() == 2)
      {
        CTNode fst = node.getChild(0);
        CTNode snd = node.getChild(1);
       
        if (fst.isEmptyCategory() && fst.form.equals(CTLibEn.EC_ZERO))
          return getESMAux(snd);
      }
    }
   
    return null;
  }
 
  private CTNode getESMAux(CTNode node)
  {
    if (node.isEmptyCategoryRec())
    {
      CTNode ec = node.getFirstTerminal();
     
      if (ec != null && (ec.form.startsWith(CTLibEn.EC_TRACE) || ec.form.startsWith(CTLibEn.EC_ESM)))
        return ec;
    }
   
    return null;
  }
 
  static public void main(String[] args)
  {
    new PBPostProcess(args);
  //  new PBPostProcess().postProcess(propFile, postFile, treeDir, norm);
  }
}
TOP

Related Classes of com.clearnlp.run.PBPostProcess

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.