Package com.clearnlp.dependency

Source Code of com.clearnlp.dependency.DEPLibEn

/**
* Copyright (c) 2009/09-2012/08, Regents of the University of Colorado
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
*    list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
*    this list of conditions and the following disclaimer in the documentation
*    and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* Copyright 2012/09-2013/04, 2013/11-Present, University of Massachusetts Amherst
* Copyright 2013/05-2013/10, IPSoft Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearnlp.dependency;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.List;
import java.util.regex.Pattern;

import com.clearnlp.constituent.CTLibEn;
import com.clearnlp.dependency.srl.SRLArc;
import com.clearnlp.dependency.srl.SRLLib;
import com.clearnlp.morphology.MPLibEn;
import com.clearnlp.util.pair.Pair;

/**
* Dependency library for English.
* @since 1.0.0
* @author Jinho D. Choi ({@code jdchoi77@gmail.com})
*/
public class DEPLibEn extends DEPLib
{
  /** The dependency label for passive. */
  static public final String DEP_PASS  = "pass";
  /** The dependency label for subjects. */
  static public final String DEP_SUBJ  = "subj";
 
  /** The dependency label for adjectival complements. */
  static public final String DEP_ACOMP    = "acomp";
  /** The dependency label for adverbial clause modifiers. */
  static public final String DEP_ADVCL    = "advcl";
  /** The dependency label for adverbial modifiers. */
  static public final String DEP_ADVMOD    = "advmod";
  /** The dependency label for agents. */
  static public final String DEP_AGENT    = "agent";
  /** The dependency label for adjectival modifiers. */
  static public final String DEP_AMOD      = "amod";
  /** The dependency label for appositional modifiers. */
  static public final String DEP_APPOS    = "appos";
  /** The dependency label for attributes. */
  static public final String DEP_ATTR      = "attr";
  /** The dependency label for auxiliary verbs. */
  static public final String DEP_AUX      = "aux";
  /** The dependency label for passive auxiliary verbs. */
  static public final String DEP_AUXPASS    = DEP_AUX+DEP_PASS;
  /** The dependency label for coordinating conjunctions. */
  static public final String DEP_CC      = "cc";
  /** The dependency label for clausal complements. */
  static public final String DEP_CCOMP    = "ccomp";
  /** The dependency label for complementizers. */
  static public final String DEP_COMPLM    = "complm";
  /** The dependency label for conjuncts. */
  static public final String DEP_CONJ      = "conj";
  /** The dependency label for clausal subjects. */
  static public final String DEP_CSUBJ    = "c"+DEP_SUBJ;
  /** The dependency label for clausal passive subjects. */
  static public final String DEP_CSUBJPASS  = DEP_CSUBJ+DEP_PASS;
  /** The dependency label for unknown dependencies. */
  static public final String DEP_DEP      = "dep";
  /** The dependency label for determiners. */
  static public final String DEP_DET      = "det";
  /** The dependency label for direct objects. */
  static public final String DEP_DOBJ     = "dobj";
  /** The dependency label for expletives. */
  static public final String DEP_EXPL     = "expl";
  /** The dependency label for modifiers in hyphenation. */
  static public final String DEP_HMOD     = "hmod";
  /** The dependency label for hyphenation. */
  static public final String DEP_HYPH     = "hyph";
  /** The dependency label for indirect objects. */
  static public final String DEP_IOBJ     = "iobj";
  /** The dependency label for interjections. */
  static public final String DEP_INTJ      = "intj";
  /** The dependency label for markers. */
  static public final String DEP_MARK      = "mark";
  /** The dependency label for meta modifiers. */
  static public final String DEP_META      = "meta";
  /** The dependency label for negation modifiers. */
  static public final String DEP_NEG      = "neg";
  /** The dependency label for non-finite modifiers. */
  static public final String DEP_NFMOD    = "nfmod";
  /** The dependency label for infinitival modifiers. */
  static public final String DEP_INFMOD    = "infmod";
  /** The dependency label for noun phrase modifiers. */
  static public final String DEP_NMOD     = "nmod";
  /** The dependency label for noun compound modifiers. */
  static public final String DEP_NN      = "nn";
  /** The dependency label for noun phrase as adverbial modifiers. */
  static public final String DEP_NPADVMOD    = "npadvmod";
  /** The dependency label for nominal subjects. */
  static public final String DEP_NSUBJ    = "n"+DEP_SUBJ;
  /** The dependency label for nominal passive subjects. */
  static public final String DEP_NSUBJPASS  = DEP_NSUBJ+DEP_PASS;
  /** The dependency label for numeric modifiers. */
  static public final String DEP_NUM      = "num";
  /** The dependency label for elements of compound numbers. */
  static public final String DEP_NUMBER    = "number";
  /** The dependency label for object predicates. */
  static public final String DEP_OPRD      = "oprd";
  /** The dependency label for parataxis. */
  static public final String DEP_PARATAXIS   = "parataxis";
  /** The dependency label for participial modifiers. */
  static public final String DEP_PARTMOD    = "partmod";
  /** The dependency label for modifiers of prepositions. */
  static public final String DEP_PMOD     = "pmod";
  /** The dependency label for prepositional complements. */
  static public final String DEP_PCOMP     = "pcomp";
  /** The dependency label for objects of prepositions. */
  static public final String DEP_POBJ     = "pobj";
  /** The dependency label for possession modifiers. */
  static public final String DEP_POSS      = "poss";
  /** The dependency label for possessive modifiers. */
  static public final String DEP_POSSESSIVE   = "possessive";
  /** The dependency label for pre-conjuncts. */
  static public final String DEP_PRECONJ    = "preconj";
  /** The dependency label for pre-determiners. */
  static public final String DEP_PREDET    = "predet";
  /** The dependency label for prepositional modifiers. */
  static public final String DEP_PREP      = "prep";
  /** The dependency label for particles. */
  static public final String DEP_PRT       = "prt";
  /** The dependency label for punctuation. */
  static public final String DEP_PUNCT    = "punct";
  /** The dependency label for modifiers of quantifiers. */
  static public final String DEP_QMOD      = "qmod";
  /** The dependency label for quantifier phrase modifiers. */
  static public final String DEP_QUANTMOD    = "quantmod";
  /** The dependency label for relative clause modifiers. */
  static public final String DEP_RCMOD    = "rcmod";
  /** The dependency label for roots. */
  static public final String DEP_ROOT     = "root";
  /** The dependency label for open clausal modifiers. */
  static public final String DEP_XCOMP    = "xcomp";
  /** The dependency label for open clausal subjects. */
  static public final String DEP_XSUBJ    = "x"+DEP_SUBJ;
  /** The secondary dependency label for gapping relations. */
  static public final String DEP_GAP      = "gap";
  /** The secondary dependency label for referents. */
  static public final String DEP_REF      = "ref";
  /** The secondary dependency label for right node raising. */
  static public final String DEP_RNR      = "rnr";
 
  static public Pattern P_SBJ = Pattern.compile("^[nc]subj");
  static public Pattern P_OBJ = Pattern.compile("^[di]obj");
  static public Pattern P_AUX = Pattern.compile("^aux");
  static public Pattern P_NSUBJ = Pattern.compile("^nsubj");
  static public Pattern P_PUNCT = Pattern.compile("^(hyph|punct)$");
 
/*  static public final String CONLL_ADV  = "ADV";
  static public final String CONLL_AMOD  = "AMOD";
  static public final String CONLL_APPO  = "APPO";
  static public final String CONLL_COORD  = "COORD";
  static public final String CONLL_CONJ  = "CONJ";
  static public final String CONLL_DEP  = "DEP";
  static public final String CONLL_DTV  = "DTV";
  static public final String CONLL_EXTR  = "EXTR";
  static public final String CONLL_IM    = "IM";
  static public final String CONLL_INTJ  = "INTJ";
  static public final String CONLL_LGS  = "LGS";
  static public final String CONLL_LOC  = "LOC";
  static public final String CONLL_META  = "META";
  static public final String CONLL_NMOD  = "NMOD";
  static public final String CONLL_OBJ  = "OBJ";
  static public final String CONLL_OPRD  = "OPRD";
  static public final String CONLL_P    = "P";
  static public final String CONLL_PMOD  = "PMOD";
  static public final String CONLL_PRD  = "PRD";
  static public final String CONLL_PRN  = "PRN";
  static public final String CONLL_PRT  = "PRT";
  static public final String CONLL_PUT  = "PUT";
  static public final String CONLL_QMOD  = "QMOD";
  static public final String CONLL_ROOT  = "ROOT";
  static public final String CONLL_SBJ  = "SBJ";
  static public final String CONLL_SUB  = "SUB";
  static public final String CONLL_VC    = "VC";
  static public final String CONLL_XCOMP  = "XCOMP";*/
 
  static public boolean isSubject(String label)
  {
    return P_SBJ.matcher(label).find();
  }
 
  static public boolean isObject(String label)
  {
    return P_OBJ.matcher(label).find();
  }
 
  static public boolean isAuxiliary(String label)
  {
    return P_AUX.matcher(label).find();
  }

  static public Deque<DEPNode> getPreviousConjuncts(DEPNode node)
  {
    Deque<DEPNode> deque = new ArrayDeque<DEPNode>();
   
    while (node.isLabel(DEPLibEn.DEP_CONJ))
    {
      node = node.getHead();
      deque.add(node);
    }
   
    return deque;
  }
 
  static public Deque<DEPNode> getNextConjuncts(DEPNode node)
  {
    Deque<DEPNode> deque = new ArrayDeque<DEPNode>();
   
    getNextConjunctsAux(node, deque);
    return deque;
  }
 
  static private void getNextConjunctsAux(DEPNode node, Deque<DEPNode> deque)
  {
    for (DEPNode dep : node.getDependentsByLabels(DEPLibEn.DEP_CONJ))
    {
      deque.add(dep);
      getNextConjunctsAux(dep, deque);
    }
  }
 
  static public boolean isSmallClause(DEPNode verb)
  {
    DEPNode sbj = verb.getFirstDependentByLabel(P_SBJ);
   
    if (sbj != null)
      return isSmallClauseAux(verb);
   
    return false;
  }
 
  static public boolean isSmallClauseAux(DEPNode verb)
  {
    DEPNode aux = verb.getFirstDependentByLabel(DEP_AUX);
   
    if (aux == nullreturn verb.isPos(CTLibEn.POS_VBG);
    else        return aux.isPos(CTLibEn.POS_TO);
  }
 
  static public List<List<DEPArc>> postLabel(DEPTree tree)
  {
    List<List<DEPArc>> argLists;
    int i, size = tree.size();
    List<DEPArc> argList;
    DEPNode node;
   
    tree.setDependents();
    argLists = tree.getArgumentList();
   
    for (i=1; i<size; i++)
    {
      node = tree.get(i);
      argList = argLists.get(i);
     
      if (node.isLabel(DEP_PREP))
      {
        relinkPreposition(node);
      }
      else if (MPLibEn.isVerb(node.pos))
      {
        labelReferentOfRelativeClause(node, argList);
       
        if (!relinkReferent(node))
          relabelPrepositionWithReferent(node);
      }
    }
   
    tree.resetDependents();
    return argLists;
  }
 
  /**
   * Called by {@link DEPLibEn#postLabel(DEPTree)}.
   * @param verb a dependency node whose dependency relation to its head is {@link DEPLibEn#DEP_PREP}.
   */
  static private void relinkPreposition(DEPNode prep)
  {
    DEPNode head = prep.getHead();
   
    if (head.isLabel(DEPLibEn.DEP_POBJ))
      head = head.getHead();
   
    if (MPLibEn.isNoun(head.pos) || head.isPos(CTLibEn.POS_IN) || head.isPos(CTLibEn.POS_RP))
    {
      DEPNode gHead = head.getHead();
      SRLArc  sp, sh;
     
      if (gHead != null && (sp = prep.getSHead(gHead)) != null)
      {
        if ((sh = head.getSHead(gHead)) != null)
        {
          if (head.isPos(CTLibEn.POS_IN) && sh.isLabel(SRLLib.C_V))
          {
            head.pos = CTLibEn.POS_RP;
            head.setLabel(DEP_PRT);
          }
         
          prep.setHead(gHead);
        }
        else
        {
          prep.removeSHead(sp);
          head.addSHead(gHead, sp.getLabel());
        }
      }
     
//      if (gHead != null && (sp = prep.getSHead(gHead)) != null && (sh = head.getSHead(gHead)) != null)
//        prep.setHead(gHead);
    }
  }
 
  /** Called by {@link DEPLibEn#postLabel(DEPTree)}. */
  static private void labelReferentOfRelativeClause(DEPNode verb, List<DEPArc> argList)
  {
    DEPNode top  = getTopVerbChain(verb);
    DEPNode head = top.getHead();
   
    if (top.isLabel(DEPLibEn.DEP_RCMOD) && !head.containsSHead(verb))
    {
      for (DEPArc arc : argList)
      {
        if (arc.isLabel(SRLLib.P_ARG_REF) && isReferentArgument(arc.getNode()))
        {
          head.addSHead(verb, SRLLib.getBaseLabel(arc.getLabel()));
          return;
        }
      }
    }
  }
 
  /** Called by {@link DEPLibEn#labelReferentOfRelativeClause(DEPNode, List)}. */
  static private DEPNode getTopVerbChain(DEPNode verb)
  {
    while (MPLibEn.isVerb(verb.getHead().pos) && (verb.isLabel(DEP_CONJ) || verb.isLabel(DEP_XCOMP)))
      verb = verb.getHead();
     
    return verb;
  }
 
  /** Called by {@link DEPLibEn#labelReferentOfRelativeClause(DEPNode, List)}. */
  static boolean isReferentArgument(DEPNode arg)
  {
    return arg.getFirstDependentByLabel(DEP_POBJ) != null || arg.isLemma("that") || arg.isLemma("which");
  }
 
  static public boolean containsRelativizer(DEPNode arg)
  {
    return containsRelativizerAux(arg, arg);
  }
 
  static private boolean containsRelativizerAux(DEPNode arg, DEPNode head)
  {
    DEPNode dep;
   
    for (DEPArc arc : arg.getDependents())
    {
      dep = arc.getNode();
     
      if (dep.pos.startsWith("W") || ((!MPLibEn.isVerb(dep.pos) || (arg == head && dep.isLabel(DEP_XCOMP))) && containsRelativizerAux(dep, head)))
        return true;
    }
   
    return false;
  }
 
  /** Called by {@link DEPLibEn#postLabel(DEPTree)}. */
  static private boolean relinkReferent(DEPNode verb)
  {
    Pair<DEPNode,SRLArc> c = getFirstRelativizer(verb);
    if (c == nullreturn false;
    Pair<DEPNode,SRLArc> p = getLastPrepositionWithoutDependent(verb);
    if (p == nullreturn false;
   
    DEPNode comp = c.o1;
   
    comp.setHead(p.o1, DEP_POBJ);
    comp.removeSHead(c.o2);
   
    if (p.o2 != nullSRLLib.toReferentArgument(p.o2);
    else        p.o1.addSHead(c.o2);
   
    return true;
  }
 
  /** Called by {@link DEPLibEn#relinkReferent(DEPNode)}. */
  static private Pair<DEPNode,SRLArc> getFirstRelativizer(DEPNode verb)
  {
    SRLArc  sHead;
    DEPNode dep;
   
    for (DEPArc arc : verb.getDependents())
    {
      dep = arc.getNode();
     
      if (dep.id > verb.id)
        return null;
     
      if ((sHead = dep.getSHead(verb, SRLLib.P_ARG_REF)) != null)
        return (dep.isPos(CTLibEn.POS_IN)) ? null : new Pair<DEPNode,SRLArc>(dep, sHead);
    }
   
    return null;
  }
 
  /** Called by {@link DEPLibEn#relinkReferent(DEPNode)}. */
  static private Pair<DEPNode,SRLArc> getLastPrepositionWithoutDependent(DEPNode verb)
  {
    List<DEPArc> arcs = verb.getDependents();
    DEPNode dep;
    DEPArc arc;
    SRLArc sHead;
    int i;
   
    for (i=arcs.size()-1; i>=0; i--)
    {
      arc = arcs.get(i);
      dep = arc.getNode();
     
      if (dep.id < verb.id)
        return null;
     
      if (dep.isPos(CTLibEn.POS_IN) && (sHead = dep.getSHead(verb)) != null)
        return dep.getDependents().isEmpty() ? new Pair<DEPNode,SRLArc>(dep, sHead) : null;
     
//      if (dep.isPos(CTLibEn.POS_IN))
//      {
//        if (!dep.getDependents().isEmpty())
//          return null;
//       
//        return new Pair<DEPNode,SRLArc>(dep, dep.getSHead(verb));
//      }
    }
   
    return null;
  }
 
  /** Called by {@link DEPLibEn#postLabel(DEPTree)}. */
  static private void relabelPrepositionWithReferent(DEPNode verb)
  {
    DEPNode dep, pobj;
    SRLArc sHead;
   
    for (DEPArc arc : verb.getDependents())
    {
      dep = arc.getNode();
     
      if (dep.isPos(CTLibEn.POS_IN) && (sHead = dep.getSHead(verb)) != null && !sHead.isLabel(SRLLib.P_ARG_REF))
      {
        pobj = dep.getFirstDependentByLabel(DEPLibEn.DEP_POBJ);
       
        if (pobj != null && getRefDependentNode(pobj) != null)
        {
          SRLLib.toReferentArgument(sHead);
          break;
        }
      }
    }
  }
 
  /** @return a relativizer node in the subtree of the specific node; otherwise, {@code null}. */
  static public DEPNode getRefDependentNode(DEPNode head)
  {
    if (head.isLabel(DEP_RCMOD) || head.isLabel(DEP_CCOMP) || (head.isLabel(DEP_XCOMP) && !isNonProjectiveXcomp(head)) || head.isLabel(DEP_DEP) || head.isLabel(DEP_CONJ))
      return null;
   
    if (isCommonRelativizer(head))
      return head;
   
    DEPNode dep, ref;
   
    for (DEPArc arc : head.getDependents())
    {
      dep = arc.getNode();
      ref = getRefDependentNode(dep);
      if (ref != nullreturn ref;
    }
   
    return null;
  }
 
  static private boolean isNonProjectiveXcomp(DEPNode node)
  {
    DEPNode head = node.getHead();
    if (head == null || head.id > node.id) return false;
   
    List<DEPNode> deps = node.getDependentNodeList();
    if (!deps.isEmpty() && deps.get(0).id < head.id) return true;
   
    return false;
  }
 
  static public boolean isCommonRelativizer(DEPNode node)
  {
    return node.pos.startsWith("W") && MPLibEn.RE_WH_COMMON.matcher(node.lemma).find();
  }
}
TOP

Related Classes of com.clearnlp.dependency.DEPLibEn

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.