// Package com.clearnlp.conversion
//
// Source Code of com.clearnlp.conversion.EnglishC2DConverter

/**
* Copyright (c) 2009/09-2012/08, Regents of the University of Colorado
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
*    list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
*    this list of conditions and the following disclaimer in the documentation
*    and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* Copyright 2012/09-2013/04, 2013/11-Present, University of Massachusetts Amherst
* Copyright 2013/05-2013/10, IPSoft Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearnlp.conversion;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import com.clearnlp.constituent.CTLib;
import com.clearnlp.constituent.CTLibEn;
import com.clearnlp.constituent.CTNode;
import com.clearnlp.constituent.CTTree;
import com.clearnlp.dependency.DEPArc;
import com.clearnlp.dependency.DEPFeat;
import com.clearnlp.dependency.DEPLib;
import com.clearnlp.dependency.DEPLibEn;
import com.clearnlp.dependency.DEPNode;
import com.clearnlp.dependency.DEPTree;
import com.clearnlp.dependency.srl.SRLArc;
import com.clearnlp.dependency.srl.SRLLib;
import com.clearnlp.headrule.HeadRule;
import com.clearnlp.headrule.HeadRuleMap;
import com.clearnlp.morphology.MPLibEn;
import com.clearnlp.util.UTArray;
import com.clearnlp.util.pair.Pair;
import com.clearnlp.util.pair.StringIntPair;
import com.google.common.collect.Lists;


/**
* Constituent to dependency converter for English.
* @since 1.0.0
* @author Jinho D. Choi ({@code jdchoi77@gmail.com})
*/
public class EnglishC2DConverter extends AbstractC2DConverter
{
  /** Type flag named after Stanford-style dependencies; it is not referenced in the visible part of this file. */
  static final public byte TYPE_STANFORD = 0;
  /** Size argument handed to {@code getHead(...)}; {@code getHeadFlag} yields the non-negative flags 0 to 3. */
  private final int SIZE_HEAD_FLAGS = 4;
 
  /** Function tags that {@code hasAdverbialTag} checks (together with ADV). */
  private final String[] a_semTags = {CTLibEn.FTAG_BNF, CTLibEn.FTAG_DIR, CTLibEn.FTAG_EXT, CTLibEn.FTAG_LOC, CTLibEn.FTAG_MNR, CTLibEn.FTAG_PRP, CTLibEn.FTAG_TMP, CTLibEn.FTAG_VOC};
  /** Second group of function tags; only copied into {@code s_synTags} in the visible code. */
  private final String[] a_synTags = {CTLibEn.FTAG_ADV, CTLibEn.FTAG_CLF, CTLibEn.FTAG_CLR, CTLibEn.FTAG_DTV, CTLibEn.FTAG_NOM, CTLibEn.FTAG_PUT, CTLibEn.FTAG_PRD, CTLibEn.FTAG_TPC};
  /** Set view of {@code a_semTags}, built in {@code initBasic}. */
  private Set<String>    s_semTags;
  /** Set view of {@code a_synTags}, built in {@code initBasic}. */
  private Set<String>    s_synTags;
 
  /** Filled by {@code mapICH}: antecedent -> ancestors of the removed *RNR* empty categories. Cleared per tree. */
  private Map<CTNode,Deque<CTNode>> m_rnr;
  /** Filled by {@code addXSubject}: antecedent -> clauses containing empty subjects. Cleared per tree. */
  private Map<CTNode,Deque<CTNode>> m_xsbj;
  /** Phrase tag -> pattern of child tags accepted as conjuncts under that phrase (see {@code initCoord}). */
  private Map<String,Pattern>       m_coord;
 
  /** Pairs of (new label, set of old labels) applied by {@code mergeLabels}. */
  private List<Pair<String,Set<String>>> l_mergeLabels;
 
  /**
   * Creates a converter using the specific headrules.
   * @param headrules the headrules forwarded to the super class.
   * @param mergeLabels the label-merging specification parsed by {@code initMerge}; may be {@code null}, in which case no labels are merged.
   */
  public EnglishC2DConverter(HeadRuleMap headrules, String mergeLabels)
  {
    super(headrules);
   
    initBasic();
    initCoord();
    initMerge(mergeLabels);
  }
 
  private void initBasic()
  {
    s_semTags = UTArray.toSet(a_semTags);
    s_synTags = UTArray.toSet(a_synTags);

    m_rnr   = new HashMap<CTNode,Deque<CTNode>>();
    m_xsbj  = new HashMap<CTNode,Deque<CTNode>>();
  }
 
  private void initCoord()
  {
    m_coord = new HashMap<String,Pattern>();
   
    m_coord.put(CTLibEn.PTAG_ADJP  , Pattern.compile("^(ADJP|JJ.*|VBN|VBG)$"));
    m_coord.put(CTLibEn.PTAG_ADVP  , Pattern.compile("^(ADVP|RB.*)$"));
    m_coord.put(CTLibEn.PTAG_INTJ  , Pattern.compile("^(INTJ|UH)$"));
    m_coord.put(CTLibEn.PTAG_PP    , Pattern.compile("^(PP|IN|VBG)$"));
    m_coord.put(CTLibEn.PTAG_PRT   , Pattern.compile("^(PRT|RP)$"));
    m_coord.put(CTLibEn.PTAG_NAC   , Pattern.compile("^(NP)$"));
    m_coord.put(CTLibEn.PTAG_NML   , Pattern.compile("^(NP|NML|NN.*|PRP)$"));
    m_coord.put(CTLibEn.PTAG_NP    , Pattern.compile("^(NP|NML|NN.*|PRP)$"));
    m_coord.put(CTLibEn.PTAG_NX    , Pattern.compile("^(NX)$"));
    m_coord.put(CTLibEn.PTAG_VP    , Pattern.compile("^(VP|VB.*)$"));
    m_coord.put(CTLibEn.PTAG_S     , Pattern.compile("^(S|SINV|SQ|SBARQ)$"));
    m_coord.put(CTLibEn.PTAG_SBAR  , Pattern.compile("^(SBAR.*)$"));
    m_coord.put(CTLibEn.PTAG_SBARQ  , Pattern.compile("^(SBAR.*)$"));
    m_coord.put(CTLibEn.PTAG_SINV  , Pattern.compile("^(S|SINV)$"));
    m_coord.put(CTLibEn.PTAG_SQ    , Pattern.compile("^(S|SQ|SBARQ)$"));
    m_coord.put(CTLibEn.PTAG_WHNP  , Pattern.compile("^(NN.*|WP)$"));
    m_coord.put(CTLibEn.PTAG_WHADJP  , Pattern.compile("^(JJ.*|VBN|VBG)$"));
    m_coord.put(CTLibEn.PTAG_WHADVP  , Pattern.compile("^(RB.*|WRB|IN)$"));
  }
 
  private void initMerge(String mergeLabels)
  {
    l_mergeLabels = new ArrayList<Pair<String,Set<String>>>();
   
    if (mergeLabels != null)
    {
      String[]    tmp;
      String      nLabel;
      Set<String> oLabels;
     
      for (String ms : mergeLabels.split("\\"+DEPFeat.DELIM_FEATS))
      {
        tmp     = ms.split(DEPFeat.DELIM_KEY_VALUE);
        nLabel  = tmp[0];
        oLabels = new HashSet<String>();
       
        for (String oLabel : tmp[1].split(DEPFeat.DELIM_VALUES))
          oLabels.add(oLabel);
           
        l_mergeLabels.add(new Pair<String,Set<String>>(nLabel, oLabels));
      }
    }
  }

  @Override
  public DEPTree toDEPTree(CTTree cTree)
  {
    clearMaps();
   
    if (!mapEmtpyCategories(cTree))  return null;
    setHeads(cTree.getRoot());
   
    return getDEPTree(cTree);
  }
 
  /** Empties the per-tree maps ({@code m_rnr}, {@code m_xsbj}) so that nothing from a previous tree carries over. */
  private void clearMaps()
  {
    m_rnr.clear();
    m_xsbj.clear();
  }
 
  // ============================= Map empty categories =============================
 
  /**
   * Removes, relocates empty categories in the specific tree.
   * Returns {@true} if the constituent tree contains nodes after relocating empty categories.
   * @param cTree the constituent tree to be processed.
   * @return {@true} if the constituent tree contains nodes after relocating empty categories.
   */
  public boolean mapEmtpyCategories(CTTree cTree)
  {
    for (CTNode node : cTree.getTerminals())
    {
      if (!node.isEmptyCategory())  continue;
      if (node.getParent() == nullcontinue;
     
      if      (node.form.startsWith(CTLibEn.EC_PRO))
        mapPRO(cTree, node);
      else if (node.form.startsWith(CTLibEn.EC_TRACE))
        mapTrace(cTree, node);
      else if (CTLibEn.RE_NULL.matcher(node.form).find())
        mapNull(cTree, node);
      else if (node.isForm("0"))
        continue;
      else if (CTLibEn.RE_ICH_PPA_RNR.matcher(node.form).find())
        mapICH(cTree, node);
    //  else if (node.form.startsWith(CTLibEn.EC_EXP))
    //    reloateEXP(cTree, node);
      else
        removeCTNode(node);
    }
   
    return cTree.getRoot().getChildrenSize() > 0;
  }
 
  /** Called by {@link EnglishC2DConverter#mapEmtpyCategories(CTTree)}. */
  private void mapPRO(CTTree cTree, CTNode ec)
  {
    CTNode np = ec.getParent();
    CTNode vp = np.getParent().getFirstChainedDescendant(CTLibEn.PTAG_VP);
   
    if (vp == null)                // small clauses
      relocatePRD(np, ec);
    else
    {
      CTNode ante;
     
      if ((ante = ec.getAntecedent()) != null && ante.pTag.startsWith("WH"))  // relative clauses
      {
        if (cTree.getCoIndexedEmptyCategories(ante.coIndex).size() == 1)
          mapTrace(cTree, ec);
      }
     
      addXSubject(ec, m_xsbj);
    }
  }
 
  /** Called by {@link EnglishC2DConverter#mapEmtpyCategories(CTTree)}. */
  private void mapTrace(CTTree cTree, CTNode ec)
  {
    CTNode ante = ec.getAntecedent();
   
    if (ante == null || ec.isDescendantOf(ante))
      removeCTNode(ec);
    else if (ante.hasFTag(CTLibEn.FTAG_TPC))
    {
      if (!ante.hasFTag(CTLibEn.FTAG_SBJ))
      {
        CTNode parent = ec.getParent();
        parent.removeChild(ec);
        replaceEC(parent, ante);
      }
      else
        removeCTNode(ec);
    }
    else  // relative clauses
    {
      CTNode parent = ante.getHighestChainedAncestor(CTLibEn.PTAG_SBAR);
      if (parent != null)    parent.addFTag(DEPLibEn.DEP_RCMOD);
      replaceEC(ec, ante);
    }
  }
 
  /** Called by {@link EnglishC2DConverter#mapEmtpyCategories(CTTree)}. */
  private void mapNull(CTTree cTree, CTNode ec)
  {
    CTNode np = ec.getParent();
   
    if (np.hasFTag(CTLibEn.FTAG_SBJ))
    {
      // small clauses
      if (np.getNextSibling(CTLibEn.PTAG_VP) == null)
        relocatePRD(np, ec);
      else
        addXSubject(ec, m_xsbj);
    }
  }
 
  /** Called by {@link EnglishC2DConverter#mapEmtpyCategories(CTTree)}. */
  private void mapICH(CTTree cTree, CTNode ec)
  {
    CTNode parent = ec.getParent();
    CTNode ante   = ec.getAntecedent();
   
    if (ec.form.startsWith(CTLibEn.EC_ICH) && parent.getPrevSibling("+WH.*") != null)
      removeCTNode(ec);
    else if (ante == null || ec.isDescendantOf(ante))
      removeCTNode(ec);
    else
    {
      List<CTNode> list = cTree.getCoIndexedEmptyCategories(ante.coIndex);
      boolean isRNR = ec.form.startsWith(CTLibEn.EC_RNR);
      int i, size = list.size();
      CTNode node;
     
      Deque<CTNode> dq = isRNR ? new ArrayDeque<CTNode>() : null;
     
      if (ec.getTerminalId() < ante.getFirstTerminal().getTerminalId())
      {   
        for (i=0; i<size-1; i++)
        {
          node = list.get(i);
          if (isRNRdq.addLast(node.getParent().getParent());
          removeCTNode(node);
        }
       
        ec = list.get(size-1);
      }
      else
      {
        for (i=size-1; i>0; i--)
        {
          node = list.get(i);
          if (isRNRdq.addFirst(node.getParent().getParent());
          removeCTNode(node);
        }
       
        ec = list.get(0);
      }
     
      if (isRNR && !dq.isEmpty())
        m_rnr.put(ante, dq);
     
      parent = ec.getParent();
      parent.removeChild(ec);
      replaceEC(parent, ante);
    }
  }
 
  /** Called by {@link EnglishC2DConverter#mapPRO(CTTree, CTNode)} and {@link EnglishC2DConverter#mapNull(CTTree, CTNode)}. */
  private void relocatePRD(CTNode np, CTNode ec)
  {
    CTNode s   = np.getParent();
    CTNode prd = s.getFirstChild("-"+CTLibEn.FTAG_PRD);
    Set<String> fTags = s.getFTags();
   
    if (prd != null && (fTags.isEmpty() || fTags.contains(CTLibEn.FTAG_CLR)))
    {
      fTags.clear();
      fTags.add(DEPLibEn.DEP_OPRD);
    }

    removeCTNode(ec);
  }
 
/*  private void reloateEXP(CTTree cTree, CTNode ec)
  {
    int idx = ec.form.lastIndexOf("-");
   
    if (idx != -1)
    {
      int coIndex = Integer.parseInt(ec.form.substring(idx+1));
      CTNode ante = cTree.getCoIndexedAntecedent(coIndex);
      if (ante != null)  ante.addFTag(DEPLibEn.CONLL_EXTR);
    }
   
    removeCTNode(ec);
  }*/
 
  /**
   * @param ec empty subject.
   * @param map key: antecedent, value: list of clauses containing empty subjects.
   */
  private void addXSubject(CTNode ec, Map<CTNode, Deque<CTNode>> map)
  {
    CTNode ante = ec.getAntecedent();
   
    while (ante != null && ante.isEmptyCategoryRec() && !ante.pTag.startsWith("WH"))
      ante = ante.getFirstTerminal().getAntecedent();
   
    if (ante != null)
    {
      CTNode s = ec.getNearestAncestor(CTLibEn.PTAG_S);
     
      if (s != null)
      {
        Deque<CTNode> dq = map.get(ante);
        if (dq == nulldq = new ArrayDeque<CTNode>();
       
        dq.add(s);
        map.put(ante, dq);
      }
    }
  }
 
  private void removeCTNode(CTNode node)
  {
    CTNode parent = node.getParent();
 
    if (parent != null)
    {
      parent.removeChild(node);
     
      if (parent.getChildrenSize() == 0)
        removeCTNode(parent);     
    }
  }
 
  /**
   * Puts {@code ante} into the child slot currently occupied by {@code ec}.
   * @param ec the node to be replaced (callers pass either an empty category or the parent of one).
   * @param ante the antecedent that takes its place.
   */
  private void replaceEC(CTNode ec, CTNode ante)
  {
    // detach the antecedent first (this also prunes ancestors left childless);
    // the sibling id of ec is read only after that removal
    removeCTNode(ante);
    ec.getParent().setChild(ec.getSiblingId(), ante);
  }
 
  // ============================= Find heads =============================
 
  @Override
  protected void setHeadsAux(HeadRule rule, CTNode curr)
  {
    if (findHeadsCoordination(rule, curr))  return;
   
    findHyphens(curr);
    findHeadsApposition(curr);
    findHeadsSmallClause(curr);

    CTNode head = getHead(rule, curr.getChildren(), SIZE_HEAD_FLAGS);
    if (head.c2d.getLabel() != nullhead.c2d.setLabel(null);
    curr.c2d = new C2DInfo(head);
  }
 
 
  /**
   * If the specific node contains a coordination structure, find the head of each coordination.
   * @param curr the specific node to be compared.
   * @return {@code true} if this node contains a coordination structure.
   */
  private boolean findHeadsCoordination(HeadRule rule, CTNode curr)
  {
    // skip pre-conjunctions and punctuation
    int i, sId, size = curr.getChildrenSize();
    CTNode node;
   
    for (sId=0; sId<size; sId++)
    {
      node = curr.getChild(sId);
     
      if (!CTLibEn.isPunctuation(node) && !CTLibEn.isConjunction(node))
        break;
    }
   
    if (!CTLibEn.containsCoordination(curr, curr.getChildren(sId)))
      return false;
   
    // find conjuncts
    Pattern rTags = getConjunctPattern(curr, sId, size);
    CTNode prevHead = null, mainHead = null;
    boolean isFound = false;
    int bId = 0, eId = sId;
   
    for (; eId<size; eId++)
    {
      node = curr.getChild(eId);
     
      if (CTLibEn.isCoordinator(node))
      {
        if (isFound)
        {
          prevHead = findHeadsCoordinationAux(rule, curr, bId, eId, prevHead);
          setHeadCoord(node, prevHead, getDEPLabel(node, curr, prevHead));
          if (mainHead == nullmainHead = prevHead;
          isFound = false;
     
          bId = eId + 1;
        }
        else if (prevHead != null)
        {
          for (i=bId; i<=eId; i++)
          {
            node = curr.getChild(i);
            setHeadCoord(node, prevHead, getDEPLabel(node, curr, prevHead));
          }
         
          bId = eId + 1;
        }
      }
      else if (isConjunct(node, curr, rTags))
        isFound = true;
    }
   
    if (mainHead == nullreturn false;
   
    if (eId - bId > 0)
      findHeadsCoordinationAux(rule, curr, bId, eId, prevHead);
   
    curr.c2d = new C2DInfo(mainHead);
    return true;
  }
 
  /** Called by {@link EnglishC2DConverter#findHeadsCoordination(HeadRule, CTNode)}. */
  private Pattern getConjunctPattern(CTNode curr, int sId, int size)
  {
    Pattern rTags = m_coord.get(curr.pTag);
   
    if (rTags != null)
    {
      boolean b = false;
      int i;
     
      for (i=sId; i<size; i++)
      {
        if (rTags.matcher(curr.getChild(i).pTag).find())
        {
          b = true;
          break;
        }
      }
     
      if (!brTags = Pattern.compile(".*");
    }
    else
      rTags = Pattern.compile(".*");
   
    return rTags;
  }
 
  /** Called by {@link EnglishC2DConverter#findHeadsCoordination(HeadRule, CTNode)}. */
  private boolean isConjunct(CTNode C, CTNode P, Pattern rTags)
  {
    if (P.isPTag(CTLibEn.PTAG_SBAR) && C.isPTagAny(CTLibEn.POS_IN, CTLibEn.POS_DT))
      return false;
    else if (rTags.pattern().equals(".*"))
      return getSpecialLabel(C) == null;
    else if (rTags.matcher(C.pTag).find())
    {
      if (P.isPTag(CTLibEn.PTAG_VP) && getAuxLabel(C) != null)
        return false;
     
      if (P.isPTagAny(CTLibEn.PTAG_S, CTLibEn.PTAG_SQ, CTLibEn.PTAG_SINV) && C.isPTag(CTLibEn.PTAG_S) && hasAdverbialTag(C))
        return false;
     
      return true;
    }
    else if (P.isPTagAny(CTLibEn.PTAG_NP))
    {
      return C.hasFTag(CTLibEn.FTAG_NOM);
    }
   
    return false;
  }
 
  /** Called by {@link EnglishC2DConverter#findHeadsCoordination(HeadRule, CTNode)}. */
  private CTNode findHeadsCoordinationAux(HeadRule rule, CTNode curr, int bId, int eId, CTNode lastHead)
  {
    CTNode currHead = (eId - bId == 1) ? curr.getChild(bId) : getHead(rule, curr.getChildren(bId, eId), SIZE_HEAD_FLAGS);
   
    if (lastHead != null)
    {
      String label = DEPLibEn.DEP_CONJ;
     
      if (isIntj(currHead))            label = DEPLibEn.DEP_INTJ;
      else if (CTLibEn.isPunctuation(currHead))  label = DEPLibEn.DEP_PUNCT;

      setHeadCoord(currHead, lastHead, label);
    }
   
    return currHead;
  }
 
  private void setHeadCoord(CTNode node, CTNode head, String label)
  {
    if (head.isPhrase())
      node.c2d.setHead(head, label);
    else
      node.c2d.setHeadTerminal(head, label);
  }
 
  private boolean findHyphens(CTNode node)
  {
    int i, size = node.getChildrenSize();
    CTNode prev, hyph, next;
    boolean isFound = false;
    boolean isVP = node.isPTag(CTLibEn.PTAG_VP);
   
    for (i=0; i<size-2; i++)
    {
      prev = node.getChild(i);
      hyph = node.getChild(i+1);
      next = node.getChild(i+2);
     
      if (hyph.isPTag(CTLibEn.POS_HYPH))
      {
        if (isVP)
        {
          prev.c2d.setLabel(DEPLibEn.DEP_HMOD);
          hyph.c2d.setLabel(DEPLibEn.DEP_HYPH);
          next.c2d.setLabel(DEPLibEn.DEP_HMOD);
        }
        else
        {
          prev.c2d.setHead(next, DEPLibEn.DEP_HMOD);
          hyph.c2d.setHead(next, DEPLibEn.DEP_HYPH);
        }
       
        isFound = true;
        i++;
      }
    }
   
    return isFound;
  }
 
 
  /**
   * Finds the head of appositional modifiers.
   * @param curr the constituent node to be processed.
   * @return {@code true} if the specific node contains appositional modifiers.
   */
  private boolean findHeadsApposition(CTNode curr)
  {
    if (!curr.isPTagAny(CTLibEn.PTAG_NP, CTLibEn.PTAG_NML) || curr.containsTags("+NN.*"))
      return false;
   
    CTNode fst = curr.getFirstChild("+NP|NML"), snd;
    while (fst != null && fst.containsTags(CTLibEn.POS_POS))
      fst = fst.getNextSibling("+NP|NML");
   
    if (fst == null || fst.c2d.hasHead())  return false;
   
    int i, size = curr.getChildrenSize();
    boolean hasAppo = false;
   
    for (i=fst.getSiblingId()+1; i<size; i++)
    {
      snd = curr.getChild(i);
      if (snd.c2d.hasHead())  continue;
     
      if ((snd.isPTagAny(CTLibEn.PTAG_NP, CTLibEn.PTAG_NML) && !hasAdverbialTag(snd)) ||
        (snd.hasFTagAny(CTLibEn.FTAG_HLN, CTLibEn.FTAG_TTL)) ||
        (snd.isPTag(CTLibEn.PTAG_RRC) && snd.containsTags(CTLibEn.PTAG_NP, "-"+CTLibEn.FTAG_PRD)))
      {
        snd.c2d.setHead(fst, DEPLibEn.DEP_APPOS);
        hasAppo = true;
      }
    }
   
    return hasAppo;
  }

  private boolean findHeadsSmallClause(CTNode node)
  {
    CTNode parent = node.getParent();
   
    if (node.isPTag(CTLibEn.PTAG_S) && !node.containsTags(CTLibEn.PTAG_VP))
    {
      CTNode sbj = node.getFirstChild("-"+CTLibEn.FTAG_SBJ);
      CTNode prd = node.getFirstChild("-"+CTLibEn.FTAG_PRD);
     
      if (sbj != null && prd != null)
      {
        if (parent.isPTag(CTLibEn.PTAG_SQ))
        {
          CTNode vb = parent.getFirstChild("+VB.*");
         
          if (vb != null)
          {
            sbj.c2d.setHead(vb, getDEPLabel(sbj, parent, vb));
            node.pTag = prd.pTag;
            node.addFTag(CTLibEn.FTAG_PRD);
          }
        }
       
        return true;
      }
    }
   
    return false;
  }
 
  @Override
  protected int getHeadFlag(CTNode child)
  {
    if (child.c2d.hasHead())
      return -1;
   
    if (hasAdverbialTag(child))
      return 1;
   
    if (isMeta(child))
      return 2;
   
    if (child.isEmptyCategoryRec() || CTLibEn.isPunctuation(child))
      return 3;
   
    return 0;
  }
 
  // ============================= Get Stanford labels =============================
 
  @Override
  protected String getDEPLabel(CTNode C, CTNode P, CTNode p)
  {
    CTNode c = C.c2d.getPhraseHead();
    CTNode d = C.c2d.getDependencyHead();
    String label;
   
    // function tags
    if (hasAdverbialTag(C))
    {
      if (C.isPTagAny(CTLibEn.PTAG_S, CTLibEn.PTAG_SBAR, CTLibEn.PTAG_SINV))
        return DEPLibEn.DEP_ADVCL;
     
      if (C.isPTagAny(CTLibEn.PTAG_NML, CTLibEn.PTAG_NP, CTLibEn.PTAG_QP))
        return DEPLibEn.DEP_NPADVMOD;
    }
   
    if ((label = getSubjectLabel(C, d)) != null)
      return label;
   
    // coordination
    if (C.isPTag(CTLibEn.PTAG_UCP))
    {
      c.addFTags(C.getFTags());
      return getDEPLabel(c, P, p);
    }
   
    // complements
    if (P.isPTagAny(CTLibEn.PTAG_VP, CTLibEn.PTAG_SINV, CTLibEn.PTAG_SQ))
    {
      if (isAcomp(C))  return DEPLibEn.DEP_ACOMP;
      if ((label = getObjectLabel(C)) != nullreturn label;
      if (isOprd(C))  return DEPLibEn.DEP_OPRD;
      if (isXcomp(C))  return DEPLibEn.DEP_XCOMP;
      if (isCcomp(C))  return DEPLibEn.DEP_CCOMP;
      if ((label = getAuxLabel(C)) != null)    return label;
    }
   
    if (P.isPTagAny(CTLibEn.PTAG_ADJP, CTLibEn.PTAG_ADVP))
    {
      if (isXcomp(C))  return DEPLibEn.DEP_XCOMP;
      if (isCcomp(C))  return DEPLibEn.DEP_CCOMP;
    }
   
    if (P.isPTagAny(CTLibEn.PTAG_NML, CTLibEn.PTAG_NP, CTLibEn.PTAG_WHNP))
    {
      if (isNfmod(C))  return isInfMod(C) ? DEPLibEn.DEP_INFMOD : DEPLibEn.DEP_PARTMOD;
      if (isRcmod(C))  return DEPLibEn.DEP_RCMOD;
      if (isCcomp(C))  return DEPLibEn.DEP_CCOMP;
    }
   
    if (isPoss(C, P))
      return DEPLibEn.DEP_POSS;
   
    // simple labels
    if ((label = getSimpleLabel(C)) != null)
      return label;
     
    // default
    if (P.isPTagAny(CTLibEn.PTAG_PP, CTLibEn.PTAG_WHPP))
    {
      if (p.getParent() == C.getParent())  // p and C are siblings
      {
        if (p.getSiblingId() < C.getSiblingId())
          return getPmodLabel(C, d);
      }
      else                // UCP
      {
        if (p.getFirstTerminal().getTerminalId() < C.getFirstTerminal().getTerminalId())
          return getPmodLabel(C, d);
      }
    }
   
    if (C.isPTag(CTLibEn.PTAG_SBAR) || isXcomp(C) || (P.isPTag(CTLibEn.PTAG_PP) && CTLibEn.isClause(C)))
      return DEPLibEn.DEP_ADVCL;
   
    if (C.isPTagAny(CTLibEn.PTAG_S, CTLibEn.PTAG_SINV, CTLibEn.PTAG_SQ, CTLibEn.PTAG_SBARQ))
      return DEPLibEn.DEP_CCOMP;
   
    if (P.isPTag(CTLibEn.PTAG_QP))
    {
      if (C.isPTagAny(CTLibEn.POS_CD))
        return DEPLibEn.DEP_NUMBER;
      else
        return DEPLibEn.DEP_QUANTMOD;
    }
   
    if (P.isPTagAny(CTLibEn.PTAG_NML, CTLibEn.PTAG_NP, CTLibEn.PTAG_NX, CTLibEn.PTAG_WHNP) || CTLibEn.isNoun(p))
      return getNmodLabel(C);
   
    if (c != null)
    {
      if ((label = getSimpleLabel(c)) != null)
        return label;
     
      if (d.isPTag(CTLibEn.POS_IN))
        return DEPLibEn.DEP_PREP;
     
      if (CTLibEn.isAdverb(d))
        return DEPLibEn.DEP_ADVMOD;
    }
   
    if ((P.isPTagAny(CTLibEn.PTAG_ADJP, CTLibEn.PTAG_ADVP, CTLibEn.PTAG_PP) || CTLibEn.isAdjective(p) || CTLibEn.isAdverb(p)))
    {
      if (C.isPTagAny(CTLibEn.PTAG_NML, CTLibEn.PTAG_NP, CTLibEn.PTAG_QP) || CTLibEn.isNoun(C))
        return DEPLibEn.DEP_NPADVMOD;
     
      return DEPLibEn.DEP_ADVMOD;
    }
   
    if (d.c2d != null && (label = d.c2d.getLabel()) != null)
      return label;
   
    return DEPLibEn.DEP_DEP;
  }
 
  /** Returns {@code true} if the specific node carries the ADV function tag or any tag in {@code a_semTags}. */
  private boolean hasAdverbialTag(CTNode node)
  {
    return node.hasFTag(CTLibEn.FTAG_ADV) ||  node.hasFTagAny(a_semTags);
  }
 
  private String getObjectLabel(CTNode node)
  {
    if (node.isPTagAny(CTLibEn.PTAG_NP, CTLibEn.PTAG_NML))
    {
      if (node.hasFTag(CTLibEn.FTAG_PRD))
        return DEPLibEn.DEP_ATTR;
      else
        return DEPLibEn.DEP_DOBJ;
    }
   
    return null;
  }
 
  private String getSubjectLabel(CTNode C, CTNode d)
  {
    if (C.hasFTag(CTLibEn.FTAG_SBJ))
    {
      if (CTLibEn.isClause(C))
        return DEPLibEn.DEP_CSUBJ;
      else if (d.isPTag(CTLibEn.POS_EX))
        return DEPLibEn.DEP_EXPL;
      else
        return DEPLibEn.DEP_NSUBJ;
    }
    else if (C.hasFTag(CTLibEn.FTAG_LGS))
      return DEPLibEn.DEP_AGENT;
   
    return null;
  }
 
  private String getSimpleLabel(CTNode C)
  {
    String label;
   
    if (isHyph(C))
      return DEPLibEn.DEP_HYPH;
   
    if (isAmod(C))
      return DEPLibEn.DEP_AMOD;
   
    if (C.isPTagAny(CTLibEn.PTAG_PP, CTLibEn.PTAG_WHPP))
      return DEPLibEn.DEP_PREP;
   
    if (CTLibEn.isCorrelativeConjunction(C))
      return DEPLibEn.DEP_PRECONJ;
   
    if (CTLibEn.isConjunction(C))
      return DEPLibEn.DEP_CC;
   
    if (isPrt(C))
      return DEPLibEn.DEP_PRT;

    if ((label = getSpecialLabel(C)) != null)
      return label;
   
    return null;
  }
 
  private String getSpecialLabel(CTNode C)
  {
    CTNode d = C.c2d.getDependencyHead();
   
    if (CTLibEn.isPunctuation(C) || CTLibEn.isPunctuation(d))
      return DEPLibEn.DEP_PUNCT;
   
    if (isIntj(C) || isIntj(d))
      return DEPLibEn.DEP_INTJ;
   
    if (isMeta(C))
      return DEPLibEn.DEP_META;
   
    if (isPrn(C))
      return DEPLibEn.DEP_PARATAXIS;

    if (isAdv(C))
      return DEPLibEn.DEP_ADVMOD;
   
    return null;
  }
 
  private String getAuxLabel(CTNode node)
  {
    if (node.isPTagAny(CTLibEn.POS_MD, CTLibEn.POS_TO))
      return DEPLibEn.DEP_AUX;

    CTNode vp;
   
    if (CTLibEn.isVerb(node) && (vp = node.getNextSibling(CTLibEn.PTAG_VP)) != null)
    {
      if ((MPLibEn.isBe(node.form) || MPLibEn.isBecome(node.form) || MPLibEn.isGet(node.form)))
      {
        if (vp.containsTags("+VBN|VBD"))
          return DEPLibEn.DEP_AUXPASS;
       
        if (!vp.containsTags("+VB.*") && (vp = vp.getFirstChild(CTLibEn.PTAG_VP)) != null && vp.containsTags("+VBN|VBD"))
          return DEPLibEn.DEP_AUXPASS;
      }
     
      return DEPLibEn.DEP_AUX;
    }
   
    return null;
  }
 
  private String getNmodLabel(CTNode node)
  {
    if (node.isPTagAny(CTLibEn.POS_VBG, CTLibEn.POS_VBN))
      return DEPLibEn.DEP_AMOD;
   
    if (node.isPTagAny(CTLibEn.POS_DT, CTLibEn.POS_WDT, CTLibEn.POS_WP))
      return DEPLibEn.DEP_DET;
   
    if (node.isPTagAny(CTLibEn.PTAG_NML, CTLibEn.PTAG_NP, CTLibEn.POS_FW) || node.pTag.startsWith(CTLibEn.POS_NN))
      return DEPLibEn.DEP_NN;
   
    if (node.isPTagAny(CTLibEn.POS_CD, CTLibEn.PTAG_QP))
      return DEPLibEn.DEP_NUM;

    if (node.isPTag(CTLibEn.POS_POS))
      return DEPLibEn.DEP_POSSESSIVE;
   
    if (node.isPTag(CTLibEn.POS_PDT))
      return DEPLibEn.DEP_PREDET;
   
    return DEPLibEn.DEP_NMOD;
  }
 
  private String getPmodLabel(CTNode C, CTNode d)
  {
    if (C.isPTagAny(CTLibEn.PTAG_NP, CTLibEn.PTAG_NML) || CTLibEn.RE_COMP_POS.matcher(d.pTag).find())
      return DEPLibEn.DEP_POBJ;
    else
      return DEPLibEn.DEP_PCOMP; 
  }
 
  /** Returns {@code true} if the specific node is tagged HYPH. */
  private boolean isHyph(CTNode node)
  {
    return node.isPTag(CTLibEn.POS_HYPH);
  }
 
  /** Returns {@code true} if the specific node is an ADJP or WHADJP phrase, or an adjective according to {@code CTLibEn.isAdjective}. */
  private boolean isAmod(CTNode node)
  {
    return node.isPTagAny(CTLibEn.PTAG_ADJP, CTLibEn.PTAG_WHADJP) || CTLibEn.isAdjective(node);
  }
 
  private boolean isAdv(CTNode C)
  {
    if (C.isPTag(CTLibEn.PTAG_ADVP) || CTLibEn.isAdverb(C))
    {
      CTNode P = C.getParent();
      int id = C.getSiblingId();
     
      if (P.isPTagAny(CTLibEn.PTAG_PP, CTLibEn.PTAG_WHPP) && id+1 == P.getChildrenSize() && P.getChild(id-1).isPTagAny(CTLibEn.POS_IN, CTLibEn.POS_TO))
        return false;

      return true;
    }
   
    return false;
  }
 
  /** Returns {@code true} if the specific node is tagged INTJ or UH. */
  private boolean isIntj(CTNode node)
  {
    return node.isPTagAny(CTLibEn.PTAG_INTJ, CTLibEn.POS_UH);
  }
 
  /** Returns {@code true} if the specific node carries one of the tags treated as meta: EDITED, EMBED, LST, META, CODE, CAPTION, CIT, HEADING or TITLE. */
  private boolean isMeta(CTNode node)
  {
    return node.isPTagAny(CTLibEn.PTAG_EDITED, CTLibEn.PTAG_EMBED, CTLibEn.PTAG_LST, CTLibEn.PTAG_META, CTLibEn.POS_CODE, CTLibEn.PTAG_CAPTION, CTLibEn.PTAG_CIT, CTLibEn.PTAG_HEADING, CTLibEn.PTAG_TITLE);
  }
 
  /** Returns {@code true} if the specific node is tagged PRN. */
  private boolean isPrn(CTNode node)
  {
    return node.isPTag(CTLibEn.PTAG_PRN);
  }
 
  /** Returns {@code true} if the specific node is tagged PRT or RP. */
  private boolean isPrt(CTNode curr)
  {
    return curr.isPTagAny(CTLibEn.PTAG_PRT, CTLibEn.POS_RP);
  }
 
  /** Returns {@code true} if the specific node is tagged ADJP. */
  private boolean isAcomp(CTNode node)
  {
    return node.isPTag(CTLibEn.PTAG_ADJP);
  }
 
  private boolean isOprd(CTNode curr)
  {
    if (curr.hasFTag(DEPLibEn.DEP_OPRD))
      return true;
   
    if (curr.isPTag(CTLibEn.PTAG_S) && !curr.containsTags(CTLibEn.PTAG_VP) && curr.containsTags("-"+CTLibEn.FTAG_PRD))
    {
      CTNode sbj = curr.getFirstChild("-"+CTLibEn.FTAG_SBJ);
      return sbj != null && sbj.isEmptyCategoryRec();
    }
   
    return false;
  }
 
  private boolean isPoss(CTNode curr, CTNode parent)
  {
    if (curr.isPTagAny(CTLibEn.POS_PRPS, CTLibEn.POS_WPS))
      return true;
   
    if (parent.isPTagAny(CTLibEn.PTAG_NML, CTLibEn.PTAG_NP, CTLibEn.PTAG_WHNP, CTLibEn.PTAG_QP, CTLibEn.PTAG_ADJP))
      return curr.containsTags(CTLibEn.POS_POS);
   
    return false;
  }
 
  private boolean isXcomp(CTNode node)
  {
    if (node.isPTag(CTLibEn.PTAG_S))
    {
      CTNode sbj = node.getFirstChild("-"+CTLibEn.FTAG_SBJ);
     
      if (node.containsTags(CTLibEn.PTAG_VP) && (sbj == null || sbj.isEmptyCategoryRec()))
        return true;
    }
    else if (node.hasFTag(DEPLibEn.DEP_RCMOD))
    {
      CTNode s = node.getFirstChild(CTLibEn.PTAG_S);
      if (s != nullreturn isXcomp(s);
    }

    return false;
  }
 
  private boolean isCcomp(CTNode node)
  {
    if (node.isPTagAny(CTLibEn.PTAG_S, CTLibEn.PTAG_SQ, CTLibEn.PTAG_SINV, CTLibEn.PTAG_SBARQ))
      return true;
   
    if (node.isPTag(CTLibEn.PTAG_SBAR))
    {
      CTNode comp;
     
      if ((comp = node.getFirstChild(CTLib.POS_NONE)) != null && comp.isForm("0"))
        return true;
     
      if ((comp = node.getFirstChild("+IN|DT")) != null)
      {
        if (comp.form.equalsIgnoreCase("that") || comp.form.equalsIgnoreCase("if") || comp.form.equalsIgnoreCase("whether"))
        {
          comp.c2d.setLabel(DEPLibEn.DEP_COMPLM);
          return true;
        }
      }
     
      if (node.hasFTag(DEPLibEn.DEP_RCMOD) || node.containsTags("+WH.*"))
        return true;
    }
   
    return false;
  }
 
  /** Returns {@code true} if the specific node satisfies {@code isXcomp} or is tagged VP; used by {@code getDEPLabel} to choose between the infmod and partmod labels. */
  private boolean isNfmod(CTNode curr)
  {
    return isXcomp(curr) || curr.isPTag(CTLibEn.PTAG_VP);
  }
 
  private boolean isInfMod(CTNode curr)
  {
    CTNode vp = curr.isPTag(CTLibEn.PTAG_VP) ? curr : curr.getFirstDescendant(CTLibEn.PTAG_VP);
   
    if (vp != null)
    {
      CTNode vc = vp.getFirstChild(CTLibEn.PTAG_VP);
     
      while (vc != null)
      {
        vp = vc;
       
        if (vp.getPrevSibling(CTLibEn.POS_TO) != null)
          return true;
       
        vc = vp.getFirstChild(CTLibEn.PTAG_VP);
      }
     
      return vp.containsTags(CTLibEn.POS_TO);
    }
   
    return false;
  }
 
  /** Returns {@code true} if the specific node is tagged RRC, carries the RCMOD tag (added by {@code mapTrace}), or is an SBAR containing a WH-phrase. */
  private boolean isRcmod(CTNode curr)
  {
    return curr.isPTag(CTLibEn.PTAG_RRC) || curr.hasFTag(DEPLibEn.DEP_RCMOD) || (curr.isPTag(CTLibEn.PTAG_SBAR) && curr.containsTags("+WH.*"));
  }
 
  // ============================= Get a dependency tree =============================
 
  /**
   * Builds the dependency tree from a constituent tree whose heads are already set.
   * @param cTree the constituent tree.
   * @return the dependency tree; a cycle is only reported on standard error, the tree is returned regardless.
   */
  private DEPTree getDEPTree(CTTree cTree)
  {
    DEPTree dTree = initDEPTree(cTree);
    addDEPHeads(dTree, cTree);
   
    if (dTree.containsCycle())
      System.err.println("Error: cyclic dependencies exist");
   
    // post-processing; the helpers below other than mergeLabels are defined outside the visible part of this file
    splitLabels(dTree);
    addXHeads(dTree);
    addFeats(dTree, cTree, cTree.getRoot());
    addPBArgs(dTree, cTree);
    // label merging runs last, after all the steps above
    mergeLabels(dTree);
   
    return dTree;
  }
 
  private void mergeLabels(DEPTree dTree)
  {
    int i, size = dTree.size();
    DEPNode node;
   
    for (Pair<String,Set<String>> p : l_mergeLabels)
    {
      for (i=1; i<size; i++)
      {
        node = dTree.get(i);
       
        if (p.o2.contains(node.getLabel()))
          node.setLabel(p.o1);
      }
    }
  }
 
  /** Adds dependency heads. */
  private void addDEPHeads(DEPTree dTree, CTTree cTree)
  {
    int currId, headId, size = dTree.size(), rootCount = 0;
    CTNode cNode, ante;
    DEPNode dNode;
    String label;
   
    for (currId=1; currId<size; currId++)
    {
      dNode  = dTree.get(currId);
      cNode  = cTree.getToken(currId-1);
      headId = cNode.c2d.d_head.getTokenId() + 1;
     
      if (currId == headId// root
      {
        dNode.setHead(dTree.get(DEPLib.ROOT_ID), DEPLibEn.DEP_ROOT);
        rootCount++;
      }
      else
      {
        label = cNode.c2d.s_label;
       
        if (cNode.isPTagAny(CTLibEn.POS_IN, CTLibEn.POS_TO, CTLibEn.POS_DT) && cNode.getParent().isPTag(CTLibEn.PTAG_SBAR) && !label.equals(DEPLibEn.DEP_COMPLM))
          label = DEPLibEn.DEP_MARK;
       
        dNode.setHead(dTree.get(headId), label);
      }
     
      if ((ante = cNode.getAntecedent()) != null)
        dNode.addXHead(getDEPNode(dTree, ante), DEPLibEn.DEP_REF);
    }
   
    if (rootCount > 1System.err.println("Warning: multiple roots exist");
  }
 
  /** Splits certain Stanford dependency labels into finer-grained labels. */
  private void splitLabels(DEPTree tree)
  {
    int i, size = tree.size();
    List<DEPNode> list;
    DEPNode node;

    tree.setDependents();
   
    for (i=1; i<size; i++)
    {
      node = tree.get(i);
     
      if (node.isLabel(DEPLibEn.DEP_ADVMOD))
      {
        if (MPLibEn.RE_NEG.matcher(node.form.toLowerCase()).find())
          node.setLabel(DEPLibEn.DEP_NEG);
      }
     
      if (node.containsDependent(DEPLibEn.DEP_AUXPASS))
      {
        for (DEPNode child : node.getDependentsByLabels(DEPLibEn.DEP_CSUBJ, DEPLibEn.DEP_NSUBJ))
          child.setLabel(child.getLabel()+DEPLibEn.DEP_PASS);
      }
     
      if ((list = node.getDependentsByLabels(DEPLibEn.DEP_DOBJ)).size() > 1)
        list.get(0).setLabel(DEPLibEn.DEP_IOBJ);
    }
  }
 
  /** Adds secondary dependency heads. */
  private void addXHeads(DEPTree dTree)
  {
    for (CTNode curr : m_xsbj.keySet())
    {
      if (curr.c2d != null)
        addXHeadsAux(dTree, curr, m_xsbj.get(curr), DEPLibEn.DEP_XSUBJ);
    }
   
    for (CTNode curr : m_rnr.keySet())
    {
      if (curr.getParent() == null)
        continue;
     
      if (curr.getParent().c2d.getPhraseHead() != curr)
        addXHeadsAux(dTree, curr, m_rnr.get(curr), DEPLibEn.DEP_RNR);
      else
        addXChildren(dTree, curr, m_rnr.get(curr), DEPLibEn.DEP_RNR);
    }
  }
 
  /** Called by {@link EnglishC2DConverter#addDEPHeads(DEPTree, CTTree)} */
  private void addXHeadsAux(DEPTree dTree, CTNode cNode, Deque<CTNode> dq, String label)
  {
    DEPNode node = getDEPNode(dTree, cNode);
    DEPNode head;
   
    for (CTNode cHead : dq)
    {
      head = getDEPNode(dTree, cHead);
      node.addXHead(head, label);
     
      if (label.equals(DEPLibEn.DEP_XSUBJ) && head.isLabel(DEPLibEn.DEP_CCOMP))
        head.setLabel(DEPLibEn.DEP_XCOMP);
    }
  }
 
  /** {@link EnglishC2DConverter#addDEPHeads(DEPTree, CTTree)} */
  private void addXChildren(DEPTree dTree, CTNode cHead, Deque<CTNode> dq, String label)
  {
    DEPNode head = getDEPNode(dTree, cHead);
    DEPNode node;
   
    for (CTNode cNode : dq)
    {
      node = getDEPNode(dTree, cNode);
      node.addXHead(head, label);     
    }
  }
 
  /** Add extra features. */
  private void addFeats(DEPTree dTree, CTTree cTree, CTNode cNode)
  {
    CTNode ante;
    String feat;
   
    if (cNode.gapIndex != -1 && cNode.getParent().gapIndex == -1 && (ante = cTree.getCoIndexedAntecedent(cNode.gapIndex)) != null)
    {
      DEPNode dNode = getDEPNode(dTree, cNode);
      dNode.addXHead(getDEPNode(dTree, ante), DEPLibEn.DEP_GAP);
    }
   
    if ((feat = getFunctionTags(cNode, s_semTags)) != null)
      cNode.c2d.putFeat(DEPLibEn.FEAT_SEM, feat);
   
    if ((feat = getFunctionTags(cNode, s_synTags)) != null)
      cNode.c2d.putFeat(DEPLibEn.FEAT_SYN, feat);

    for (CTNode child : cNode.getChildren())
      addFeats(dTree, cTree, child);
  }
 
  /** Called by {@link EnglishC2DConverter#addFeats(DEPTree, CTTree, CTNode)}. */
  private String getFunctionTags(CTNode node, Set<String> sTags)
  {
    List<String> tags = new ArrayList<String>();
   
    for (String tag : node.getFTags())
    {
      if (sTags.contains(tag))
        tags.add(tag);
    }
   
    if (tags.isEmpty())  return null;
    Collections.sort(tags);

    StringBuilder build = new StringBuilder();
   
    for (String tag : tags)
    {
      build.append(DEPFeat.DELIM_VALUES);
      build.append(tag);
    }
   
    return build.substring(DEPFeat.DELIM_VALUES.length());
  }
 
  // ============================= Add PropBank arguments =============================
 
  private void addPBArgs(DEPTree dTree, CTTree cTree)
  {
    CTNode root = cTree.getRoot();
    dTree.initSHeads();
   
    if (root.pbArgs != null)
    {
      initPBArgs(dTree, cTree, root);
      arrangePBArgs(dTree);
      relabelArgNs(dTree);
    }
  }
 
  private void initPBArgs(DEPTree dTree, CTTree cTree, CTNode cNode)
  {
    if (!cNode.isPTag(CTLib.PTAG_TOP))
    {
      DEPNode dNode, sHead;
     
      if (cNode.isPhrase())
        dNode = getDEPNode(dTree, cNode);
      else
        dNode = dTree.get(cNode.getTokenId()+1);
     
      for (StringIntPair p : cNode.pbArgs)
      {
        sHead = dTree.get(p.i);
       
        if (isRefArgument(cNode))
          p.s = "R-"+p.s;
       
        if (!dNode.containsSHead(sHead) && dNode != sHead)
          dNode.addSHead(sHead, p.s);
      }
    }
   
    for (CTNode child : cNode.getChildren())
      initPBArgs(dTree, cTree, child);
  }
 
  private boolean isRefArgument(CTNode cNode)
  {
    if (CTLibEn.isRelPhrase(cNode))
      return true;
   
    if (cNode.isPTag(CTLibEn.PTAG_PP) && containsRefArgument(cNode))
      return true;

    return false;
  }
 
  private boolean containsRefArgument(CTNode cNode)
  {
    for (CTNode child : cNode.getChildren())
    {
      if (child.isPTagAny(CTLibEn.PTAG_ADJP, CTLibEn.PTAG_ADVP, CTLibEn.PTAG_NP, CTLibEn.PTAG_PP))
      {
        for (CTNode gc : child.getChildren())
        {
          if (!gc.isEmptyCategoryRec() && CTLibEn.isRelPhrase(gc))
            return true;   
        }
      }
    }
   
    return false;
  }
 
  private void arrangePBArgs(DEPTree dTree)
  {
    int i, size = dTree.size();
    List<SRLArc> remove;
    DEPNode node, head;
    String label;
   
    for (i=1; i<size; i++)
    {
      node   = dTree.get(i);
      remove = Lists.newArrayList();
     
      for (SRLArc arc : node.getSHeads())
      {
        head  = arc.getNode();
        label = arc.getLabel();
       
        if (ancestorHasSHead(node, head, label))
          remove.add(arc);
      //  else if (rnrHasSHead(node, head, label))
      //    remove.add(arc);
      }
     
      node.removeSHeads(remove);
    }
  }
 
  private boolean ancestorHasSHead(DEPNode dNode, DEPNode sHead, String label)
  {
    DEPNode dHead = dNode.getHead();
   
    while (dHead != null)
    {
      if (dHead.isArgumentOf(sHead, label))
        return true;
     
      dHead = dHead.getHead();
    }
   
    return false;
  }
 
  protected boolean rnrHasSHead(DEPNode dNode, DEPNode sHead, String label)
  {
    for (DEPArc rnr : dNode.getXHeads(DEPLibEn.DEP_RNR))
    {
      if (rnr.getNode().isArgumentOf(sHead, label))
        return true;
    }
   
    return false;
  }
 
  private void relabelArgNs(DEPTree dTree)
  {
    Map<String,DEPNode> map = new HashMap<String,DEPNode>();
    int i, size = dTree.size();
    List<SRLArc> remove;
    DEPNode node;
    String key;
   
    for (i=1; i<size; i++)
    {
      node   = dTree.get(i);
      remove = Lists.newArrayList();
     
      for (DEPArc arc : node.getSHeads())
      {
        if (arc.getLabel().startsWith(SRLLib.PREFIX_REFERENT))
          continue;
       
        if (arc.getLabel().startsWith("AM"))
          continue;
       
        key = arc.toString();
       
        if (map.containsKey(key))
          arc.setLabel(SRLLib.PREFIX_CONCATENATION + arc.getLabel());
        else
          map.put(key, node);
      }
     
      node.removeSHeads(remove);
    }
  }
 
  private DEPNode getDEPNode(DEPTree dTree, CTNode cNode)
  {
    return dTree.get(cNode.c2d.getDependencyHead().getTokenId() + 1);
  }
 
 
  // ============================= Get CoNLL labels =============================
 
/*  private void convertToCoNLLLabels(DEPTree tree)
  {
    int i, size = tree.size();
    DEPNode node;
   
    for (i=1; i<size; i++)
    {
      node = tree.get(i);
     
      if (node.getLabel().equals(DEPLibEn.DEP_ADVMOD))
        node.setLabel(DEPLibEn.CONLL_ADV);
      else if (node.getLabel().equals(DEPLibEn.DEP_APPOS))
        node.setLabel(DEPLibEn.CONLL_APPO);
      else if (node.getLabel().equals(DEPLibEn.DEP_CONJ))
        node.setLabel(DEPLibEn.CONLL_CONJ);
      else if (node.getLabel().equals(DEPLibEn.DEP_INTJ))
        node.setLabel(DEPLibEn.CONLL_INTJ);
      else if (node.getLabel().equals(DEPLibEn.DEP_META))
        node.setLabel(DEPLibEn.CONLL_META);
      else if (node.getLabel().equals(DEPLibEn.DEP_PARATAXIS))
        node.setLabel(DEPLibEn.CONLL_PRN);
      else if (node.getLabel().equals(DEPLibEn.DEP_PRT))
        node.setLabel(DEPLibEn.CONLL_PRT);
      else if (node.getLabel().equals(DEPLibEn.DEP_PUNCT))
        node.setLabel(DEPLibEn.CONLL_P);
      else if (node.getLabel().equals(DEPLibEn.DEP_ROOT))
        node.setLabel(DEPLibEn.CONLL_ROOT);
    }
  }
 
  public String getCoNLLLabel(CTNode C, CTNode P, CTNode p)
  {
    CTNode c = C.c2d.getPhraseHead();
    CTNode d = C.c2d.getDependencyHead();
    String label;
   
    // function tags
    if (hasAdverbialTag(C))
      return DEPLibEn.CONLL_ADV;

    if ((label = getCoNLLFunctionTag(C)) != null)
      return label;
   
    // coordination
    if (C.isPTag(CTLibEn.PTAG_UCP))
    {
      c.addFTags(C.getFTags());
      return getCoNLLLabel(c, P, p);
    }
   
    // complements
    if (P.isPTagAny(CTLibEn.PTAG_VP, CTLibEn.PTAG_SINV, CTLibEn.PTAG_SQ))
    {
      if (getObjectLabel(C) != null)  return DEPLibEn.CONLL_OBJ;
      if (isOprd(C))  return DEPLibEn.CONLL_OPRD;
      if (isXcomp(C))  return DEPLibEn.CONLL_XCOMP;
      if (isCcomp(C))  return DEPLibEn.CONLL_OBJ;
      if ((label = getCoNLLAuxLabel(C, p, d)) != null)  return label;
    }
   
    // subordinate conjunctions
    if (P.isPTag(CTLibEn.PTAG_SBAR) && p.isPTagAny(CTLibEn.POS_IN, CTLibEn.POS_TO, CTLibEn.POS_DT))
      return DEPLibEn.CONLL_SUB;
 
    // simple labels
    if ((label = getCoNLLSimpleLabel(C)) != null)
      return label;
   
    // default
    if (P.isPTagAny(CTLibEn.PTAG_PP, CTLibEn.PTAG_WHPP))
      return DEPLibEn.CONLL_PMOD;
   
    if (C.isPTag(CTLibEn.PTAG_SBAR) || isXcomp(C) || C.isPTag(CTLibEn.PTAG_PP))
      return DEPLibEn.CONLL_ADV;
   
    if (P.isPTag(CTLibEn.PTAG_QP))
      return DEPLibEn.CONLL_QMOD;
   
    if (P.isPTagAny(CTLibEn.PTAG_NML, CTLibEn.PTAG_NP, CTLibEn.PTAG_NX, CTLibEn.PTAG_WHNP) || CTLibEn.isNoun(p))
      return DEPLibEn.CONLL_NMOD;
   
    if ((P.isPTagAny(CTLibEn.PTAG_ADJP, CTLibEn.PTAG_ADVP, CTLibEn.PTAG_WHADJP, CTLibEn.PTAG_WHADVP) || CTLibEn.isAdjective(p) || CTLibEn.isAdverb(p)))
      return DEPLibEn.CONLL_AMOD;

    if (c != null)
    {
      if ((label = getCoNLLSimpleLabel(c)) != null)
        return label;
     
      if (CTLibEn.isAdverb(d))
        return DEPLibEn.CONLL_ADV;
    }
   
    return DEPLibEn.CONLL_DEP;
  }
 
  private String getCoNLLFunctionTag(CTNode C)
  {
    if (C.hasFTag(CTLibEn.FTAG_SBJ))
      return DEPLibEn.CONLL_SBJ;
   
    if (C.hasFTag(CTLibEn.FTAG_LGS))
      return DEPLibEn.CONLL_LGS;
   
    if (C.hasFTag(CTLibEn.FTAG_DTV))
      return DEPLibEn.CONLL_DTV;
   
    if (C.hasFTag(CTLibEn.FTAG_PRD))
      return DEPLibEn.CONLL_PRD;
   
    if (C.hasFTag(CTLibEn.FTAG_PUT))
      return DEPLibEn.CONLL_PUT;
   
    if (C.hasFTag(DEPLibEn.CONLL_EXTR))
      return DEPLibEn.CONLL_EXTR;
   
    return null;
  }
 
  private String getCoNLLAuxLabel(CTNode C, CTNode p, CTNode d)
  {
    CTNode pd = p.c2d.getDependencyHead();
   
    if (C.isPTag(CTLibEn.PTAG_VP) || CTLibEn.isVerb(d))
    {
      if (pd.isPTag(CTLibEn.POS_TO))
        return DEPLibEn.CONLL_IM;
     
      if (CTLibEn.isVerb(pd))
        return DEPLibEn.CONLL_VC;
    }
   
    return null;
  }
 
  private String getCoNLLSimpleLabel(CTNode C)
  {
    String label;
   
    if (CTLibEn.isConjunction(C))
      return DEPLibEn.CONLL_COORD;
   
    if (isPrt(C))
      return DEPLibEn.DEP_PRT;

    if ((label = getSpecialLabel(C)) != null)
      return label;
   
    return null;
  }*/
TOP

Related Classes of com.clearnlp.conversion.EnglishC2DConverter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.