Package edu.ucla.sspace.dv

Source Code of edu.ucla.sspace.dv.MediumPennTemplateAcceptor

/*
* Copyright 2010 David Jurgens
*
* This file is part of the S-Space package and is covered under the terms and
* conditions therein.
*
* The S-Space package is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation and distributed hereunder to you.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
* EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
* NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
* PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
* WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
* RIGHTS.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package edu.ucla.sspace.dv;

import edu.ucla.sspace.dependency.DependencyPath;
import edu.ucla.sspace.dependency.DependencyPathAcceptor;
import edu.ucla.sspace.dependency.DependencyTreeNode;

import edu.ucla.sspace.text.IteratorFactory;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;


/**     
* A {@code DependencyPathAcceptor} that accepts the minimum set of path
* templates specified by <a
* href="http://www.nlpado.de/~sebastian/pub/papers/cl07_pado.pdf">Padó and
* Lapata (2007)</a>.  This acceptor is designed to be used with the Penn
* Treebank part of speech <a
* href="http://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html">tag
* set</a> and dependency relations from the pre-1.4 Malt parser (not the
* Stanford typed dependencies).  Note that this template's patters is an
* <i>adaptation</i> of the the original patterns, which were specified using
* the Minipar relations and part of speech tags.
*
* @see MinimumTemplateAcceptor
* @see MaximumTemplateAcceptor
*/
public class MediumPennTemplateAcceptor implements DependencyPathAcceptor {

    static final Set<String> MEDIUM_TEMPLATES = new HashSet<String>();

    /**
     * A mapping from a specific POS tag, e.g. NN, JJS, to the general
     * <i>class</i> of part of speech tags, e.g. noun (N), to which it belongs.
     */
    static final Map<String,String> POS_TAG_TO_CLASS =
        new HashMap<String,String>();

    // Static block for initializing the POS_TAGS_TO_CLASS mapping using the
    // PennTags class
    static {
        // NOTE: the class tags are intentionally short to facilitate faster
        // matching
        for (String noun : PennTags.NOUN_POS_TAGS)
            POS_TAG_TO_CLASS.put(noun, "N"); // Noun
        for (String adj : PennTags.ADJ_POS_TAGS)
            POS_TAG_TO_CLASS.put(adj, "J"); // adJective
        for (String adv : PennTags.ADV_POS_TAGS)
            POS_TAG_TO_CLASS.put(adv, "R"); // adveRb
        for (String verb : PennTags.VERB_POS_TAGS)
            POS_TAG_TO_CLASS.put(verb, "V"); // Verb
    }
   
    /**
     * A mapping from a relation to the more general class of relations to which
     * it belongs, e.g. AMOD and PMOD would be mapped to a "lexical modifier"
     * relation class.
     */
    static final Map<String,String> REL_TO_CLASS =
        new HashMap<String,String>();
   
    // Static block for intializing REL_TO_CLASS
    static {
        for (String mod : PennTags.MODIFIERS)
            REL_TO_CLASS.put(mod, "mod");
    }
   
    // Static block for initializing the medium patterns.  Note that this block
    // uses the shorted class labels for parts of speech, e.g. NNS, NP, NN -> N,
    // in order to handle the combinatorial explosion of patterns that would
    // need to be expressed when moving from the Minipar to Penn tag sets.
    static {
        MEDIUM_TEMPLATES.add("J:nmod:N,N:amod:(null)");
        MEDIUM_TEMPLATES.add("J:nmod:N,N:vmod:(null)");
        MEDIUM_TEMPLATES.add("J:nmod:N,N:nmod:(null)");
        MEDIUM_TEMPLATES.add("J:nmod:N,N:nmod:N");
        MEDIUM_TEMPLATES.add("J:sbj:N,N:amod:(null)");
        MEDIUM_TEMPLATES.add("J:sbj:N,N:nmod:(null)");
        MEDIUM_TEMPLATES.add("J:sbj:N,N:vmod:(null)");
        MEDIUM_TEMPLATES.add("J:sbj:N,N:nmod:N");

        MEDIUM_TEMPLATES.add("R:nmod:N,N:amod:(null)");
        MEDIUM_TEMPLATES.add("R:nmod:N,N:vmod:(null)");
        MEDIUM_TEMPLATES.add("R:nmod:N,N:nmod:(null)");
        MEDIUM_TEMPLATES.add("R:nmod:N,N:nmod:N");
        MEDIUM_TEMPLATES.add("R:sbj:N,N:amod:(null)");
        MEDIUM_TEMPLATES.add("R:sbj:N,N:vmod:(null)");
        MEDIUM_TEMPLATES.add("R:sbj:N,N:nmod:(null)");
        MEDIUM_TEMPLATES.add("R:sbj:N,N:nmod:N");


        MEDIUM_TEMPLATES.add("N:coord:N,N:mod:(null)");
        MEDIUM_TEMPLATES.add("N:coord:N,N:nmod:N");
        MEDIUM_TEMPLATES.add("N:gen:N,N:nmod:(null)");
        MEDIUM_TEMPLATES.add("N:gen:N,N:nmod:N");
        MEDIUM_TEMPLATES.add("N:nmod:N,N:coord:N");
        MEDIUM_TEMPLATES.add("N:nmod:N,N:coord:N,N:nmod:N");
        MEDIUM_TEMPLATES.add("N:nmod:N,N:gen:N");
        MEDIUM_TEMPLATES.add("N:nmod:N,N:gen:N,N:nmod:N");
        MEDIUM_TEMPLATES.add("N:nmod:N,N:mod:A");
        MEDIUM_TEMPLATES.add("N:nmod:N,N:mod:TO");
        MEDIUM_TEMPLATES.add("N:nmod:N,N:obj:V");
        MEDIUM_TEMPLATES.add("N:nmod:N,N:prd:V");
        MEDIUM_TEMPLATES.add("N:nmod:N,N:sbj:A");
        MEDIUM_TEMPLATES.add("N:nmod:N,N:sbj:V");

        MEDIUM_TEMPLATES.add("(null):mod:N,N:coord:N");
        MEDIUM_TEMPLATES.add("(null):mod:N,N:coord:N,N:mod:(null)");
        MEDIUM_TEMPLATES.add("(null):mod:N,N:gen:N");
        MEDIUM_TEMPLATES.add("(null):mod:N,N:gen:N,N:mod:(null)");
        MEDIUM_TEMPLATES.add("(null):mod:N,N:amod:J");
        MEDIUM_TEMPLATES.add("(null):mod:N,N:adv:R");
        MEDIUM_TEMPLATES.add("(null):mod:N,N:pmod:TO");
        MEDIUM_TEMPLATES.add("(null):mod:N,N:obj:V");
        MEDIUM_TEMPLATES.add("(null):mod:N,N:prd:V");
        MEDIUM_TEMPLATES.add("(null):mod:N,N:sbj:J");
        MEDIUM_TEMPLATES.add("(null):mod:N,N:sbj:R");
        MEDIUM_TEMPLATES.add("(null):mod:N,N:sbj:V");

        MEDIUM_TEMPLATES.add("TO:mod:N,N:mod:(null)");
        MEDIUM_TEMPLATES.add("TO:mod:N,N:nmod:N");

        MEDIUM_TEMPLATES.add("V:obj:N,N:mod:(null)");
        MEDIUM_TEMPLATES.add("V:obj:N,N:nmod:N");
        MEDIUM_TEMPLATES.add("V:sbj:N,N:mod:(null)");
        MEDIUM_TEMPLATES.add("V:sbj:N,N:nmod:N");
        MEDIUM_TEMPLATES.add("V:prd:N,N:mod:(null)");
        MEDIUM_TEMPLATES.add("V:prd:N,N:nmod:N");
    }
   
    /**
     * Creates the acceptor with its standard templates
     */
    public MediumPennTemplateAcceptor() { }
  
    /**
     * Returns {@code true} if the path matches one of the predefined templates
     *
     * @param path a dependency path
     *
     * @return {@code true} if the path matches a template
     */
    public boolean accepts(DependencyPath path) {
        return acceptsInternal(path);
    }

    /**
     * A package-private method that checks whether the path matches any of the
     * predefined templates.  This method is provided so other template classes
     * have access to the accept logic used by this class.
     *
     * @param path a dependency path
     *
     * @return {@code true} if the path matches a template
     */
    static boolean acceptsInternal(DependencyPath path) {
        // First check whether the minimum template acceptor would allow this
        // path
        if (MinimumPennTemplateAcceptor.acceptsInternal(path))
            return true;

        // Filter out paths that can't match the template due to length
        if (path.length() > 3)
            return false;

        int pathLength = path.length();

        // The medium set of templates contains "null" matches which are wild
        // cards against any part of speech.  We handle these by generating
        // three possible pattern instances that represent the provided path,
        // two of which include the wildcard "null", one each end.  If any of
        // these patterns are found in the medium set, the path is valid.
        StringBuilder nullStart = new StringBuilder(pathLength * 16);
        StringBuilder nullEnd = new StringBuilder(pathLength * 16);
        StringBuilder noNulls = new StringBuilder(pathLength * 16);

        // Iterate over each pair in the path and create the pattern string that
        // represents this path.  The pattern string is pos:rel:pos[,...] .
        DependencyTreeNode first = path.first();
        for (int i = 1; i < pathLength; ++i) {
            DependencyTreeNode second = path.getNode(i);
            // Check that the nodes weren't filtered out.  If so reject the path
            // even if the part of speech and relation text may have matched a
            // template.
            if (first.word().equals(IteratorFactory.EMPTY_TOKEN))
                return false;

            // Get the relation between the two nodes
            String rel = path.getRelation(i - 1);
            String firstPos = first.pos();
            String secPos = second.pos();

            // Check whether each POS has a class category to which it should be
            // mapped.  These classes are necessary to handle the singificant
            // number of variations for a general category of POS's, e.g. verb
            // -> VBZ, VBJ, etc., which were not present when the MINIPAR tags
            // were designed by Padó and Lapata.
            String class1 = POS_TAG_TO_CLASS.get(firstPos);
            String class2 = POS_TAG_TO_CLASS.get(secPos);
           
            if (class1 != null)
                firstPos = class1;
            if (class2 != null)
                secPos = class2;

            // Similarly, in order to handle the lex-mod relation, we check
            // whether the relation, e.g. PMOD, can be mapped to the general
            // lexical modifier class.
            String relClass = REL_TO_CLASS.get(rel);
            if (relClass != null)
                rel = relClass;
           
            // Create the three relation patterns by checking the current index
            // compared to the path length.
            nullStart.append((i == 1) ? "(null)" : firstPos);
            nullStart.append(":").append(rel).append(":").append(secPos);

            nullEnd.append(firstPos).append(":").append(rel).append(":");
            nullEnd.append((i + 1 == pathLength) ? "(null)" : secPos);

            noNulls.append(firstPos).append(":").append(rel)
                .append(":").append(secPos);

            // Check whether more elements existing, and if so, add the ','
            if (i + 1 < pathLength) {
                nullStart.append(",");
                nullEnd.append(",");
                noNulls.append(",");
            }

            // Last, shift over the node
            first = second;
        }

        // Extra case for the last token in the path
        if (first.word().equals(IteratorFactory.EMPTY_TOKEN))
            return false;

        boolean match = MEDIUM_TEMPLATES.contains(noNulls.toString())
            || MEDIUM_TEMPLATES.contains(nullStart.toString())
            || MEDIUM_TEMPLATES.contains(nullEnd.toString());
       
        return match;
    }

    /**
     * {@inheritDoc}
     */
    public int maxPathLength() {
        return 4;
    }
}
TOP

Related Classes of edu.ucla.sspace.dv.MediumPennTemplateAcceptor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.