Package edu.washington.cs.knowitall.extractor.conf.featureset

Source Code of edu.washington.cs.knowitall.extractor.conf.featureset.PosFeature

package edu.washington.cs.knowitall.extractor.conf.featureset;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import edu.washington.cs.knowitall.commonlib.Range;
import edu.washington.cs.knowitall.nlp.ChunkedSentence;
import edu.washington.cs.knowitall.nlp.extraction.ChunkedArgumentExtraction;
import edu.washington.cs.knowitall.nlp.extraction.ChunkedBinaryExtraction;
import edu.washington.cs.knowitall.nlp.extraction.ChunkedExtraction;

/**
* Parent class for any feature specific to the POS layer
*
* @author Rob
*/
public abstract class PosFeature extends ExtractionFeature {

    public static final String[] allVerbPosTags = new String[] { "VB", "VBD",
            "VBG", "VBN", "VBP", "VBZ" };

    private Set<String> posTags;

    public PosFeature(String... posTags) {

        this(Arrays.asList(posTags));
    }

    public PosFeature(Collection<String> posTags) {

        this.posTags = new HashSet<String>();
        this.posTags.addAll(posTags);
    }

    @Override
    protected boolean testAtIndex(Integer index, ChunkedSentence sentence) {

        String posTag = sentence.getPosTag(index);
        return posTags.contains(posTag);
    }

    /**
     * Given a ChunkedBinaryExtraction, I need the implementation to tell me at
     * which indices to test my predicate.
     *
     * @return
     */
    @Override
    protected abstract Range rangeToExamine(ChunkedBinaryExtraction cbe);

    /**
     * Get a feature that fires if any pos tag from posTags is present within
     * Arg 2.
     *
     * @param posTags
     * @return
     */
    public static PosFeature withinArg2(String... posTags) {
        return new PosFeature(posTags) {
            @Override
            protected Range rangeToExamine(ChunkedBinaryExtraction cbe) {
                return cbe.getArgument2().getRange();
            }
        };
    }

    /**
     * Get a feature that fires if any pos tag from posTags is present within
     * Arg 1.
     *
     * @param posTags
     * @return
     */
    public static PosFeature withinArg1(String... posTags) {
        return new PosFeature(posTags) {
            @Override
            protected Range rangeToExamine(ChunkedBinaryExtraction cbe) {
                return cbe.getArgument1().getRange();
            }
        };
    }

    /**
     * Get a feature that fires if any pos tag from posTags is present within a
     * given single verb relation (false if multiple-token rel)
     *
     * @param posTags
     * @return
     */
    public static PosFeature relSingleVerb() {
        return new PosFeature(PosFeature.allVerbPosTags) {
            @Override
            protected Range rangeToExamine(ChunkedBinaryExtraction cbe) {

                Range range = cbe.getRelation().getRange();
                if (range.getLength() == 1)
                    return range;
                else
                    return Range.EMPTY;
            }
        };
    }

    /**
     * Get a feature that fires if any pos tag from posTags is present within
     * the relation.
     *
     * @param posTags
     * @return
     */
    public static PosFeature withinRel(String... posTags) {
        return new PosFeature(posTags) {
            @Override
            protected Range rangeToExamine(ChunkedBinaryExtraction cbe) {
                return cbe.getRelation().getRange();
            }
        };
    }

    /**
     * Get a feature that fires if any pos tag from posTags is present at the
     * index immediately before arg1.
     *
     * @param posTags
     * @return
     */
    public static PosFeature rightBeforeArg1(String... posTags) {
        return new PosFeature(posTags) {
            @Override
            protected Range rangeToExamine(ChunkedBinaryExtraction cbe) {
                ChunkedArgumentExtraction arg1 = cbe.getArgument1();
                int index = arg1.getStart() - 1;
                if (index < 0 || index >= arg1.getSentence().getLength()) {
                    return Range.EMPTY;
                } else
                    return Range.fromInterval(index, index + 1);
            }
        };
    }

    /**
     * Get a feature that fires if any pos tag from posTags is present at the
     * index immediately after arg2.
     *
     * @param posTags
     * @return
     */
    public static PosFeature rightAfterArg2(String... posTags) {
        return new PosFeature(posTags) {
            @Override
            protected Range rangeToExamine(ChunkedBinaryExtraction cbe) {
                ChunkedArgumentExtraction arg2 = cbe.getArgument2();
                int index = arg2.getStart() + arg2.getLength();
                if (index < 0 || index >= arg2.getSentence().getLength()) {
                    return Range.EMPTY;
                } else
                    return Range.fromInterval(index, index + 1);
            }
        };
    }

    /**
     * Get a feature that fires if any pos tag from posTags is present
     * immediately before the relation.
     *
     * @param posTags
     * @return
     */
    public static PosFeature rightBeforeRel(String... posTags) {
        return new PosFeature(posTags) {
            @Override
            protected Range rangeToExamine(ChunkedBinaryExtraction cbe) {
                ChunkedExtraction rel = cbe.getRelation();
                int index = rel.getStart() - 1;
                if (index < 0 || index >= rel.getSentence().getLength()) {
                    return Range.EMPTY;
                } else
                    return Range.fromInterval(index, index + 1);
            }
        };
    }
}
TOP

Related Classes of edu.washington.cs.knowitall.extractor.conf.featureset.PosFeature

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.