Package edu.washington.cs.knowitall.extractor.conf

Source Code of edu.washington.cs.knowitall.extractor.conf.HypotheticalFeatures

package edu.washington.cs.knowitall.extractor.conf;

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import com.google.common.base.Predicate;

import edu.washington.cs.knowitall.extractor.conf.featureset.VerbTokenFeature;
import edu.washington.cs.knowitall.extractor.conf.featureset.TokenFeature;
import edu.washington.cs.knowitall.nlp.ChunkedSentence;
import edu.washington.cs.knowitall.nlp.extraction.ChunkedArgumentExtraction;
import edu.washington.cs.knowitall.nlp.extraction.ChunkedBinaryExtraction;
import edu.washington.cs.knowitall.nlp.extraction.ChunkedExtraction;
import edu.washington.cs.knowitall.normalization.BasicFieldNormalizer;
import edu.washington.cs.knowitall.sequence.SequenceException;

/**
* Features designed to detect sentences expressing a hypothesis or belief and
* not a fact.
*
* @author Rob
*
*/
public class HypotheticalFeatures {

    private BasicFieldNormalizer stemmer;

    private HashMap<String, Predicate<ChunkedBinaryExtraction>> featureMap;

    private static String[] ifWords = new String[] { "if", "whether", "though",
            "although" };
    private static String[] thatWords = new String[] { "that", "which", "who" };
    private static String[] mayWords = new String[] { "may", "might", "would",
            "could", "should", "suppose" };

    // keyword lists. See end of file for hardcoded keywords.
    private Set<String> ifSet;
    private Set<String> maySet;
    private Set<String> comSet;
    private Set<String> cogSet;
    private Set<String> thatSet;

    public HypotheticalFeatures() {

        this.stemmer = new BasicFieldNormalizer();

        initKeywordSets();

        initFeatureSet();
    }

    private void initKeywordSets() {

        ifSet = new HashSet<String>();
        ifSet.addAll(Arrays.asList(ifWords));

        maySet = new HashSet<String>();
        maySet.addAll(Arrays.asList(mayWords));

        comSet = new HashSet<String>();
        comSet.addAll(Arrays.asList(NestedFeatures.comWords));

        cogSet = new HashSet<String>();
        cogSet.addAll(Arrays.asList(NestedFeatures.cogWords));

        thatSet = new HashSet<String>();
        thatSet.addAll(Arrays.asList(thatWords));
    }

    public Map<String, Predicate<ChunkedBinaryExtraction>> getFeatureMap() {

        return featureMap;
    }

    private void initFeatureSet() throws SequenceException {
        initFeatureMap();
    }

    private void initFeatureMap() {

        featureMap = new HashMap<String, Predicate<ChunkedBinaryExtraction>>();

        featureMap.put("hyp: that,which,who imm before arg1",
                tokenImmediatelyBeforeArg1(thatSet));
        featureMap.put("hyp: that,which,who btw arg1/pred",
                tokenBtwArg1AndPred(thatSet));
        featureMap.put("hyp: if,whether,though,although anwh before arg1",
                TokenFeature.anywhereBeforeArg1(ifSet));
        featureMap.put(
                "hyp: may,might,would,could,should,suppose anwh before arg1",
                TokenFeature.anywhereBeforeArg1(maySet));
        featureMap.put("hyp: communic verb anwh before arg1",
                VerbTokenFeature.anywhereBeforeArg1(comSet));
        featureMap.put("hyp: cognitn verb anwh before arg1",
                VerbTokenFeature.anywhereBeforeArg1(cogSet));
        featureMap.put("hyp: communic verb anwh after arg2",
                VerbTokenFeature.anywhereAfterArg2(comSet));
        featureMap.put("rel is single communication verb",
                VerbTokenFeature.relSingleToken(comSet));
    }

    private Predicate<ChunkedBinaryExtraction> tokenBtwArg1AndPred(
            final Set<String> keyWords) {
        return new Predicate<ChunkedBinaryExtraction>() {
            @Override
            public boolean apply(ChunkedBinaryExtraction extr) {

                ChunkedSentence sentence = extr.getSentence();
                ChunkedArgumentExtraction arg1 = extr.getArgument1();
                ChunkedExtraction rel = extr.getRelation();
                for (int i = arg1.getStart() + arg1.getLength(); i < rel
                        .getStart(); ++i) {
                    String token = sentence.getToken(i);
                    String pos = sentence.getPosTag(i);

                    String lemma = stemmer.stemSingleToken(token, pos);
                    if (keyWords.contains(lemma.toLowerCase())) {
                        return true;
                    }
                }
                return false;
            }
        };
    }

    private Predicate<ChunkedBinaryExtraction> tokenImmediatelyBeforeArg1(
            final Set<String> keyWords) {
        return new Predicate<ChunkedBinaryExtraction>() {
            @Override
            public boolean apply(ChunkedBinaryExtraction extr) {

                ChunkedSentence sentence = extr.getSentence();
                ChunkedArgumentExtraction arg1 = extr.getArgument1();
                int i = arg1.getStart() - 1;
                if (i < 0)
                    return false;
                String token = sentence.getToken(i);
                String pos = sentence.getPosTag(i);
                String lemma = stemmer.stemSingleToken(token, pos);
                if (keyWords.contains(lemma.toLowerCase())) {
                    return true;
                }
                return false;
            }
        };
    }
}
TOP

Related Classes of edu.washington.cs.knowitall.extractor.conf.HypotheticalFeatures

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.