// Source code of edu.washington.cs.knowitall.extractor.conf.HypotheticalFeatures

package edu.washington.cs.knowitall.extractor.conf;


import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;


import com.google.common.base.Predicate;


import edu.washington.cs.knowitall.extractor.conf.featureset.VerbTokenFeature;
import edu.washington.cs.knowitall.extractor.conf.featureset.TokenFeature;
import edu.washington.cs.knowitall.nlp.ChunkedSentence;
import edu.washington.cs.knowitall.nlp.extraction.ChunkedArgumentExtraction;
import edu.washington.cs.knowitall.nlp.extraction.ChunkedBinaryExtraction;
import edu.washington.cs.knowitall.nlp.extraction.ChunkedExtraction;
import edu.washington.cs.knowitall.normalization.BasicFieldNormalizer;
import edu.washington.cs.knowitall.sequence.SequenceException;


/**
 * Features designed to detect sentences expressing a hypothesis or belief and
 * not a fact.
 *
 * @author Rob
 *
 */
public class HypotheticalFeatures {


    private BasicFieldNormalizer stemmer;


    private HashMap<String, Predicate<ChunkedBinaryExtraction>> featureMap;


    private static String[] ifWords = new String[] { "if", "whether", "though",
            "although" };
    private static String[] thatWords = new String[] { "that", "which", "who" };
    private static String[] mayWords = new String[] { "may", "might", "would",
            "could", "should", "suppose" };


    // keyword lists. See end of file for hardcoded keywords.
    private Set<String> ifSet;
    private Set<String> maySet;
    private Set<String> comSet;
    private Set<String> cogSet;
    private Set<String> thatSet;


    public HypotheticalFeatures() {


        this.stemmer = new BasicFieldNormalizer();


        initKeywordSets();


        initFeatureSet();
    }


    private void initKeywordSets() {


        ifSet = new HashSet<String>();
        ifSet.addAll(Arrays.asList(ifWords));


        maySet = new HashSet<String>();
        maySet.addAll(Arrays.asList(mayWords));


        comSet = new HashSet<String>();
        comSet.addAll(Arrays.asList(NestedFeatures.comWords));


        cogSet = new HashSet<String>();
        cogSet.addAll(Arrays.asList(NestedFeatures.cogWords));


        thatSet = new HashSet<String>();
        thatSet.addAll(Arrays.asList(thatWords));
    }


    public Map<String, Predicate<ChunkedBinaryExtraction>> getFeatureMap() {


        return featureMap;
    }


    private void initFeatureSet() throws SequenceException {
        initFeatureMap();
    }


    private void initFeatureMap() {


        featureMap = new HashMap<String, Predicate<ChunkedBinaryExtraction>>();


        featureMap.put("hyp: that,which,who imm before arg1",
                tokenImmediatelyBeforeArg1(thatSet));
        featureMap.put("hyp: that,which,who btw arg1/pred",
                tokenBtwArg1AndPred(thatSet));
        featureMap.put("hyp: if,whether,though,although anwh before arg1",
                TokenFeature.anywhereBeforeArg1(ifSet));
        featureMap.put(
                "hyp: may,might,would,could,should,suppose anwh before arg1",
                TokenFeature.anywhereBeforeArg1(maySet));
        featureMap.put("hyp: communic verb anwh before arg1",
                VerbTokenFeature.anywhereBeforeArg1(comSet));
        featureMap.put("hyp: cognitn verb anwh before arg1",
                VerbTokenFeature.anywhereBeforeArg1(cogSet));
        featureMap.put("hyp: communic verb anwh after arg2",
                VerbTokenFeature.anywhereAfterArg2(comSet));
        featureMap.put("rel is single communication verb",
                VerbTokenFeature.relSingleToken(comSet));
    }


    private Predicate<ChunkedBinaryExtraction> tokenBtwArg1AndPred(
            final Set<String> keyWords) {
        return new Predicate<ChunkedBinaryExtraction>() {
            @Override
            public boolean apply(ChunkedBinaryExtraction extr) {


                ChunkedSentence sentence = extr.getSentence();
                ChunkedArgumentExtraction arg1 = extr.getArgument1();
                ChunkedExtraction rel = extr.getRelation();
                for (int i = arg1.getStart() + arg1.getLength(); i < rel
                        .getStart(); ++i) {
                    String token = sentence.getToken(i);
                    String pos = sentence.getPosTag(i);


                    String lemma = stemmer.stemSingleToken(token, pos);
                    if (keyWords.contains(lemma.toLowerCase())) {
                        return true;
                    }
                }
                return false;
            }
        };
    }


    private Predicate<ChunkedBinaryExtraction> tokenImmediatelyBeforeArg1(
            final Set<String> keyWords) {
        return new Predicate<ChunkedBinaryExtraction>() {
            @Override
            public boolean apply(ChunkedBinaryExtraction extr) {


                ChunkedSentence sentence = extr.getSentence();
                ChunkedArgumentExtraction arg1 = extr.getArgument1();
                int i = arg1.getStart() - 1;
                if (i < 0)
                    return false;
                String token = sentence.getToken(i);
                String pos = sentence.getPosTag(i);
                String lemma = stemmer.stemSingleToken(token, pos);
                if (keyWords.contains(lemma.toLowerCase())) {
                    return true;
                }
                return false;
            }
        };
    }
}
// Source code of edu.washington.cs.knowitall.extractor.conf.HypotheticalFeatures

// Related classes of edu.washington.cs.knowitall.extractor.conf.HypotheticalFeatures