}
/**
 * Builds a feature vector describing a span of the utterance that was deleted
 * during paraphrasing. Three feature families are emitted, each gated on whether
 * its domain is enabled via {@code ParaphraseFeatureMatcher.containsDomain}:
 * <ul>
 *   <li>{@code DELETE}: one indicator feature per linguistic property of the deleted span.</li>
 *   <li>{@code DELETE_IN_CONTEXT}: each span property paired with each property of the
 *       single adjacent token on the left ({@code lc_}) and right ({@code rc_}), when present.</li>
 *   <li>{@code DELETE_V_SEM}: verb semantic-class features — fired for each utterance lemma
 *       that co-occurs with the deleted phrase according to {@code verbSemclassMap}, plus a
 *       real-valued "sim" feature weighted by log(count + 1).</li>
 * </ul>
 * NOTE(review): the context-boundary checks ({@code span.start>0},
 * {@code span.end<utterance.numTokens()} with right context at {@code [end, end+1)})
 * indicate {@code span} is a half-open token interval [start, end) — confirm against
 * {@code Interval}'s definition.
 *
 * @param span      token interval of the deleted material within {@code utterance}
 * @param utterance the analyzed utterance the span was deleted from
 * @return a {@code FeatureVector} holding the enabled feature families (may be empty
 *         if no domain is enabled or nothing matches)
 */
private FeatureVector featurizeDeletedSpan(Interval span, LanguageInfo utterance) {
// Linguistic properties (POS, NER, lemma classes, etc. — whatever getSpanProperties yields)
// of the deleted span itself.
List<String> spanProperties = utterance.getSpanProperties(span.start,span.end);
FeatureVector res = new FeatureVector();
// NOTE(review): both containsDomain checks below are loop-invariant and could be
// hoisted out of this loop; likewise the left/right context property lookups are
// recomputed for every spanProperty. Left as-is to preserve exact behavior.
for(String spanProperty: spanProperties) {
if(ParaphraseFeatureMatcher.containsDomain(DELETE))
res.add(DELETE,"match: " + spanProperty);
if(ParaphraseFeatureMatcher.containsDomain(DELETE_IN_CONTEXT)) {
// Left context: the single token immediately preceding the span, if any.
if(span.start>0) {
List<String> lcProperties = utterance.getSpanProperties(span.start-1,span.start);
for(String lcProperty: lcProperties) {
res.add(DELETE_IN_CONTEXT,"match_" + spanProperty+",lc_"+lcProperty); //properties of deleted match
}
}
// Right context: the single token immediately following the span, if any.
if(span.end<utterance.numTokens()) {
List<String> rcProperties = utterance.getSpanProperties(span.end,span.end+1);
for(String rcProperty: rcProperties) {
res.add(DELETE_IN_CONTEXT,"match_" + spanProperty+",rc_"+rcProperty); //properties of deleted match
}
}
}
}
//verb semclass features
if(ParaphraseFeatureMatcher.containsDomain(DELETE_V_SEM)) {
// Key the co-occurrence table on the lemmatized form of the deleted phrase.
String lemmaTokens = utterance.lemmaPhrase(span.start, span.end);
Counter<String> cooccurringWords = verbSemclassMap.get(lemmaTokens);
if(cooccurringWords!=null) {
// Scan every lemma in the utterance (including those inside the span itself —
// NOTE(review): confirm whether span-internal lemmas should be excluded).
for(String lemma: utterance.lemmaTokens) {
if(cooccurringWords.containsKey(lemma)) {
// Indicator feature for the (deleted phrase, context lemma) pair...
res.add(DELETE_V_SEM,"match="+lemmaTokens+",context="+lemma);
// ...and a graded similarity feature; log(count+1) damps large counts
// and is well-defined for count == 0.
res.add(DELETE_V_SEM,"sim",Math.log(cooccurringWords.getCount(lemma)+1));
}
}
}
}
return res;