Dictionaries dict,
Set<Mention> roleSet,
Semantics semantics) throws Exception {
boolean ret = false;
Mention mention = mentionCluster.getRepresentativeMention();
if (flags.USE_INCOMPATIBLES) {
// Check our list of incompatible mentions and don't cluster them together
// Allows definite no's from previous sieves to propagate down
if (document.isIncompatible(mentionCluster, potentialAntecedent)) {
SieveCoreferenceSystem.logger.finest("INCOMPATIBLE clusters: not match: " +ant.spanToString()+"("+ant.mentionID +
") :: "+ mention.spanToString()+"("+mention.mentionID + ") -> "+(mention.goldCorefClusterID!=ant.goldCorefClusterID));
return false;
}
}
if (flags.DO_PRONOUN && Math.abs(mention2.sentNum-ant.sentNum) > 3 &&
mention2.person!=Person.I && mention2.person!=Person.YOU) {
return false;
}
if (mention2.lowercaseNormalizedSpanString().equals("this") && Math.abs(mention2.sentNum-ant.sentNum) > 3) {
return false;
}
if (mention2.person==Person.YOU && document.docType==DocType.ARTICLE &&
mention2.headWord.get(CoreAnnotations.SpeakerAnnotation.class).equals("PER0")) {
return false;
}
if (document.conllDoc != null) {
if (ant.generic && ant.person==Person.YOU) return false;
if (mention2.generic) return false;
}
if(mention2.insideIn(ant) || ant.insideIn(mention2)) return false;
if(flags.USE_DISCOURSEMATCH) {
String mString = mention.lowercaseNormalizedSpanString();
String antString = ant.lowercaseNormalizedSpanString();
// mention and ant both belong to the same speaker cluster
if (mention.speakerInfo != null && mention.speakerInfo == ant.speakerInfo) {
SieveCoreferenceSystem.logger.finest("discourse match: maps to same speaker: " + mention.spanToString() + "\tmatched\t" + ant.spanToString());
return true;
}
// (I - I) in the same speaker's quotation.
if (mention.number==Number.SINGULAR && dict.firstPersonPronouns.contains(mString)
&& ant.number==Number.SINGULAR && dict.firstPersonPronouns.contains(antString)
&& Rules.entitySameSpeaker(document, mention, ant)){
SieveCoreferenceSystem.logger.finest("discourse match: 1st person same speaker: " + mention.spanToString() + "\tmatched\t" + ant.spanToString());
return true;
}
// (speaker - I)
if ((mention.number==Number.SINGULAR && dict.firstPersonPronouns.contains(mString))
&& Rules.antecedentIsMentionSpeaker(document, mention, ant, dict)) {
SieveCoreferenceSystem.logger.finest("discourse match: 1st person mention speaker match antecedent: " + mention.spanToString() + "\tmatched\t" + ant.spanToString());
if (mention.speakerInfo == null && ant.speakerInfo != null) { mention.speakerInfo = ant.speakerInfo; }
return true;
}
// (I - speaker)
if ((ant.number==Number.SINGULAR && dict.firstPersonPronouns.contains(antString))
&& Rules.antecedentIsMentionSpeaker(document, ant, mention, dict)) {
SieveCoreferenceSystem.logger.finest("discourse match: 1st person antecedent speaker match mention: " + mention.spanToString() + "\tmatched\t" + ant.spanToString());
if (ant.speakerInfo == null && mention.speakerInfo != null) { ant.speakerInfo = mention.speakerInfo; }
return true;
}
// Can be iffy if more than two speakers... but still should be okay most of the time
if (dict.secondPersonPronouns.contains(mString)
&& dict.secondPersonPronouns.contains(antString)
&& Rules.entitySameSpeaker(document, mention, ant)) {
SieveCoreferenceSystem.logger.finest("discourse match: 2nd person same speaker: " + mention.spanToString() + "\tmatched\t" + ant.spanToString());
return true;
}
// previous I - you or previous you - I in two person conversation
if (((mention.person==Person.I && ant.person==Person.YOU
|| (mention.person==Person.YOU && ant.person==Person.I))
&& (mention.headWord.get(CoreAnnotations.UtteranceAnnotation.class)-ant.headWord.get(CoreAnnotations.UtteranceAnnotation.class) == 1)
&& document.docType==DocType.CONVERSATION)) {
SieveCoreferenceSystem.logger.finest("discourse match: between two person: " + mention.spanToString() + "\tmatched\t" + ant.spanToString());
return true;
}
if (dict.reflexivePronouns.contains(mention.headString) && Rules.entitySubjectObject(mention, ant)){
SieveCoreferenceSystem.logger.finest("discourse match: reflexive pronoun: " + ant.spanToString() + "(" + ant.mentionID + ") :: " + mention.spanToString() + "(" + mention.mentionID + ") -> " + (mention.goldCorefClusterID == ant.goldCorefClusterID));
return true;
}
}
if (Constants.USE_DISCOURSE_CONSTRAINTS && !flags.USE_EXACTSTRINGMATCH && !flags.USE_RELAXED_EXACTSTRINGMATCH
&& !flags.USE_APPOSITION && !flags.USE_WORDS_INCLUSION) {
for(Mention m : mentionCluster.getCorefMentions()) {
for(Mention a : potentialAntecedent.getCorefMentions()){
// angelx - not sure about the logic here, disable (code was also refactored from original)
// vv gabor - re-enabled code (seems to improve performance) vv
if(m.person!=Person.I && a.person!=Person.I &&
(Rules.antecedentIsMentionSpeaker(document, m, a, dict) || Rules.antecedentIsMentionSpeaker(document, a, m, dict))) {
SieveCoreferenceSystem.logger.finest("Incompatibles: not match(speaker): " +ant.spanToString()+"("+ant.mentionID + ") :: "+ mention.spanToString()+"("+mention.mentionID + ") -> "+(mention.goldCorefClusterID!=ant.goldCorefClusterID));
document.addIncompatible(m, a);
return false;
}
// ^^ end block of code in question ^^
int dist = Math.abs(m.headWord.get(CoreAnnotations.UtteranceAnnotation.class) - a.headWord.get(CoreAnnotations.UtteranceAnnotation.class));
if(document.docType!=DocType.ARTICLE && dist==1 && !Rules.entitySameSpeaker(document, m, a)) {
String mSpeaker = document.speakers.get(m.headWord.get(CoreAnnotations.UtteranceAnnotation.class));
String aSpeaker = document.speakers.get(a.headWord.get(CoreAnnotations.UtteranceAnnotation.class));
if(m.person==Person.I && a.person==Person.I) {
SieveCoreferenceSystem.logger.finest("Incompatibles: neighbor I: " + ant.spanToString() + "(" + ant.mentionID + "," + aSpeaker + ") :: "
+ mention.spanToString() + "(" + mention.mentionID + "," + mSpeaker + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID));
document.addIncompatible(m, a);
return false;
}
if(m.person==Person.YOU && a.person==Person.YOU) {
SieveCoreferenceSystem.logger.finest("Incompatibles: neighbor YOU: " + ant.spanToString() + "(" + ant.mentionID + "," + aSpeaker + ") :: "
+ mention.spanToString() + "(" + mention.mentionID + "," + mSpeaker + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID));
document.addIncompatible(m, a);
return false;
}
// This is weak since we can refer to both speakers
if(m.person==Person.WE && a.person==Person.WE) {
SieveCoreferenceSystem.logger.finest("Incompatibles: neighbor WE: " + ant.spanToString() + "(" + ant.mentionID + "," + aSpeaker + ") :: "
+ mention.spanToString() + "(" + mention.mentionID + "," + mSpeaker + ") -> " + (mention.goldCorefClusterID != ant.goldCorefClusterID));
document.addIncompatible(m, a);
return false;
}
}
}
}
if(document.docType==DocType.ARTICLE) {
for(Mention m : mentionCluster.getCorefMentions()) {
for(Mention a : potentialAntecedent.getCorefMentions()){
if(Rules.entitySubjectObject(m, a)) {
SieveCoreferenceSystem.logger.finest("Incompatibles: subject-object: "+ant.spanToString()+"("+ant.mentionID + ") :: "+ mention.spanToString()+"("+mention.mentionID + ") -> "+(mention.goldCorefClusterID!=ant.goldCorefClusterID));
document.addIncompatible(m, a);
return false;
}
}
}
}
}
// Incompatibility constraints - do before match checks
if(flags.USE_iwithini && Rules.entityIWithinI(mention, ant, dict)) {
SieveCoreferenceSystem.logger.finest("Incompatibles: iwithini: "+ant.spanToString()+"("+ant.mentionID + ") :: "+ mention.spanToString()+"("+mention.mentionID + ") -> "+(mention.goldCorefClusterID!=ant.goldCorefClusterID));
document.addIncompatible(mention, ant);
return false;
}
// Match checks
if(flags.USE_EXACTSTRINGMATCH && Rules.entityExactStringMatch(mentionCluster, potentialAntecedent, dict, roleSet)){
return true;
}
if (flags.USE_NAME_MATCH && checkEntityMatch(document, mentionCluster, potentialAntecedent, dict, roleSet)) {
ret = true;
}
if(flags.USE_RELAXED_EXACTSTRINGMATCH && Rules.entityRelaxedExactStringMatch(mentionCluster, potentialAntecedent, mention, ant, dict, roleSet)){
return true;
}
if(flags.USE_APPOSITION && Rules.entityIsApposition(mentionCluster, potentialAntecedent, mention, ant)) {
SieveCoreferenceSystem.logger.finest("Apposition: " + mention.spanToString() + "\tvs\t" + ant.spanToString());
return true;
}
if(flags.USE_PREDICATENOMINATIVES && Rules.entityIsPredicateNominatives(mentionCluster, potentialAntecedent, mention, ant)) {
SieveCoreferenceSystem.logger.finest("Predicate nominatives: " + mention.spanToString() + "\tvs\t" + ant.spanToString());
return true;
}
if(flags.USE_ACRONYM && Rules.entityIsAcronym(document, mentionCluster, potentialAntecedent)) {
SieveCoreferenceSystem.logger.finest("Acronym: " + mention.spanToString() + "\tvs\t" + ant.spanToString());
return true;
}
if(flags.USE_RELATIVEPRONOUN && Rules.entityIsRelativePronoun(mention, ant)){
SieveCoreferenceSystem.logger.finest("Relative pronoun: " + mention.spanToString() + "\tvs\t" + ant.spanToString());
return true;
}
if(flags.USE_DEMONYM && mention.isDemonym(ant, dict)){
SieveCoreferenceSystem.logger.finest("Demonym: " + mention.spanToString() + "\tvs\t" + ant.spanToString());
return true;
}
if(flags.USE_ROLEAPPOSITION && Rules.entityIsRoleAppositive(mentionCluster, potentialAntecedent, mention, ant, dict)){
SieveCoreferenceSystem.logger.finest("Role Appositive: "+mention.spanToString()+"\tvs\t"+ant.spanToString());
ret = true;
}
if(flags.USE_INCLUSION_HEADMATCH && Rules.entityHeadsAgree(mentionCluster, potentialAntecedent, mention, ant, dict)){
SieveCoreferenceSystem.logger.finest("Entity heads agree: "+mention.spanToString()+"\tvs\t"+ant.spanToString());
ret = true;
}
if(flags.USE_RELAXED_HEADMATCH && Rules.entityRelaxedHeadsAgreeBetweenMentions(mentionCluster, potentialAntecedent, mention, ant) ){
ret = true;
}
if(flags.USE_WORDS_INCLUSION && ret && ! Rules.entityWordsIncluded(mentionCluster, potentialAntecedent, mention, ant)) {
return false;
}
if(flags.USE_INCOMPATIBLE_MODIFIER && ret && Rules.entityHaveIncompatibleModifier(mentionCluster, potentialAntecedent)) {
return false;
}
if(flags.USE_PROPERHEAD_AT_LAST && ret && !Rules.entitySameProperHeadLastWord(mentionCluster, potentialAntecedent, mention, ant)) {
return false;
}
if(flags.USE_ATTRIBUTES_AGREE && !Rules.entityAttributesAgree(mentionCluster, potentialAntecedent)) {
return false;
}
if(flags.USE_DIFFERENT_LOCATION
&& Rules.entityHaveDifferentLocation(mention, ant, dict)) {
if(flags.USE_PROPERHEAD_AT_LAST && ret && mention.goldCorefClusterID!=ant.goldCorefClusterID) {
SieveCoreferenceSystem.logger.finest("DIFFERENT LOCATION: "+ant.spanToString()+" :: "+mention.spanToString());
}
return false;
}
if(flags.USE_NUMBER_IN_MENTION
&& Rules.entityNumberInLaterMention(mention, ant)) {
if(flags.USE_PROPERHEAD_AT_LAST && ret && mention.goldCorefClusterID!=ant.goldCorefClusterID) {
SieveCoreferenceSystem.logger.finest("NEW NUMBER : "+ant.spanToString()+" :: "+mention.spanToString());
}
return false;
}
if(flags.USE_WN_HYPERNYM) {
Method meth = semantics.wordnet.getClass().getMethod("checkHypernym", CorefCluster.class, CorefCluster.class, Mention.class, Mention.class);
if((Boolean) meth.invoke(semantics.wordnet, mentionCluster, potentialAntecedent, mention, ant)) {
ret = true;
} else if (mention.goldCorefClusterID == ant.goldCorefClusterID
&& !mention.isPronominal() && !ant.isPronominal()){
SieveCoreferenceSystem.logger.finest("not hypernym in WN");
SieveCoreferenceSystem.logger.finest("False Negatives:: " + ant.spanToString() +" <= "+mention.spanToString());
}
}
if(flags.USE_WN_SYNONYM) {
Method meth = semantics.wordnet.getClass().getMethod("checkSynonym", new Class[]{Mention.class, Mention.class});
if((Boolean) meth.invoke(semantics.wordnet, mention, ant)) {
ret = true;
} else if (mention.goldCorefClusterID == ant.goldCorefClusterID
&& !mention.isPronominal() && !ant.isPronominal()){
SieveCoreferenceSystem.logger.finest("not synonym in WN");
SieveCoreferenceSystem.logger.finest("False Negatives:: " + ant.spanToString() +" <= "+mention.spanToString());
}
}
try {
if(flags.USE_ALIAS && Rules.entityAlias(mentionCluster, potentialAntecedent, semantics, dict)){
return true;
}
} catch (Exception e) {
throw new RuntimeException(e);
}
if(flags.USE_DISTANCE && Rules.entityTokenDistance(mention2, ant)){
return false;
}
if(flags.USE_COREF_DICT){
// Head match
if(ant.headWord.lemma().equals(mention2.headWord.lemma())) return false;
// Constraint: ignore pairs commonNoun - properNoun
if(ant.mentionType != MentionType.PROPER &&
( mention2.headWord.get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("NNP")
|| !mention2.headWord.word().substring(1).equals(mention2.headWord.word().substring(1).toLowerCase()) ) ) return false;
// Constraint: ignore plurals
if(ant.headWord.get(CoreAnnotations.PartOfSpeechAnnotation.class).equals("NNS")
&& mention2.headWord.get(CoreAnnotations.PartOfSpeechAnnotation.class).equals("NNS")) return false;
// Constraint: ignore mentions with indefinite determiners
if(dict.indefinitePronouns.contains(ant.originalSpan.get(0).lemma())
|| dict.indefinitePronouns.contains(mention2.originalSpan.get(0).lemma())) return false;
// Constraint: ignore coordinated mentions
if(ant.isCoordinated() || mention2.isCoordinated()) return false;
// Constraint: context incompatibility
if(Rules.contextIncompatible(mention2, ant, dict)) return false;
// Constraint: sentence context incompatibility when the mentions are common nouns
if(Rules.sentenceContextIncompatible(mention2, ant, dict)) return false;
if(Rules.entityClusterAllCorefDictionary(mentionCluster, potentialAntecedent, dict, 1, 8)) return true;
if(Rules.entityCorefDictionary(mention, ant, dict, 2, 2)) return true;
if(Rules.entityCorefDictionary(mention, ant, dict, 3, 2)) return true;
if(Rules.entityCorefDictionary(mention, ant, dict, 4, 2)) return true;
}
if(flags.DO_PRONOUN){
Mention m;
if (mention.predicateNominatives!=null && mention.predicateNominatives.contains(mention2)) {
m = mention2;
} else {
m = mention;
}
if((m.isPronominal() || dict.allPronouns.contains(m.toString())) && Rules.entityAttributesAgree(mentionCluster, potentialAntecedent)){
if(dict.demonymSet.contains(ant.lowercaseNormalizedSpanString()) && dict.notOrganizationPRP.contains(m.headString)){
document.addIncompatible(m, ant);
return false;
}