***************************************************************************/
// The F1 deltas noted below are incremental gains over the previous feature (dev set, <= 40)
//
// POS Splitting for verbs
annotations.put("-markInf", new Pair("@V > (@VN > @VPinf)",
new SimpleStringFunction("-infinitive")));
annotations.put("-markPart", new Pair("@V > (@VN > @VPpart)",
new SimpleStringFunction("-participle")));
annotations.put("-markVN", new Pair("__ << @VN", new SimpleStringFunction("-withVN")));
// +1.45 F1 (Helps MWEs significantly)
annotations.put("-tagPAFr", new Pair("!@PUNC < (__ !< __) > __=parent",
new AddRelativeNodeFunction("-", "parent", true)));
// +.14 F1
annotations.put("-coord1", new Pair("@COORD <2 __=word",
new AddRelativeNodeFunction("-","word", true)));
// +.70 F1 -- de c-commands other stuff dominated by NP, PP, and COORD
annotations.put("-de2", new Pair("@P < /^([Dd]es?|du|d')$/", new SimpleStringFunction("-de2")));
annotations.put("-de3", new Pair("@NP|PP|COORD >+(@NP|PP) (@PP <, (@P < /^([Dd]es?|du|d')$/))",
new SimpleStringFunction("-de3")));
// +.31 F1
annotations.put("-markP1",new Pair("@P > (@PP > @NP)", new SimpleStringFunction("-n")));
//MWEs
//(for MWADV 75.92 -> 77.16)
annotations.put("-MWAdvS", new Pair("@MWADV > /S/", new SimpleStringFunction("-mwadv-s")));
annotations.put("-MWADVSel1", new Pair("@MWADV <1 @P <2 @N !<3 __",
new SimpleStringFunction("-mwadv1")));
annotations.put("-MWADVSel2", new Pair("@MWADV <1 @P <2 @D <3 @N !<4 __",
new SimpleStringFunction("-mwadv2")));
annotations.put("-MWNSel1", new Pair("@MWN <1 @N <2 @A !<3 __",
new SimpleStringFunction("-mwn1")));
annotations.put("-MWNSel2", new Pair("@MWN <1 @N <2 @P <3 @N !<4 __",
new SimpleStringFunction("-mwn2")));
annotations.put("-MWNSel3", new Pair("@MWN <1 @N <2 @- <3 @N !<4 __",
new SimpleStringFunction("-mwn3")));
annotations.put("-splitPUNC",new Pair("@PUNC < __=" + AnnotatePunctuationFunction.key,
new AnnotatePunctuationFunction()));
/***************************************************************************
* TEST FEATURES
***************************************************************************/
// Mark MWE tags only
annotations.put("-mweTag", new Pair("!@PUNC < (__ !< __) > /MW/=parent",
new AddRelativeNodeFunction("-","parent", true)));
annotations.put("-sq",new Pair("@SENT << /\\?/", new SimpleStringFunction("-Q")));
//New phrasal splits
annotations.put("-hasVP", new Pair("!@ROOT|SENT << /^VP/", new SimpleStringFunction("-hasVP")));
annotations.put("-hasVP2", new Pair("__ << /^VP/", new SimpleStringFunction("-hasVP")));
annotations.put("-npCOORD", new Pair("@NP < @COORD", new SimpleStringFunction("-coord")));
annotations.put("-npVP", new Pair("@NP < /VP/", new SimpleStringFunction("-vp")));
//NPs
annotations.put("-baseNP1", new Pair("@NP <1 @D <2 @N !<3 __",
new SimpleStringFunction("-np1")));
annotations.put("-baseNP2", new Pair("@NP <1 @D <2 @MWN !<3 __",
new SimpleStringFunction("-np2")));
annotations.put("-baseNP3", new Pair("@NP <1 @MWD <2 @N !<3 __ ",
new SimpleStringFunction("-np3")));
//MWEs
annotations.put("-npMWN1", new Pair("@NP < (@MWN < @A)", new SimpleStringFunction("-mwna")));
annotations.put("-npMWN2", new Pair("@NP <1 @D <2 @MWN <3 @PP !<4 __",
new SimpleStringFunction("-mwn2")));
annotations.put("-npMWN3", new Pair("@NP <1 @D <2 (@MWN <1 @N <2 @A !<3 __) !<3 __",
new SimpleStringFunction("-mwn3")));
annotations.put("-npMWN4", new Pair(
"@PP <, @P <2 (@NP <1 @D <2 (@MWN <1 @N <2 @A !<3 __) !<3 __) !<3 __",
new SimpleStringFunction("-mwn3")));
//The whopper....
annotations.put("-MWNSel", new Pair("@MWN", new AddPOSSequenceFunction("-",600,true)));
annotations.put("-MWADVSel", new Pair("@MWADV", new AddPOSSequenceFunction("-",500,true)));
annotations.put("-MWASel", new Pair("@MWA", new AddPOSSequenceFunction("-",100,true)));
annotations.put("-MWCSel", new Pair("@MWC", new AddPOSSequenceFunction("-",400,true)));
annotations.put("-MWDSel", new Pair("@MWD", new AddPOSSequenceFunction("-",100,true)));
annotations.put("-MWPSel", new Pair("@MWP", new AddPOSSequenceFunction("-",600,true)));
annotations.put("-MWPROSel", new Pair("@MWPRO", new AddPOSSequenceFunction("-",60,true)));
annotations.put("-MWVSel", new Pair("@MWV", new AddPOSSequenceFunction("-",200,true)));
//MWN
annotations.put("-mwn1", new Pair("@MWN <1 @N <2 @A !<3 __", new SimpleStringFunction("-na")));
annotations.put("-mwn2", new Pair("@MWN <1 @N <2 @P <3 @N !<4 __",
new SimpleStringFunction("-npn")));
annotations.put("-mwn3", new Pair("@MWN <1 @N <2 @- <3 @N !<4 __",
new SimpleStringFunction("-n-n")));
annotations.put("-mwn4", new Pair("@MWN <1 @N <2 @N !<3 __", new SimpleStringFunction("-nn")));
annotations.put("-mwn5", new Pair("@MWN <1 @D <2 @N !<3 __", new SimpleStringFunction("-dn")));
//wh words
annotations.put("-hasWH", new Pair(
"__ < /^(qui|quoi|comment|quel|quelle|quels|quelles|où|combien|que|pourquoi|quand)$/",
new SimpleStringFunction("-wh")));
//POS splitting
annotations.put("-markNNP2", new Pair("@N < /^[A-Z]/", new SimpleStringFunction("-nnp")));
annotations.put("-markD1",new Pair("@D > (__ > @PP)", new SimpleStringFunction("-p")));
annotations.put("-markD2",new Pair("@D > (__ > @NP)", new SimpleStringFunction("-n")));
annotations.put("-markD3",new Pair("@D > (__ > /^VP/)", new SimpleStringFunction("-v")));
annotations.put("-markD4",new Pair("@D > (__ > /^S/)", new SimpleStringFunction("-s")));
annotations.put("-markD5",new Pair("@D > (__ > @COORD)", new SimpleStringFunction("-c")));
//Appositives?
annotations.put("-app1", new Pair("@NP < /[,]/", new SimpleStringFunction("-app1")));
annotations.put("-app2", new Pair("/[^,\\-:;\"]/ > (@NP < /^[,]$/) $,, /^[,]$/",
new SimpleStringFunction("-app2")));
//COORD
annotations.put("-coord2",new Pair("@COORD !< @C", new SimpleStringFunction("-nonC")));
annotations.put("-hasCOORD",new Pair("__ < @COORD", new SimpleStringFunction("-hasCOORD")));
annotations.put("-hasCOORDLS",new Pair("@SENT <, @COORD",
new SimpleStringFunction("-hasCOORDLS")));
annotations.put("-hasCOORDNonS",new Pair("__ < @COORD !<, @COORD",
new SimpleStringFunction("-hasCOORDNonS")));
// PP / VPInf
annotations.put("-pp1",new Pair("@P < /^(du|des|au|aux)$/=word",
new AddRelativeNodeFunction("-","word", false)));
annotations.put("-vpinf1",new Pair("@VPinf <, __=word",
new AddRelativeNodeFunction("-","word", false)));
annotations.put("-vpinf2",new Pair("@VPinf <, __=word",
new AddRelativeNodeFunction("-","word", true)));
// PP splitting (subsumed by the de2-3 features)
annotations.put("-splitIN",new Pair(
"@PP <, (P < /^([Dd]e|[Dd]'|[Dd]es|[Dd]u|à|[Aa]u|[Aa]ux|[Ee]n|[Dd]ans|[Pp]ar|[Ss]ur|[Pp]our|[Aa]vec|[Ee]ntre)$/=word)",
new AddRelativeNodeFunction("-","word", false,true)));
annotations.put("-splitP",new Pair(
"@P < /^([Dd]e|[Dd]'|[Dd]es|[Dd]u|à|[Aa]u|[Aa]ux|[Ee]n|[Dd]ans|[Pp]ar|[Ss]ur|[Pp]our|[Aa]vec|[Ee]ntre)$/=word",
new AddRelativeNodeFunction("-","word", false,true)));
//de features
annotations.put("-hasde", new Pair("@NP|PP <+(@NP|PP) (P < de)",
new SimpleStringFunction("-hasDE")));
annotations.put("-hasde2", new Pair("@PP < de", new SimpleStringFunction("-hasDE2")));
//NPs
annotations.put("-np1", new Pair("@NP < /^,$/", new SimpleStringFunction("-np1")));
annotations.put("-np2", new Pair("@NP <, (@D < le|la|les)", new SimpleStringFunction("-np2")));
annotations.put("-np3", new Pair("@D < le|la|les", new SimpleStringFunction("-def")));
annotations.put("-baseNP", new Pair("@NP <, @D <- (@N , @D)", new SimpleStringFunction("-baseNP")));
// PP environment
annotations.put("-markP2",new Pair("@P > (@PP > @AP)", new SimpleStringFunction("-a")));
annotations.put("-markP3",new Pair("@P > (@PP > @SENT|Ssub|VPinf|VPpart)",
new SimpleStringFunction("-v")));
annotations.put("-markP4",new Pair("@P > (@PP > @Srel)", new SimpleStringFunction("-r")));
annotations.put("-markP5",new Pair("@P > (@PP > @COORD)", new SimpleStringFunction("-c")));
annotations.put("-markP6",new Pair("@P > @VPinf", new SimpleStringFunction("-b")));
annotations.put("-markP7",new Pair("@P > @VPpart", new SimpleStringFunction("-b")));
annotations.put("-markP8",new Pair("@P > /^MW|NP/", new SimpleStringFunction("-internal")));
annotations.put("-markP9",new Pair("@P > @COORD", new SimpleStringFunction("-c")));
/***************************************************************************
* DIDN'T WORK
***************************************************************************/
//MWEs
annotations.put("-hasMWP", new Pair("!/S/ < @MWP", new SimpleStringFunction("-mwp")));
annotations.put("-hasMWP2", new Pair("@PP < @MWP", new SimpleStringFunction("-mwp2")));
annotations.put("-hasMWN2", new Pair("@PP <+(@NP) @MWN", new SimpleStringFunction("-hasMWN2")));
annotations.put("-hasMWN3", new Pair("@NP < @MWN", new SimpleStringFunction("-hasMWN3")));
annotations.put("-hasMWADV", new Pair("/^A/ < @MWADV", new SimpleStringFunction("-hasmwadv")));
annotations.put("-hasC1", new Pair("__ < @MWC", new SimpleStringFunction("-hasc1")));
annotations.put("-hasC2", new Pair("@MWC > /S/", new SimpleStringFunction("-hasc2")));
annotations.put("-hasC3", new Pair("@COORD < @MWC", new SimpleStringFunction("-hasc3")));
annotations.put("-uMWN", new Pair("@NP <: @MWN", new SimpleStringFunction("-umwn")));
//POS splitting
annotations.put("-splitC", new Pair("@C < __=word",
new AddRelativeNodeFunction("-","word", false)));
annotations.put("-splitD",new Pair("@D < /^[^\\d+]{1,4}$/=word",
new AddRelativeNodeFunction("-","word", false)));
annotations.put("-de1", new Pair("@D < /^([Dd]es?|du|d')$/",
new SimpleStringFunction("-de1")));
annotations.put("-markNNP1", new Pair("@NP < (N < /^[A-Z]/) !< /^[^NA]/",
new SimpleStringFunction("-nnp")));
//PP environment
annotations.put("-markPP1",new Pair("@PP > @NP", new SimpleStringFunction("-n")));
annotations.put("-markPP2",new Pair("@PP > @AP", new SimpleStringFunction("-a")));
annotations.put("-markPP3",new Pair("@PP > @SENT|Ssub|VPinf|VPpart",
new SimpleStringFunction("-v")));
annotations.put("-markPP4",new Pair("@PP > @Srel", new SimpleStringFunction("-r")));
annotations.put("-markPP5",new Pair("@PP > @COORD", new SimpleStringFunction("-c")));
annotations.put("-dominateCC",new Pair("__ << @COORD", new SimpleStringFunction("-withCC")));
annotations.put("-dominateIN",new Pair("__ << @PP", new SimpleStringFunction("-withPP")));
//Klein and Manning style features
annotations.put("-markContainsVP", new Pair("__ << /^VP/",
new SimpleStringFunction("-hasV")));
annotations.put("-markContainsVP2",new Pair("__ << /^VP/=word",
new AddRelativeNodeFunction("-hasV-","word", false)));
annotations.put("-markVNArgs",new Pair("@VN $+ __=word1",
new AddRelativeNodeFunction("-","word1", false)));
annotations.put("-markVNArgs2",new Pair("@VN > __=word1 $+ __=word2",
new AddRelativeNodeFunction("-","word1","word2", false)));
annotations.put("-markContainsMW", new Pair("__ << /^MW/", new SimpleStringFunction("-hasMW")));
annotations.put("-markContainsMW2",new Pair("__ << /^MW/=word",
new AddRelativeNodeFunction("-has-","word", false)));
//MWE Sequence features
annotations.put("-mwStart", new Pair("__ >, /^MW/", new SimpleStringFunction("-mwStart")));
annotations.put("-mwMiddle", new Pair("__ !>- /^MW/ !>, /^MW/ > /^MW/",
new SimpleStringFunction("-mwMid")));
annotations.put("-mwMiddle2", new Pair("__ !>- /^MW/ !>, /^MW/ > /^MW/ , __=pos",
new AddRelativeNodeFunction("-","pos", true)));
annotations.put("-mwEnd", new Pair("__ >- /^MW/", new SimpleStringFunction("-mwEnd")));
//AP Features
annotations.put("-nonNAP",new Pair("@AP !$, @N|AP", new SimpleStringFunction("-nap")));
//Phrasal splitting
annotations.put("-markNPTMP", new Pair(
"@NP < (@N < /^(lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche|Lundi|Mardi|Mercredi|Jeudi|Vendredi|Samedi|Dimanche|janvier|février|mars|avril|mai|juin|juillet|août|septembre|octobre|novembre|décembre|Janvier|Février|Mars|Avril|Mai|Juin|Juillet|Août|Septembre|Octobre|Novembre|Décembre)$/)",
new SimpleStringFunction("-tmp")));
//Singular
annotations.put("-markSing1", new Pair("@NP < (D < /^(ce|cette|une|la|le|un|sa|son|ma|mon|ta|ton)$/)",
new SimpleStringFunction("-sing")));
annotations.put("-markSing2", new Pair("@AP < (A < (/[^sx]$/ !< __))",
new SimpleStringFunction("-sing")));
annotations.put("-markSing3", new Pair("@VPpart < (V < /(e|é)$/)",
new SimpleStringFunction("-sing")));
//Plural
annotations.put("-markPl1", new Pair("@NP < (D < /s$/)", new SimpleStringFunction("-pl")));
annotations.put("-markPl2", new Pair("@AP < (A < /[sx]$/)", new SimpleStringFunction("-pl")));
annotations.put("-markPl3", new Pair("@VPpart < (V < /(es|és)$/)",
new SimpleStringFunction("-pl")));
compileAnnotations(headFinder());
}