return featuresCp5C;
}
/**
 * Builds the string features computed over the token at {@code loc} together
 * with the two tokens preceding it ({@code loc - 1} and {@code loc - 2}).
 * Which features are produced is controlled entirely by {@code flags}; when
 * neither {@code flags.useInternal} nor {@code flags.useExternal} is set the
 * result is empty.
 *
 * @param cInfo the padded token sequence to read from
 * @param loc index of the current token within {@code cInfo}
 * @return a newly created collection holding the generated feature strings
 */
protected Collection<String> featuresCpCp2C(PaddedList<IN> cInfo, int loc) {
  final CoreLabel cur = cInfo.get(loc);
  final CoreLabel prev = cInfo.get(loc - 1);
  final CoreLabel prev2 = cInfo.get(loc - 2);
  final String prevWord = getWord(prev);

  final Collection<String> feats = new ArrayList<String>();

  final boolean internal = flags.useInternal;
  final boolean external = flags.useExternal;
  if (!internal && !external) {
    // Neither feature family is enabled: nothing to generate.
    return feats;
  }
  // Most features are only produced when both families are switched on.
  final boolean both = internal && external;

  if (both) {
    if (flags.strictGoodCoNLL
        && !flags.removeStrictGoodCoNLLDuplicates
        && flags.useTypeySequences
        && flags.maxLeft >= 2) {
      // This feature duplicates -TYPETYPES below, so it probably should not be
      // included, but it was in the original tests of CMM goodCoNLL.
      final String shapeTrigram = prev2.get(CoreAnnotations.ShapeAnnotation.class)
          + '-' + prev.get(CoreAnnotations.ShapeAnnotation.class)
          + '-' + cur.get(CoreAnnotations.ShapeAnnotation.class);
      feats.add(shapeTrigram + "-TTPS");
    }
    if (flags.useAbbr) {
      final String abbrTrigram = prev2.get(CoreAnnotations.AbbrAnnotation.class)
          + '-' + prev.get(CoreAnnotations.AbbrAnnotation.class)
          + '-' + cur.get(CoreAnnotations.AbbrAnnotation.class);
      feats.add(abbrTrigram + "-2PABBRANS");
    }
    if (flags.useChunks) {
      final String chunkTrigram = prev2.get(CoreAnnotations.ChunkAnnotation.class)
          + '-' + prev.get(CoreAnnotations.ChunkAnnotation.class)
          + '-' + cur.get(CoreAnnotations.ChunkAnnotation.class);
      feats.add(chunkTrigram + "-2PCHUNKS");
    }
  }

  // The long-sequence marker is emitted whenever at least one family is enabled.
  if (flags.useLongSequences) {
    feats.add("PPSEQ");
  }

  if (both) {
    if (flags.useBoundarySequences
        && prevWord.equals(CoNLLDocumentReaderAndWriter.BOUNDARY)) {
      feats.add("BNDRY-SPAN-PPSEQ");
    }
    // A more elaborate consistency check at this point (also firing when the
    // previous word was "and", "or" or ",") was tried and did not help.

    if (flags.useTaggySequences) {
      final boolean withShape = flags.useTaggySequencesShapeInteraction;
      if (flags.useTags) {
        final String tagTrigram = prev2.getString(CoreAnnotations.PartOfSpeechAnnotation.class)
            + '-' + prev.getString(CoreAnnotations.PartOfSpeechAnnotation.class)
            + '-' + cur.getString(CoreAnnotations.PartOfSpeechAnnotation.class);
        feats.add(tagTrigram + "-TTS");
        if (withShape) {
          feats.add(tagTrigram + '-' + cur.get(CoreAnnotations.ShapeAnnotation.class) + "-TTS-CS");
        }
      }
      if (flags.useDistSim) {
        final String distSimTrigram = prev2.get(CoreAnnotations.DistSimAnnotation.class)
            + '-' + prev.get(CoreAnnotations.DistSimAnnotation.class)
            + '-' + cur.get(CoreAnnotations.DistSimAnnotation.class);
        feats.add(distSimTrigram + "-DISTSIM_TTS1");
        if (withShape) {
          feats.add(distSimTrigram + '-' + cur.get(CoreAnnotations.ShapeAnnotation.class) + "-DISTSIM_TTS1-CS");
        }
      }
    }
  }

  // The shape-trigram feature needs the external family (alone or together
  // with the internal one); it is never produced for internal-only setups.
  final boolean shapesAvailable =
      flags.wordShape > WordShapeClassifier.NOWORDSHAPE || flags.useShapeStrings;
  if (external
      && shapesAvailable
      && flags.useTypeSeqs
      && flags.useTypeSeqs2
      && flags.maxLeft >= 2) {
    final String curShape = cur.get(CoreAnnotations.ShapeAnnotation.class);
    final String prevShape = prev.get(CoreAnnotations.ShapeAnnotation.class);
    final String prev2Shape = prev2.get(CoreAnnotations.ShapeAnnotation.class);
    feats.add(prev2Shape + '-' + prevShape + '-' + curShape + "-TYPETYPES");
  }

  return feats;