// ====================================== FEATURE EXTRACTION ======================================
@Override
protected String getField(FtrToken token, TagState state)
{
DEPNode node = state.getNode(token);
if (node == null) return null;
switch (token.field)
{
case JointFtrXml.F_SIMPLIFIED_FORM:
return containsLowerSimplifiedForm(node) ? node.simplifiedForm : null;
case JointFtrXml.F_LOWER_SIMPLIFIED_FORM:
return containsLowerSimplifiedForm(node) ? node.lowerSimplifiedForm : null;
case JointFtrXml.F_POS:
return node.pos;
case JointFtrXml.F_POS2:
return node.getFeat(DEPLib.FEAT_POS2);
case JointFtrXml.F_AMBIGUITY_CLASS:
return m_ambi.get(node.simplifiedForm);
}
Matcher m;
if ((m = JointFtrXml.P_BOOLEAN.matcher(token.field)).find())
{
int field = Integer.parseInt(m.group(1));
String value = token.field+token.offset;
switch (field)
{
case 0: return UTString.isAllUpperCase(node.simplifiedForm) ? value : null;
case 1: return UTString.isAllLowerCase(node.simplifiedForm) ? value : null;
case 2: return UTString.beginsWithUpperCase(node.simplifiedForm) & !state.isInputFirstNode() ? value : null;
case 3: return UTString.getNumOfCapitalsNotAtBeginning(node.simplifiedForm) == 1 ? value : null;
case 4: return UTString.getNumOfCapitalsNotAtBeginning(node.simplifiedForm) > 1 ? value : null;
case 5: return node.simplifiedForm.contains(".") ? value : null;
case 6: return UTString.containsDigit(node.simplifiedForm) ? value : null;
case 7: return node.simplifiedForm.contains("-") ? value : null;
case 8: return state.isInputLastNode() ? value : null;
case 9: return state.isInputFirstNode() ? value : null;
case 10: return PTPunct.containsOnlyPunctuation(node.lowerSimplifiedForm) ? value : null;
default: throw new IllegalArgumentException("Unsupported feature: "+token.field);
}
}
else if ((m = JointFtrXml.P_FEAT.matcher(token.field)).find())
return node.getFeat(m.group(1));
else if ((m = JointFtrXml.P_PREFIX.matcher(token.field)).find())
{
int n = Integer.parseInt(m.group(1)), len = node.lowerSimplifiedForm.length();
return (n <= len) ? node.lowerSimplifiedForm.substring(0, n) : null;
}