// ====================================== FEATURE EXTRACTION ======================================
@Override
protected String getField(FtrToken token, POSState state)
{
DEPNode node = state.getNode(token);
if (node == null) return null;
Matcher m;
if (token.isField(JointFtrXml.F_SIMPLIFIED_FORM))
{
return containsLowerSimplifiedForm(node) ? node.simplifiedForm : null;
}
else if (token.isField(JointFtrXml.F_LOWER_SIMPLIFIED_FORM))
{
return containsLowerSimplifiedForm(node) ? node.lowerSimplifiedForm : null;
}
else if (token.isField(JointFtrXml.F_LEMMA))
{
return containsLowerSimplifiedForm(node) ? node.lemma : null;
}
else if (token.isField(JointFtrXml.F_POS))
{
return node.pos;
}
else if (token.isField(JointFtrXml.F_AMBIGUITY_CLASS))
{
return m_ambi.get(node.simplifiedForm);
}
else if ((m = JointFtrXml.P_BOOLEAN.matcher(token.field)).find())
{
int field = Integer.parseInt(m.group(1));
switch (field)
{
case 0: return UTString.isAllUpperCase(node.simplifiedForm) ? token.field : null;
case 1: return UTString.isAllLowerCase(node.simplifiedForm) ? token.field : null;
case 2: return UTString.beginsWithUpperCase(node.simplifiedForm) & !state.isInputFirstNode() ? token.field : null;
case 3: return UTString.getNumOfCapitalsNotAtBeginning(node.simplifiedForm) == 1 ? token.field : null;
case 4: return UTString.getNumOfCapitalsNotAtBeginning(node.simplifiedForm) > 1 ? token.field : null;
case 5: return node.simplifiedForm.contains(".") ? token.field : null;
case 6: return UTString.containsDigit(node.simplifiedForm) ? token.field : null;
case 7: return node.simplifiedForm.contains("-") ? token.field : null;
case 8: return state.isInputLastNode() ? token.field : null;
case 9: return state.isInputFirstNode() ? token.field : null;
default: throw new IllegalArgumentException("Unsupported feature: "+field);
}
}
else if ((m = JointFtrXml.P_FEAT.matcher(token.field)).find())
{
return node.getFeat(m.group(1));
}
else if ((m = JointFtrXml.P_PREFIX.matcher(token.field)).find())
{
int n = Integer.parseInt(m.group(1)), len = node.lowerSimplifiedForm.length();
return (n <= len) ? node.lowerSimplifiedForm.substring(0, n) : null;