String ntId = derivElt.getAttributeValue("nt_id");
String simpleCat = derivElt.getAttributeValue("stag");
List childElts = derivElt.getChildren();
int numChildren = childElts.size();
if (numChildren == 0)
throw new ParseException(header
+ ": no child elements for TreeNode for cat: " + cat);
// if no cat element present, adjust list with an initial dummy node,
// to avoid code changes in what follows
Element elt0 = (Element) childElts.get(0);
String elt0name = elt0.getName();
if (elt0name.equals("Treenode") || elt0name.equals("Leafnode")) {
childElts.add(0, new Element("dummy"));
numChildren++;
}
if (numChildren != 2 && numChildren != 3)
throw new ParseException(header
+ ": wrong number of child elements: " + numChildren
+ " for cat: " + cat);
Element firstInputElt = (Element) childElts.get(1);
SignHash firstSigns = followDerivR(firstInputElt);
SignHash retval = new SignHash();
// unary case
if (numChildren == 2) {
// apply rules
for (Sign s : firstSigns.asSignSet()) {
List<Sign> results = rules.applyUnaryRules(s);
for (Sign rSign : results)
retval.insert(rSign);
}
// caution/warn upon failure
if (!containsCat(retval, simpleCat)) {
boolean noResults = retval.isEmpty();
String inCat = firstInputElt.getAttributeValue("cat");
String msg = "Unable to derive: " + cat + " from: " + inCat;
if (!noResults)
ccgBankTaskTestbed.log("Caution for " + header + ": " + msg);
if (ccgBankTaskTestbed.isDebugDerivations()) {
ccgBankTaskTestbed.log(header + ": derivation stymied; inputs: ");
for (Sign s : firstSigns.asSignSet()) {
ccgBankTaskTestbed.log(s.toString());
}
if (!noResults) {
ccgBankTaskTestbed.log("Outputs: ");
for (Sign s : retval.asSignSet())
ccgBankTaskTestbed.log(s.toString());
}
}
if (noResults)
throw new ParseException("Derivation blocked: " + msg);
}
}
// binary case
else if (numChildren == 3) {
Element secondInputElt = (Element) childElts.get(2);
SignHash secondSigns = followDerivR(secondInputElt);
// apply rules
for (Sign sign1 : firstSigns.asSignSet()) {
for (Sign sign2 : secondSigns.asSignSet()) {
List<Sign> results = rules.applyBinaryRules(sign1, sign2);
for (Sign rSign : results)
retval.insert(rSign);
}
}
// if no results, propagate one input if the other is
// internal punct
if (retval.isEmpty()) {
if (isPunct(secondInputElt))
return firstSigns;
else if (isPunct(firstInputElt))
return secondSigns;
}
// caution/warn upon failure
if (!containsCat(retval, simpleCat)) {
boolean noResults = retval.isEmpty();
String inCat1 = firstInputElt.getAttributeValue("cat");
String inCat2 = secondInputElt.getAttributeValue("cat");
String msg = "Unable to derive: " + cat + " from: " + inCat1 + " and: " + inCat2;
if (!noResults)
ccgBankTaskTestbed.log("Caution for " + header + ": " + msg);
if (ccgBankTaskTestbed.isDebugDerivations()) {
ccgBankTaskTestbed.log(header + ": derivation stymied; first inputs: ");
for (Sign sign1 : firstSigns.asSignSet()) {
ccgBankTaskTestbed.log(sign1.toString());
}
ccgBankTaskTestbed.log("Second inputs: ");
for (Sign sign2 : secondSigns.asSignSet()) {
ccgBankTaskTestbed.log(sign2.toString());
}
if (!noResults) {
ccgBankTaskTestbed.log("Outputs: ");
for (Sign s : retval.asSignSet())
ccgBankTaskTestbed.log(s.toString());
}
}
if (noResults)
throw new ParseException("Derivation blocked: " + msg);
}
}
// Store cat ids of tree nodes for printing to aux files
if (treeInfoFlag) {
for (Sign s : retval.asSignSet()) {
Hashtable<String, String> idConvTally = new Hashtable<String, String>();
Hashtable<String, Integer> freqTally = new Hashtable<String, Integer>();
ArrayList<String> fullCat = new ArrayList<String>();
String catId = "";
Category treeCat = s.getCategory();
// System.out.println(header+" "+ntId+" "+treeCat);
recurseCat(treeCat, fullCat, idConvTally, freqTally);
/*
* System.out.println(freqTally);
* System.out.println(fullCat); System.out.println('\n');
*/
if (fullCat.size() > 1) {
for (String x : fullCat) {
String y[] = x.split("_");
if (y.length == 1) {
catId = catId + "," + y[0];
continue;
}
int freq = freqTally.get(y[1]);
freqTally.put(y[1], freq - 1);
if (x.endsWith("_M") && freq <= 1)
x = x.replaceFirst("_M", "");
catId = catId + "," + x;
}
catId = catId.replaceFirst(",", "");
treeInfo.add(header + " " + ntId + " " + catId);
}
/*
* System.out.println(idConvTally);
* System.out.println(fullCat); System.out.println('\n');
*/
}
}
// done
return retval;
}
// lex lookup
// nb: not always insisting on right POS, b/c hashing strategy uses
// surface words,
// thus doesn't distinguish lex signs based solely on POS
// nb: might make sense to warn on lex cats with missing semantics
else if (eltName.equals("Leafnode")) {
try {
String lex = derivElt.getAttributeValue("lexeme");
Word w = lexicon.tokenizer.parseToken(lex);
str += w.getForm() + " ";
String cat = derivElt.getAttributeValue("cat");
String simpleCat = derivElt.getAttributeValue("stag");
String rel = derivElt.getAttributeValue("rel");
String indexRel = derivElt.getAttributeValue("indexRel");
String semClass = "";
semClass = derivElt.getAttributeValue("class");
String roles = derivElt.getAttributeValue("argRoles");
String pos = derivElt.getAttributeValue("pos");
// nb: for now, need to ignore rel for non-VB pos
if (!pos.startsWith("VB"))
rel = null;
// lex lookup with required supertag
// NB: there's no guarantee of getting the right arg roles if the word-cat pair is observed
lexicon.setSupertagger(supertaggerStandIn);
supertaggerStandIn.setTag(simpleCat);
SignHash lexSigns = lexicon.getSignsFromWord(w);
if (semClass == null || semClass.length() == 0)
semClass = "NoClass";
// add lex signs, filtered by rel, reindexed
// also check number with matching pos, match on no class
int matchPOS = 0;
boolean matchNoClass = false;
for (Iterator<Sign> it = lexSigns.asSignSet().iterator(); it.hasNext();) {
Sign s = it.next();
Word wTemp = s.getWords().get(0);
String morphClass = wTemp.getSemClass();
if (morphClass == null || morphClass.length() == 0)
morphClass = "NoClass";
Category lexcat = s.getCategory();
LF lexLF = lexcat.getLF();
// allow any class if no sem class given
if (!(semClass.equals("NoClass") || semClass.equals(morphClass))
|| !containsPred(lexLF, rel)
|| !containsRoles(lexLF, roles)
|| !containsRel(lexLF, indexRel, s)) {
it.remove();
}
else {
UnifyControl.reindex(lexcat);
if (wTemp.getPOS().equals(pos)) {
matchPOS++;
if (semClass.equals("NoClass") && morphClass.equals("NoClass"))
matchNoClass = true;
}
}
}
// filter by pos unless none match
if (matchPOS > 0) {
for (Iterator<Sign> it = lexSigns.asSignSet().iterator(); it.hasNext();) {
Sign s = it.next();
Word wTemp = s.getWords().get(0);
if (!wTemp.getPOS().equals(pos)) {
it.remove(); continue;
}
// filter by mismatched class if apropos
if (matchNoClass) {
String morphClass = wTemp.getSemClass();
if (morphClass != null && morphClass.length() != 0)
it.remove();
}
}
}
if (lexSigns.isEmpty())
throw new LexException("No matching category " + cat + " for: " + w);
return lexSigns;
} catch (LexException exc) {
// try continuing derivations without lex signs for punctuation,
// otherwise throw parse exception
if (isPunct(derivElt)) {
if (ccgBankTaskTestbed.isDebugDerivations()) {
ccgBankTaskTestbed.log(header + ": " + exc.toString());
}
return new SignHash();
}
throw new ParseException(exc.toString());
} catch (RuntimeException exc) {
// for other exceptions, throw parse exception
throw new ParseException(exc.toString());
}
} else
throw new RuntimeException(header + ": unrecognized element in derivation: " + eltName);
}