// strip newlines and tabs so that indentation can be used in the example
goodSentence = goodSentence.replaceAll("[\\n\\t]+", "");
goodSentence = cleanXML(goodSentence);
assertTrue(goodSentence.trim().length() > 0);
final AnalyzedSentence sent = disambiguateUntil(rules, id,
languageTool.getRawAnalyzedSentence(goodSentence));
final AnalyzedSentence sentToReplace = disambiguateUntil(rules, id,
languageTool.getRawAnalyzedSentence(goodSentence));
//Note: only the string representations are compared here, because
//getRawAnalyzedSentence does not set all properties of the
//AnalyzedSentence, and during an equality test they are set for the
//left-hand side.
assertEquals("The untouched example (" + goodSentence + ") for " + lang.getName() +
" rule " + id +"["+ rule.getSubId() +"] was touched!",
sent.toString(), rule.replace(sentToReplace).toString());
}
}
final List<DisambiguatedExample> examples = rule.getExamples();
if (examples != null) {
for (final DisambiguatedExample example : examples) {
final String outputForms = example.getDisambiguated();
assertTrue("No input form found for: " + id, outputForms != null);
assertTrue(outputForms.trim().length() > 0);
final int expectedMatchStart = example.getExample().indexOf("<marker>");
final int expectedMatchEnd = example.getExample().indexOf("</marker>") - "<marker>".length();
if (expectedMatchStart == -1 || expectedMatchEnd == -1) {
fail(lang
+ ": No position markup ('<marker>...</marker>') in disambiguated example in rule " + rule);
}
final String inputForms = example.getAmbiguous();
assertTrue("No input form found for: " + id, inputForms != null);
assertTrue(inputForms.trim().length() > 0);
assertTrue("Input and output forms for rule " + id + "are the same!",
!outputForms.equals(inputForms));
final AnalyzedSentence cleanInput = languageTool
.getRawAnalyzedSentence(cleanXML(example.getExample()));
final AnalyzedSentence sent = disambiguateUntil(rules, id,
languageTool
.getRawAnalyzedSentence(cleanXML(example.getExample())));
final AnalyzedSentence disambiguatedSent = rule
.replace(disambiguateUntil(rules, id, languageTool
.getRawAnalyzedSentence(cleanXML(example.getExample()))));
assertTrue(
"Disambiguated sentence is equal to the non-disambiguated sentence for rule: "
+ id, !cleanInput.equals(disambiguatedSent));
assertTrue(
"Disambiguated sentence is equal to the input sentence for rule: "
+ id + ". The sentence was: " + sent, !sent.equals(disambiguatedSent));
String reading = "";
String annotations = "";
for (final AnalyzedTokenReadings readings : sent.getTokens()) {
if (readings.isSentenceStart() && !inputForms.contains("<S>")) {
continue;
}
if (readings.getStartPos() == expectedMatchStart) {
final AnalyzedTokenReadings[] r = { readings };
reading = new AnalyzedSentence(r).toShortString(",");
annotations = readings.getHistoricalAnnotations();
assertTrue(
"Wrong marker position in the example for the rule " + id,
readings.getStartPos() == expectedMatchStart
&& readings.getStartPos() + readings.getToken().length() == expectedMatchEnd);
break;
}
}
assertEquals("The input form for the rule " + id + " in the example: "
+ example.toString() + " is different than expected (expected "
+ inputForms + " but got " + sortForms(reading) + "). The token has been changed by the disambiguator: " + annotations,
inputForms, sortForms(reading));
for (final AnalyzedTokenReadings readings : disambiguatedSent.getTokens()) {
if (readings.isSentenceStart() && !outputForms.contains("<S>")) {
continue;
}
if (readings.getStartPos() == expectedMatchStart) {
final AnalyzedTokenReadings[] r = { readings };
reading = new AnalyzedSentence(r).toShortString(",");
assertTrue(readings.getStartPos() == expectedMatchStart
&& readings.getStartPos() + readings.getToken().length() == expectedMatchEnd);
break;
}
}