annotateChange(whTokens[position], prevValue, prevAnot);
}
}
} else if (!StringTools.isEmpty(disambiguatedPOS)) { // negative filtering
Pattern p = Pattern.compile(disambiguatedPOS);
AnalyzedTokenReadings tmp = new AnalyzedTokenReadings(whTokens[fromPos].getReadings(),
whTokens[fromPos].getStartPos());
for (AnalyzedToken analyzedToken : tmp) {
if (analyzedToken.getPOSTag() != null) {
final Matcher mPos = p.matcher(analyzedToken.getPOSTag());
if (mPos.matches()) {
final int position = sentence.getOriginalPosition(firstMatchToken + correctedStPos);
final String prevValue = whTokens[position].toString();
final String prevAnot = whTokens[position].getHistoricalAnnotations();
whTokens[position].removeReading(analyzedToken);
annotateChange(whTokens[position], prevValue, prevAnot);
}
}
}
}
break;
case ADD:
if (newTokenReadings != null) {
if (newTokenReadings.length == matchingTokensWithCorrection
- startPositionCorrection + endPositionCorrection) {
for (int i = 0; i < newTokenReadings.length; i++) {
final String token;
final int position = sentence.getOriginalPosition(firstMatchToken + correctedStPos + i);
if ("".equals(newTokenReadings[i].getToken())) { // empty token
token = whTokens[position].getToken();
} else {
token = newTokenReadings[i].getToken();
}
final String lemma;
if (newTokenReadings[i].getLemma() == null) { // empty lemma
lemma = token;
} else {
lemma = newTokenReadings[i].getLemma();
}
final AnalyzedToken newTok = new AnalyzedToken(token,
newTokenReadings[i].getPOSTag(), lemma);
final String prevValue = whTokens[position].toString();
final String prevAnot = whTokens[position].getHistoricalAnnotations();
whTokens[position].addReading(newTok);
annotateChange(whTokens[position], prevValue, prevAnot);
}
}
}
break;
case FILTERALL:
for (int i = 0; i < matchingTokensWithCorrection - startPositionCorrection + endPositionCorrection; i++) {
final int position = sentence.getOriginalPosition(firstMatchToken + correctedStPos + i);
Element myEl;
if (elementsMatched.get(i + startPositionCorrection)) {
myEl = rule.getPatternElements().get(i + startPositionCorrection);
} else {
int k = 1;
while (i + startPositionCorrection + k < rule.getPatternElements().size() + endPositionCorrection &&
!elementsMatched.get(i + startPositionCorrection + k)) {
k++;
}
//FIXME: this is left to see whether this fails anywhere
assert(i + k + startPositionCorrection < rule.getPatternElements().size());
myEl = rule.getPatternElements().get(i + k + startPositionCorrection);
}
final Match tmpMatchToken = new Match(myEl.getPOStag(), null,
true,
myEl.getPOStag(),
null, Match.CaseConversion.NONE, false, false,
Match.IncludeRange.NONE);
MatchState matchState = tmpMatchToken.createState(rule.getLanguage().getSynthesizer(), whTokens[position]);
final String prevValue = whTokens[position].toString();
final String prevAnot = whTokens[position].getHistoricalAnnotations();
whTokens[position] = matchState.filterReadings();
annotateChange(whTokens[position], prevValue, prevAnot);
}
break;
case IMMUNIZE:
for (int i = 0; i < matchingTokensWithCorrection - startPositionCorrection + endPositionCorrection; i++) {
whTokens[sentence.getOriginalPosition(firstMatchToken + correctedStPos + i)].immunize();
}
break;
case IGNORE_SPELLING:
for (int i = 0; i < matchingTokensWithCorrection - startPositionCorrection + endPositionCorrection; i++) {
whTokens[sentence.getOriginalPosition(firstMatchToken + correctedStPos + i)].ignoreSpelling();
}
break;
case FILTER:
if (matchElement == null) { // same as REPLACE if using <match>
final Match tmpMatchToken = new Match(disambiguatedPOS, null,
true, disambiguatedPOS, null,
Match.CaseConversion.NONE, false, false,
Match.IncludeRange.NONE);
boolean newPOSmatches = false;
// only apply filter rule when it matches previous tags:
for (int i = 0; i < whTokens[fromPos].getReadingsLength(); i++) {
if (!whTokens[fromPos].getAnalyzedToken(i).hasNoTag() &&
whTokens[fromPos].getAnalyzedToken(i).getPOSTag().matches(disambiguatedPOS)) {
newPOSmatches = true;
break;
}
}
if (newPOSmatches) {
final MatchState matchState = tmpMatchToken.createState(rule.getLanguage().getSynthesizer(), whTokens[fromPos]);
final String prevValue = whTokens[fromPos].toString();
final String prevAnot = whTokens[fromPos].getHistoricalAnnotations();
whTokens[fromPos] = matchState.filterReadings();
annotateChange(whTokens[fromPos], prevValue, prevAnot);
}
break;
}
//fallthrough
case REPLACE:
default:
if (newTokenReadings != null && newTokenReadings.length > 0) {
if (newTokenReadings.length == matchingTokensWithCorrection - startPositionCorrection + endPositionCorrection) {
for (int i = 0; i < newTokenReadings.length; i++) {
final String token;
final int position = sentence.getOriginalPosition(firstMatchToken + correctedStPos + i);
if ("".equals(newTokenReadings[i].getToken())) { // empty token
token = whTokens[position].getToken();
} else {
token = newTokenReadings[i].getToken();
}
final String lemma;
if (newTokenReadings[i].getLemma() == null) { // empty lemma
lemma = token;
} else {
lemma = newTokenReadings[i].getLemma();
}
final AnalyzedToken analyzedToken = new AnalyzedToken(token, newTokenReadings[i].getPOSTag(), lemma);
final AnalyzedTokenReadings toReplace = new AnalyzedTokenReadings(
analyzedToken,
whTokens[fromPos].getStartPos());
whTokens[position] = replaceTokens(
whTokens[position], toReplace);
}
}
} else if (matchElement == null) {
String lemma = "";
for (AnalyzedToken analyzedToken : whTokens[fromPos]) {
if (analyzedToken.getPOSTag() != null
&& analyzedToken.getPOSTag().equals(disambiguatedPOS) && analyzedToken.getLemma() != null) {
lemma = analyzedToken.getLemma();
}
}
if (StringTools.isEmpty(lemma)) {
lemma = whTokens[fromPos].getAnalyzedToken(0).getLemma();
}
final AnalyzedToken analyzedToken = new AnalyzedToken(whTokens[fromPos].getToken(), disambiguatedPOS, lemma);
final AnalyzedTokenReadings toReplace = new AnalyzedTokenReadings(
analyzedToken, whTokens[fromPos].getStartPos());
whTokens[fromPos] = replaceTokens(whTokens[fromPos], toReplace);
} else {
// using the match element
final MatchState matchElementState = matchElement.createState(rule.getLanguage().getSynthesizer(), whTokens[fromPos]);