continue;
}
bagOfTargetTokens.add(eTerm);
if (isOne2Many <= 1) {
if (probDist.containsKey(fTerm)) {
HMapSFW eToken2Prob = probDist.get(fTerm);
eToken2Prob.increment(eTerm, weight);
}else {
HMapSFW eToken2Prob = new HMapSFW();
eToken2Prob.put(eTerm, weight);
probDist.put(fTerm, eToken2Prob);
}
}
}
if (isOne2Many == 2) {
// if ids.size() > 1, eTerm is a multi-token expression
// even though eTerm is overwritten here, the loop above is still needed to update bagOfTargetTokens
if (ids.size() > 1) {
eTerm = isConsecutiveWithStopwords(ids, rhs, docLangTokenizer); // <---- heuristic
}
// no proper translation on the target side (e.g., a stopword or a non-consecutive multi-word translation), so skip it
if (eTerm == null) {
continue;
}
eTerm = Utils.removeBorderStopWords(docLangTokenizer, eTerm);
// this is the difference between the one-to-many and one-to-one heuristics for the 1-best MT case:
// we add multi-token expressions in addition to single target tokens
bagOfTargetTokens.add(eTerm);
// update the probability distribution
if (probDist.containsKey(fTerm)) {
HMapSFW eToken2Prob = probDist.get(fTerm);
eToken2Prob.increment(eTerm, weight);
}else {
HMapSFW eToken2Prob = new HMapSFW();
eToken2Prob.put(eTerm, weight);
probDist.put(fTerm, eToken2Prob);
}
}
}