// add token translations into a #combine of #weight array structures
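// The token part ends up as an Indri-style nested structure; a hypothetical example,
// assuming the #weight arrays alternate weight and translated term:
//   {"#combine": [{"#weight": [0.7, "casa", 0.3, "hogar"]}, ...]}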
JsonArray tokensArr = new JsonArray();
if (tokenWeight > 0) {
  for (String srcToken : stemmedSourceTokens) {
    // skip stopwords before doing any translation lookups
    if (defaultTokenizer.isStopWord(srcToken)) {
      continue;
    }
    LOG.info("Processing " + srcToken);
    HMapSFW nbestDist = translation.getDistributionOf(srcToken);
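    // nbestDist holds this token's translation distribution derived from the MT N-best
    // list; it is only consumed below when mtWeight > 0.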
    // collect this token's translation distributions from up to three sources:
    // bitext, SCFG grammar, and the MT N-best list
    List<PairOfFloatMap> tokenRepresentationList = new ArrayList<PairOfFloatMap>();
    // Pr{bitext}
    if (bitextWeight > 0) {
      HMapSFW bitextDist = clGenerator.getTranslations(origQuery.trim(), srcToken, pairsInGrammar, stemmed2Stemmed);
      if (bitextDist != null && !bitextDist.isEmpty()) {
        tokenRepresentationList.add(new PairOfFloatMap(bitextDist, bitextWeight));
      }
    }
    // Pr{scfg}
    if (scfgWeight > 0) {
      HMapSFW scfgDist = scfgGenerator.getTranslations(origQuery.trim(), srcToken, probMap, stemmed2Stemmed);
      if (scfgDist != null && !scfgDist.isEmpty()) {
        tokenRepresentationList.add(new PairOfFloatMap(scfgDist, scfgWeight));
      }
    }
    // Pr{n-best}
    if (mtWeight > 0 && nbestDist != null && !nbestDist.isEmpty()) {
      Utils.normalize(nbestDist);
      tokenRepresentationList.add(new PairOfFloatMap(nbestDist, mtWeight));
    }
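    // tokenRepresentationList now holds up to three weighted distributions for srcToken,
    // which are interpolated below into a single #weight node.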
    JsonArray combinedArr;
    float scale = 1;
    if (scaling) {
      // scale by the token's relative frequency on the source side of the translation
      scale = scale * translation.getSourceTokenCnt().get(srcToken) / ((float) translation.getCount());
    }
    if (tokenRepresentationList.isEmpty()) {
      continue; // no translations: do not represent this source token in the query
    } else if (tokenRepresentationList.size() == 1) {
      combinedArr = Utils.createJsonArrayFromProbabilities(Utils.scaleProbMap(lexProbThreshold, scale, tokenRepresentationList.get(0).getMap()));
    } else {
      combinedArr = Utils.createJsonArrayFromProbabilities(Utils.combineProbMaps(lexProbThreshold, scale, tokenRepresentationList));
    }
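    // Utils.combineProbMaps presumably takes the weighted interpolation of the listed
    // distributions, while Utils.scaleProbMap scales a single map; both appear to drop
    // translations whose probability falls below lexProbThreshold.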
    JsonObject tokenWeightedArr = new JsonObject();
    tokenWeightedArr.add("#weight", combinedArr);
    // optional: if this source token occurs more than once in the query, repeat its
    // representation to reflect that:
    // for (int i = 0; i < Math.ceil(tokenCount.get(srcToken) / (float) kBest); i++) {
    //   tokensArr.add(tokenWeightedArr);
    // }
    tokensArr.add(tokenWeightedArr);
  }
  queryTJson.add("#combine", tokensArr);
}
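// queryTJson now holds the complete token-based #combine representation built above.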
// combine the token-based and phrase-based representations into a #combweight structure
JsonArray queryJsonArr = new JsonArray();
HMapSFW scaledPhrase2Weight = null;
if (phraseWeight > 0) {
  scaledPhrase2Weight = Utils.scaleProbMap(lexProbThreshold, phraseWeight, translation.getPhraseDist());
  // #combweight takes a flat list alternating weight and term: [w1, "phrase1", w2, "phrase2", ...]
  for (String phrase : scaledPhrase2Weight.keySet()) {
    queryJsonArr.add(new JsonPrimitive(scaledPhrase2Weight.get(phrase)));
    queryJsonArr.add(new JsonPrimitive(phrase));
  }
}
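// The token-based representation is appended with tokenWeight as its coefficient,
// alongside the phrase entries already scaled by phraseWeight above.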
if (tokenWeight > 0) {
  queryJsonArr.add(new JsonPrimitive(tokenWeight));