// We simply use the source-language doc length, since the ratio of doc length to average
// doc length is unlikely to change significantly under translation (not worth complicating
// the pipeline).
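// Translate the document's term frequencies into the target language (accumulated in tfS),
// distributing each term's frequency over its candidate translations according to the
// translation probability tables.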
int docLen = CLIRUtils.translateTFs(doc, tfS, eVocabSrc, eVocabTrg, fVocabSrc, fVocabTrg,
    e2f_Probs, f2e_Probs, tokenizer, LOG);
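// Build the weighted term-document vector from the translated term frequencies, with term
// weights computed from the scoring model and the df table (normalized if isNormalize is set).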
HMapSFW v = CLIRUtils.createTermDocVector(docLen, tfS, eVocabTrg, model, dict, dfTable,
    isNormalize, LOG);
// If no translation of any word is in the target vocabulary, drop the document
// (i.e., our model was not able to translate it).
if (v.size() < MIN_SIZE) {
  reporter.incrCounter(Docs.SHORTAfterTranslation, 1);
  return;
} else {
  reporter.incrCounter(Docs.Total, 1);
  output.collect(docno, v);