public void map(Writable docnoKey, Indexable page, OutputCollector<PairOfInts, WikiDocInfo> output, Reporter reporter) throws IOException {
int docno = ((IntWritable)docnoKey).get();
WikipediaPage p = (WikipediaPage) page;
String lang = p.getLanguage();
ArrayListOfIntsWritable similarDocnos;
// We only load the mapping once, during the first map() call of a mapper.
// This works because all input key-value pairs of a given mapper have the same language id (reason explained above).
loadPairs(pwsimMapping, lang, mJob, reporter);
// if no similar docs for docno, return
similarDocnos = pwsimMapping.get(docno);
ArrayListWritable<Text> sentences;
ArrayListWritable<HMapSFW> vectors = new ArrayListWritable<HMapSFW>();
ArrayListOfIntsWritable sentLengths = new ArrayListOfIntsWritable();
try {
// Identify the sentences in the document and filter out those below the MinSentLength threshold.
// Convert each sentence into a tf-idf vector, using the general DF map for the collection and a heuristic for the average document length.
// Filter out sentences whose vector has fewer than MinVectorTerms terms.