// Resolve every document id in the cluster to a file: take the stored
// "filename" field and swap the "markedup" path segment for "source".
List<File> clusterFiles = new ArrayList<File>();
for (Integer docId : cluster.keySet()) {
    String markedUpName = ir.document(docId).getField("filename").stringValue();
    // Literal substitution; the search text contains no regex metacharacters.
    String sourceName = markedUpName.replace("markedup", "source");
    clusterFiles.add(new File(sourceName));
}

// Run the n-gram analysis over the collected files.
NGramTfDf ngtd = NGramTfDf.analyseFiles(clusterFiles);
ngtd.calculateNGrams();

// NOTE(review): the variable is called "tf" but is filled from getDfBag(1),
// presumably the document-frequency bag for 1-grams - confirm this is intended.
Bag<String> tf = ngtd.getDfBag(1);
// Presumably drops entries whose count is below 2 - verify against Bag.
tf.discardInfrequent(2);

// Working state for the code that follows: an (initially empty) term -> score
// map, the document count reported by the reader, and a searcher over it.
Map<String,Double> tfIdf = new HashMap<String,Double>();
int numDocs = ir.numDocs();
IndexSearcher is = new IndexSearcher(ir);