// Set-up for the term-similarity run: choose the corpus files, build the
// feature-vector extractor and similarity extractor over them, then fetch the
// term list that the code below works on.
//
// Corpus selection: the "newEnzyme" corpus is active; the "paperset1" call is
// kept (commented out) as the alternative input.
// NOTE(review): both corpus paths are absolute and machine-specific — confirm
// whether they should come from configuration instead.
//List<File> files = FileTools.getFilesFromDirectoryByName(new File("/home/ptc24/oscarworkspace/corpora/paperset1"), "markedup.xml");
// Initialised directly from the lookup: the previous empty ArrayList placeholder
// was overwritten before it was ever read.
// Presumably this returns the files named "markedup.xml" under the directory — verify against FileTools.
List<File> files = FileTools.getFilesFromDirectoryByName(new File("/home/ptc24/oscarworkspace/corpora/newEnzyme"), "markedup.xml");
// Feature vectors are extracted from the selected files; similarity is computed
// with CosSimilarity, using TTestWeighting as the weighting argument.
FeatureVectorExtractor fve = new InlineFVE(files);
SimilarityExtractor se = new SimilarityExtractor(fve, new CosSimilarity(), new TTestWeighting());
System.out.println("Indexing...");
// termIndex and termNo are only declared here; presumably the code that follows
// fills termIndex with term -> number entries using termNo as the counter — TODO confirm.
Map<String,Integer> termIndex = new HashMap<String,Integer>();
int termNo = 0;
// Requests terms from the similarity extractor with a limit argument of 5000
// (by its name, the 5000 most frequent terms — confirm ordering against SimilarityExtractor).
List<String> terms = se.getMostFrequentTerms(5000);