String outputDir = "myDistanceNewsClusters";
HadoopUtil.delete(conf, new Path(outputDir));
Path tokenizedPath = new Path(outputDir
,DocumentProcessor.TOKENIZED_DOCUMENT_OUTPUT_FOLDER);
DefaultAnalyzer analyzer = new DefaultAnalyzer();
DocumentProcessor.tokenizeDocuments(new Path(inputDir), analyzer
.getClass().asSubclass(Analyzer.class), tokenizedPath, conf);
DictionaryVectorizer.createTermFrequencyVectors(tokenizedPath,
new Path(outputDir), conf, minSupport, maxNGramSize, minLLRValue, 2, true, reduceTasks,
chunkSize, sequentialAccessOutput, false);