Set<String> srcSeparators = new HashSet<String>(
Arrays.asList(new String[] { ".", ",", ";", "", "(", ")", "of" }));
Set<String> dstSeparators = new HashSet<String>(Arrays.asList(new String[] { ".", ",", ";", "(", ")", "von" }));
FileReader srcReader = new FileReader(srcFileName);
FileReader dstReader = new FileReader(dstFileName);
Tokenizer<String> srcTokenizer = new MultidocumentRegexTokenizer(srcReader, STRING_REGEX, new StopWordRemover(
srcStopWords, new LowerCaseNormalizer()));
Tokenizer<String> dstTokenizer = new MultidocumentRegexTokenizer(dstReader, STRING_REGEX, new StopWordRemover(
dstStopWords, new LowerCaseNormalizer()));
Corpus<String> srcCorpus = new TreeMapCorpus(srcTokenizer, srcSeparators);
Corpus<String> dstCorpus = new TreeMapCorpus(dstTokenizer, dstSeparators);
double minSimilarity = 0.3;