defaults.put("lsi.threshold.candidateCluster", "0.775");
// TODO: this should eventually be replaced with documents from Nutch
// tagged with a language tag. There is no need to determine
// the language of a document again.
return new LingoLocalFilterComponent(
// If you want to include Polish in the list of supported languages,
// you have to download a separate Carrot2 component called
// carrot2-stemmer-lametyzator.jar, put it on the classpath,
// and add new Polish() below.
new Language[]