IndirectSorter. The replacement class {@link IndirectSort} from HPPC should provide the same functionality.
.getLexicalData(LanguageCode.MALTESE);
for (String word : wordsToCheck.split(",")) {
if (!lexicalData.isCommonWord(new MutableCharArray(word))
&& !lexicalData.isStopLabel(word)) {
clusters.add(new Cluster(word));
}
}
}
/**
 * Builds a fresh cluster hierarchy mirroring {@code in}, in which every cluster carries
 * only the documents accepted by {@code docFilter}.
 *
 * <p>For each input cluster a new {@link Cluster} is created and given the source
 * cluster's phrases, its documents narrowed by {@code docFilter}, and the result of
 * applying this method recursively to its subclusters. Nothing else is transferred
 * from the source cluster by this method.
 *
 * @param in clusters to copy; iterated in order
 * @param docFilter predicate a document must satisfy to be kept in the copy
 * @return a new list holding one copied cluster per element of {@code in}, in the same order
 */
private static List<Cluster> sanityCheck(List<Cluster> in, Predicate<Document> docFilter)
{
    final List<Cluster> copies = Lists.newArrayListWithCapacity(in.size());
    for (final Cluster original : in) {
        final Cluster copy = new Cluster();
        copy.addPhrases(original.getPhrases());
        copy.addDocuments(Iterables.filter(original.getDocuments(), docFilter));

        // Subclusters go through the same copy-and-filter treatment.
        final List<Cluster> copiedSubclusters =
            sanityCheck(original.getSubclusters(), docFilter);
        copy.addSubclusters(copiedSubclusters);

        copies.add(copy);
    }
    return copies;
}
@Test
public void testSimple() throws Exception {
//<start id="crt2.simple"/>
//... setup some documents elsewhere
final Controller controller =
ControllerFactory.createSimple();//<co id="crt2.controller.creation"/>
documents = new ArrayList<Document>();
for (int i = 0; i < titles.length; i++) {
Document doc = new Document(titles[i], snippets[i],
"file://foo_" + i + ".txt");
documents.add(doc);
}
final ProcessingResult result = controller.process(documents,
"red fox",
LingoClusteringAlgorithm.class);//<co id="crt2.process"/>
displayResults(result);//<co id="crt2.print"/>
/*
//... setup some documents elsewhere
final Controller controller =
ControllerFactory.createSimple();//<co id="crt2.controller.creation"/>
documents = new ArrayList<Document>();
for (int i = 0; i < titles.length; i++) {
Document doc = new Document(titles[i], snippets[i],
"file://foo_" + i + ".txt");
documents.add(doc);
}
final ProcessingResult result = controller.process(documents,
"red fox",
if (highlt != null && highlt.length == 1) {
snippet = highlt[0];
}
}
}
Document carrotDocument = new Document(getValue(sdoc, titleField),
snippet, (String)sdoc.getFieldValue(urlField));
carrotDocument.setField(SOLR_DOCUMENT_ID, sdoc.getFieldValue(idFieldName));
result.add(carrotDocument);
}
return result;
}
final ControllerHelper helper = new ControllerHelper();
final InputStream is = Thread.currentThread()
.getContextClassLoader().getResourceAsStream(processResource);
if (is != null) {
try {
final LocalComponentFactory nutchInputFactory = new LocalComponentFactory() {
public LocalComponent getInstance() {
return new NutchInputComponent(defaultLanguage);
}
};
controller.addLocalComponentFactory("input-nutch", nutchInputFactory);
}
/** Adds the required component factories to a local Carrot2 controller. */
private void addComponentFactories() throws DuplicatedKeyException {
// * <input component-key="input-nutch" />
LocalComponentFactory nutchInputFactory = new LocalComponentFactory() {
public LocalComponent getInstance() {
return new NutchInputComponent(defaultLanguage);
}
};
controller.addLocalComponentFactory("input-nutch", nutchInputFactory);
// * <filter component-key="filter-lingo" />
// Factory whose getInstance() builds a LingoLocalFilterComponent from two fixed
// threshold defaults and the language objects resolved from the `languages` codes.
// NOTE(review): `languages` and `logger` come from the enclosing scope, which is not
// visible in this excerpt.
LocalComponentFactory lingoFactory = new LocalComponentFactory() {
public LocalComponent getInstance() {
final HashMap defaults = new HashMap();
// These are adjustment settings for the clustering algorithm.
// If you try the live WebStart demo of Carrot2 you can see how they affect
// the final clustering: http://www.carrot2.org
defaults.put("lsi.threshold.clusterAssignment", "0.150");
defaults.put("lsi.threshold.candidateCluster", "0.775");
// Initialize a new Lingo clustering component.
// Collect a Language object for every configured code that can be resolved.
ArrayList languageList = new ArrayList(languages.length);
for (int i = 0; i < languages.length; i++) {
final String lcode = languages[i];
try {
final Language lang = AllKnownLanguages.getLanguageForIsoCode(lcode);
if (lang == null) {
// No language was returned for this code: warn and leave it out of the list.
logger.warn("Language not supported in Carrot2: " + lcode);
} else {
languageList.add(lang);
logger.debug("Language loaded: " + lcode);
}
} catch (Throwable t) {
// Any failure while resolving one code is logged; the loop then moves on
// to the remaining codes.
logger.warn("Language could not be loaded: " + lcode, t);
}
}
// Pass the resolved languages (as an array) together with the threshold defaults.
return new LingoLocalFilterComponent(
(Language []) languageList.toArray(new Language [languageList.size()]), defaults);
}
};
controller.addLocalComponentFactory("filter-lingo", lingoFactory);
// * <output component-key="output-clustersConsumer" />
LocalComponentFactory clusterConsumerOutputFactory = new LocalComponentFactory() {
public LocalComponent getInstance() {
return new ArrayOutputComponent();
}
};
controller.addLocalComponentFactory("output-array",
c2Logger.setLevel(Level.ERROR);
AllKnownLanguages.getLanguageCodes();
c2Logger.setLevel(original);
// Initialize the controller.
controller = new LocalControllerBase();
final Configuration nutchConf = getConf();
final String processResource = nutchConf.get(
"extension.clustering.carrot2.process-resource");
return new NutchInputComponent(defaultLanguage);
}
};
controller.addLocalComponentFactory("input-nutch", nutchInputFactory);
final LocalProcess process = helper.loadProcess(
helper.getExtension(processResource), is).getProcess();
controller.addProcess(PROCESS_ID, process);
is.close();
} catch (IOException e) {
logger.error("Could not load process resource: " + processResource, e);
/**
* Adds a hardcoded clustering process to the local controller.
*/
private void addProcesses() {
final LocalProcessBase process = new LocalProcessBase(
"input-nutch",
"output-array",
new String [] {"filter-lingo"},
"The Lingo clustering algorithm (www.carrot2.org).",
"");
Related Classes of org.carrot2.util.IndirectSorterTest
Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.