// Timestamp taken before any worker is created.
long startTime = System.currentTimeMillis();

// Map stage: four workers (W_M1..W_M4), kept in creation order so they can be
// addressed by index later on.
List<ConfigurableWorker> mappers = new ArrayList<ConfigurableWorker>(4);
ConfigurableWorker mapWorker_1 = new ConfigurableWorker("W_M1");
mappers.add(mapWorker_1);
ConfigurableWorker mapWorker_2 = new ConfigurableWorker("W_M2");
mappers.add(mapWorker_2);
ConfigurableWorker mapWorker_3 = new ConfigurableWorker("W_M3");
mappers.add(mapWorker_3);
ConfigurableWorker mapWorker_4 = new ConfigurableWorker("W_M4");
mappers.add(mapWorker_4);

// Every mapper gets its own PageContentFetchProcessor instance.
for (ConfigurableWorker mapper : mappers) {
    mapper.setTaskProcessor(new PageContentFetchProcessor());
}

// Reduce stage: a single worker (W_R1) running a URLMatchingProcessor.
ConfigurableWorker reduceWorker_1 = new ConfigurableWorker("W_R1");
reduceWorker_1.setTaskProcessor(new URLMatchingProcessor());

// The reducer reports to a MapReduceURLMiningMain instance registered as its listener.
MapReduceURLMiningMain main = new MapReduceURLMiningMain();
reduceWorker_1.addListener(main);

// Seed tasks, queued before any worker is started.
// NOTE(review): all four seeds are handed to mapWorker_1 only; the other three
// mappers start with nothing queued here — confirm this is intended.
for (String seedUrl : new String[] {
        "http://www.taobao.com",
        "http://www.xinhuanet.com",
        "http://www.zol.com.cn",
        "http://www.163.com" }) {
    addTask2Worker(mapWorker_1, new MapReducePageURLMiningTask(seedUrl));
}

// Connect the map stage to the reduce stage: one connector, built around the
// reducer, is registered as a listener on every mapper.
Map2ReduceConnector connector = new Map2ReduceConnector(Arrays.asList(reduceWorker_1));
for (ConfigurableWorker mapper : mappers) {
    mapper.addListener(connector);
}

// Start the mappers first (in list order), then the reducer.
for (ConfigurableWorker mapper : mappers) {
    mapper.start();
}
reduceWorker_1.start();
// Feed URLs from foundURLs back to the mappers, one task per iteration, until
// foundURLs holds at least URL_SIZE_TO_MINE entries; then stop every worker.
// InterruptedException from wait()/sleep() is not handled here and propagates
// to the caller.
String targetURL = "";
int lastIndex = 0;
try {
    while (true) {
        synchronized (foundURLs) {
            // The size check is done under the same monitor as every other
            // access to foundURLs, so it never observes the collection while
            // another thread is modifying it.
            if (foundURLs.size() >= URL_SIZE_TO_MINE) {
                break;
            }
            targetURL = foundURLs.pollFirst();
            if (targetURL == null) {
                // Nothing to hand out yet: release the monitor and block until
                // notified, then re-evaluate from the top of the loop.
                // NOTE(review): presumably the code that adds URLs to foundURLs
                // calls notify()/notifyAll() on it — confirm against the listener.
                foundURLs.wait();
                continue;
            }
        }

        // Advance to the next mapper, wrapping around at the end of the list.
        lastIndex = (lastIndex + 1) % mappers.size();
        MapReducePageURLMiningTask task = new MapReducePageURLMiningTask(targetURL);
        // Remember the task under the ID returned by addTask().
        taskID2TaskMap.putIfAbsent(mappers.get(lastIndex).addTask(task), task);

        // Put the URL back so it stays part of foundURLs (and of its size).
        // NOTE(review): depending on the ordering of foundURLs, the same URL may
        // be polled again on a later iteration — confirm this is intended.
        synchronized (foundURLs) {
            foundURLs.add(targetURL);
        }

        // Pause 100 ms between submissions.
        TimeUnit.MILLISECONDS.sleep(100);
    }
} finally {
    // Stop every worker even when the loop is left by an exception, so no
    // worker keeps running after this code has given up. Mappers are stopped
    // first (in list order), then the reducer.
    for (ConfigurableWorker mapper : mappers) {
        mapper.stop();
    }
    reduceWorker_1.stop();
}
synchronized (foundURLs) {
for (String string : foundURLs) {
System.out.println(string);
}