}
/**
 * Runs the Bayes classifier of a category against the pages listed in
 * {@code includedUrlsList} and {@code notIncludedUrlsList} and collects the
 * outcome per URL.
 *
 * <p>The returned map is pre-populated with four empty buckets keyed
 * {@code "uinc_cinc"}, {@code "unot_cnot"}, {@code "uinc_cnot"} and
 * {@code "unot_cinc"}. Every URL whose page is available in the cache is
 * scored and handed to {@code addToIncNotIncMap}, which is expected to file it
 * under one of those buckets (presumably "u" = listed by the user, "c" = decided
 * by the classifier -- confirm against {@code addToIncNotIncMap}).
 * URLs whose page is missing from the cache are logged and skipped.
 *
 * @param catName  name of the category whose classifier is verified
 * @param loadMaps when {@code true}, {@code loadIncludedNotIncludedUrls(catName)}
 *                 is called before the verification starts
 * @return the populated result map, or {@code null} when the classifier reports
 *         that its probabilities file is empty
 * @throws UnsupportedEncodingException propagated from the loading, parsing or
 *         classification calls (the exact source is not visible from this method)
 */
public Map<String,Map<String, Double>> verify(String catName, boolean loadMaps)
        throws UnsupportedEncodingException {
    if (loadMaps) {
        loadIncludedNotIncludedUrls(catName);
    }
    BayesClassifier classifier = new BayesClassifier(config.getBaseDir(), catName);
    if (classifier.isProbabilitiesFileEmpty()) {
        // Nothing to verify against; callers must be prepared for null.
        return null;
    }

    Map<String,Map<String, Double>> mp = new HashMap<String,Map<String, Double>>();
    mp.put("uinc_cinc", new HashMap<String, Double>());
    mp.put("unot_cnot", new HashMap<String, Double>());
    mp.put("uinc_cnot", new HashMap<String, Double>());
    mp.put("unot_cinc", new HashMap<String, Double>());

    // Check what the classifier says about every URL listed as included ...
    for (String url : includedUrlsList) {
        verifySingleUrl(classifier, mp, catName, url, true);
    }
    // ... and about every URL listed as not included.
    for (String url : notIncludedUrlsList) {
        verifySingleUrl(classifier, mp, catName, url, false);
    }
    return mp;
}

/**
 * Scores one cached page with the classifier and records the result in {@code mp}.
 * A page that is absent from the cache is reported with a warning and ignored.
 *
 * @param classifier       classifier used to score the page
 * @param mp               result map that {@code addToIncNotIncMap} writes into
 * @param catName          category name, used only in the warning message
 * @param url              URL of the page to look up in the cache
 * @param expectedIncluded {@code true} if the URL comes from the included list,
 *                         {@code false} if it comes from the not-included list
 * @throws UnsupportedEncodingException propagated from parsing or classification
 */
private void verifySingleUrl(BayesClassifier classifier,
                             Map<String,Map<String, Double>> mp,
                             String catName,
                             String url,
                             boolean expectedIncluded)
        throws UnsupportedEncodingException {
    String item = cache.getItem(url);
    if (item == null) {
        String listName = expectedIncluded ? "included" : "notIncluded";
        // Note the leading space in " is in": the URL must not run into the text.
        LOGGER.warn("Page " + url + " is in " + listName + " for " + catName
                + " but not in cache");
        return;
    }
    double classifierScore =
            classifier.classify(DocumentParser.parse(item, classifier.getMaxTuple()));
    // A score strictly above 0.5 counts as "classifier says included".
    boolean classifierIncluded = classifierScore > 0.5;
    addToIncNotIncMap(mp, url, expectedIncluded, classifierIncluded, classifierScore);
}