* sigma|) (Basically, a Manhattan distance in which each event's
* relative-frequency difference is divided by that event's standard
* deviation across the known author list)
*/
public List<Pair<String, Double>> analyze(Document unknown) {
List<Pair<String, Double>> results = new ArrayList<Pair<String, Double>>();
EventMap unknownEventMap = new EventMap(unknown);
if (useCentroid) {
for (Entry<String, EventMap> entry : knownCentroids.entrySet()) {
double delta = 0.0;
for (Event event : events) {
Double knownFrequency = entry.getValue().relativeFrequency(event);
if (knownFrequency == null) {
knownFrequency = 0.0;
}
delta += Math.abs((unknownEventMap.relativeFrequency(event) - knownFrequency) / eventStddev.get(event));
}
results.add(new Pair<String, Double>(entry.getKey(), delta,2));
}
} else {
for (Entry<String, Collection<EventMap>> entry : knownHistograms.asMap().entrySet()) {
for (EventMap histogram : entry.getValue()) {
double delta = 0.0;
for (Event event : events) {
delta += Math.abs((unknownEventMap.relativeFrequency(event) - histogram.relativeFrequency(event)) / eventStddev.get(event));
}
results.add(new Pair<String, Double>(entry.getKey(), delta,2));
}
}
}