@Override
public Set<Event> train(List<EventSet> eventSets) {
int numEvents = getParameter("numEvents", 50);
String informative = getParameter("Informative", "Most");
EventHistogram hist = new EventHistogram();
for (EventSet oneSet : eventSets) {
for (Event e : oneSet) {
hist.add(e);
}
}
List<Pair<Event, Double>> infoGain = new ArrayList<Pair<Event, Double>>(hist.getNTypes());
BigDecimal percentage = new BigDecimal(0.0);
BigDecimal numerator = new BigDecimal(1.0);
BigDecimal denom1 = new BigDecimal(0.0);
BigDecimal denom2 = new BigDecimal(1.0);
/*
* The list eventHistograms holds one histogram per event set and so keeps
* track of the frequency of each event in the individual documents. This
* is mi in the formula.
*/
List<EventHistogram> eventHistograms = new ArrayList<EventHistogram>(eventSets.size());
for (EventSet eventSet : eventSets) {
eventHistograms.add(new EventHistogram(eventSet));
}
for (Event event : hist) {
percentage = new BigDecimal(hist.getRelativeFrequency(event));
for (EventHistogram eventHistogram : eventHistograms) {
int mi = eventHistogram.getAbsoluteFrequency(event);
/*
* Calculates the numerator: the product of mi! for i = 0 to n.
*/