TIntIntHashMap rowSums = new TIntIntHashMap();
int numFeatures = 0;
Iterator<DoubleVector> featureIterator = features.iterator();
Iterator<DoubleVector> outcomeIterator = outcome.iterator();
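// single pass over all examples: for each nominal value of this feature, count how
// often every outcome class occurs; rowSums tracks how many rows carry each value,
// numFeatures counts the total number of examples seen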
while (featureIterator.hasNext()) {
DoubleVector feature = featureIterator.next();
DoubleVector out = outcomeIterator.next();
int classIndex = getOutcomeClassIndex(out);
int nominalFeatureValue = (int) feature.get(featureIndex);
int[] is = featureValueOutcomeCount.get(nominalFeatureValue);
if (is == null) {
is = new int[outcomeDimension];
featureValueOutcomeCount.put(nominalFeatureValue, is);
}
is[classIndex]++;
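// Trove int maps return 0 for missing keys, so this also covers the first occurrence of a value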
rowSums.put(nominalFeatureValue, rowSums.get(nominalFeatureValue) + 1);
numFeatures++;
}
// now we can calculate the conditional entropy: the entropy of each feature value's
// outcome distribution, weighted by the fraction of examples that carry that value
double entropySum = 0d;
TIntObjectIterator<int[]> iterator = featureValueOutcomeCount.iterator();
while (iterator.hasNext()) {
iterator.advance();
int[] outcomeCounts = iterator.value();
double condEntropy = rowSums.get(iterator.key()) / (double) numFeatures
* getEntropy(outcomeCounts);
entropySum += condEntropy;
}
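// information gain = overall entropy minus the conditional entropy of this feature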
return new Split(featureIndex, overallEntropy - entropySum);
} else {
// numerical case: collect the distinct values of this feature, they are the candidate split thresholds
Iterator<DoubleVector> featureIterator = features.iterator();
TDoubleHashSet possibleFeatureValues = new TDoubleHashSet();
while (featureIterator.hasNext()) {
DoubleVector feature = featureIterator.next();
possibleFeatureValues.add(feature.get(featureIndex));
}
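// evaluate every distinct value of this feature as a candidate split point and keep
// the one that yields the highest information gain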
double bestInfogain = -1; // best information gain seen so far; -1 so the first candidate is accepted
double bestSplit = 0.0; // split threshold that produced the best gain
TDoubleIterator iterator = possibleFeatureValues.iterator();
while (iterator.hasNext()) {