// System.out.println(s + "\t" + t.getCount(s) + "\t" + f.getCount(s));
//}
}
double ratio = trues.size() / (0.0 + trues.size() + falses.size());
ChiSquaredDistribution csd = new ChiSquaredDistributionImpl(1);
ChiSquareTest cst = new ChiSquareTestImpl();
Bag<String> combined = new Bag<String>();
combined.addAll(onlyTrues);
combined.addAll(onlyFalses);
Bag<String> tpf = new Bag<String>();
tpf.addAll(trues);
tpf.addAll(falses);
tpf.discardInfrequent(5);
//combined.discardInfrequent(8);
Map<String,Double> mcNemarScores = new HashMap<String,Double>();
for(String s : tpf.getList()) {
int b = onlyTrues.getCount(s);
int c = onlyFalses.getCount(s);
double score = Math.pow(b-c, 2) / (b+c);
int t = trues.getCount(s);
int f = falses.getCount(s);
double et = (t + f) * ratio;
double ef = (t + f) * (1.0 - ratio);
long [] obsArray = new long[]{t, f};
double [] expectArray = new double[]{et, ef};
double cs = cst.chiSquare(expectArray, obsArray);
//score = cs;
if(Double.isNaN(score)) score = 0.0;
mcNemarScores.put(s, score);
}
int ss = mcNemarScores.size();
int count = 0;
boolean beforeCutOff = true;
for(String s : StringTools.getSortedList(mcNemarScores)) {
count++;
double foo = count * 1.0 / ss;
int b = onlyTrues.getCount(s);
int c = onlyFalses.getCount(s);
int t = trues.getCount(s);
int f = falses.getCount(s);
double et = (t + f) * ratio;
double ef = (t + f) * (1.0 - ratio);
long [] obsArray = new long[]{t, f};
double [] expectArray = new double[]{et, ef};
double cs = cst.chiSquare(expectArray, obsArray);
if(beforeCutOff && ((1.0 - csd.cumulativeProbability(mcNemarScores.get(s))) / foo) > 0.05) {
System.out.println(count - 1);
beforeCutOff = false;
//break;
}
System.out.println(s + "\t" + b + "\t" + c + "\t" + t + "\t" + f + "\t" + mcNemarScores.get(s)
+ "\t" + (1.0 - csd.cumulativeProbability(mcNemarScores.get(s)))
+ "\t" + ((1.0 - csd.cumulativeProbability(mcNemarScores.get(s))) / foo)
+ "\t" + csd.cumulativeProbability(cs));
}
}
}