this.falseNegativeCost = falseNegativeCost;
}
/**
 * Builds a new {@link SegmentSet} in which one class has been downsampled so that the
 * positive-to-negative ratio moves toward {@code falseNegativeCost / falsePositiveCost}.
 *
 * <p>The class that is over-represented relative to that target ratio is thinned via
 * {@code sampleInto}; the other class is copied over in full. When the current ratio
 * already equals the target (or the comparison is undecidable because the ratio is NaN),
 * every segment is kept.
 *
 * <p>NOTE(review): if there are no negative segments the current ratio evaluates to
 * {@code Infinity} (or {@code NaN} when there are no positive segments either); the
 * outcome then depends on {@code computeFinalExamples} / {@code sampleInto} -- confirm
 * that this is the intended handling of single-class input.
 *
 * <p>A one-line summary of the class counts before and after balancing is printed to
 * standard output.
 *
 * @param segmentSet the segments to balance; every segment must carry a true label
 * @return a newly created segment set holding the retained segments
 * @throws IllegalArgumentException if at least one segment has no true label
 */
@Override
public SegmentSet balance(SegmentSet segmentSet) {
    SegmentSet output = new SegmentSet();
    List<Segment> kept = new ArrayList<Segment>();
    List<Segment> positives = new ArrayList<Segment>();
    List<Segment> negatives = new ArrayList<Segment>();
    int missingLabels = 0;

    // Partition the input by true label, counting segments that have none.
    for (Segment candidate : segmentSet.getSegments()) {
        if (!candidate.hasTrueLabel()) {
            missingLabels++;
            continue;
        }
        boolean label = candidate.getTrueLabel();
        if (label) {
            positives.add(candidate);
        } else {
            negatives.add(candidate);
        }
    }

    if (missingLabels > 0) {
        throw new IllegalArgumentException("Data set contains " + missingLabels + " unlabeled examples.");
    }

    double actualRatio = (double) positives.size() / negatives.size();
    double targetRatio = (double) falseNegativeCost / falsePositiveCost;

    boolean tooManyPositives = actualRatio > targetRatio;
    boolean tooManyNegatives = actualRatio < targetRatio;

    if (!tooManyPositives && !tooManyNegatives) {
        // Ratio already matches the target (or is NaN): keep everything.
        kept.addAll(positives);
        kept.addAll(negatives);
    } else {
        // Keep the under-represented class whole and thin out the other one.
        List<Segment> retained = tooManyPositives ? negatives : positives;
        List<Segment> thinned = tooManyPositives ? positives : negatives;
        // Number of thinned-class examples wanted per retained-class example.
        double thinnedPerRetained = tooManyPositives ? targetRatio : 1.0 / targetRatio;

        kept.addAll(retained);
        int quota = computeFinalExamples(retained.size(), thinnedPerRetained);
        sampleInto(kept, thinned, quota);
    }

    output.setSegments(kept);

    String before = "(" + positives.size() + ", " + negatives.size() + ")";
    String after = "(" + output.getCountWithTrueLabel(true) + ", " + output.getCountWithTrueLabel(false) + ")";
    System.out.println("Balanced " + before + " to " + after);
    return output;
}