Package quickml.supervised.classifier.decisionTree.tree

Examples of quickml.supervised.classifier.decisionTree.tree.ClassificationCounter


* Created by chrisreeves on 6/24/14.
*/
public class GiniImpurityScorer implements Scorer{
    @Override
    public double scoreSplit(ClassificationCounter a, ClassificationCounter b) {
        ClassificationCounter parent = ClassificationCounter.merge(a, b);
        double parentGiniIndex = getGiniIndex(parent);
        double aGiniIndex = getGiniIndex(a) * a.getTotal() / parent.getTotal() ;
        double bGiniIndex = getGiniIndex(b) * b.getTotal() / parent.getTotal();
        return parentGiniIndex - aGiniIndex - bGiniIndex;
    }
View Full Code Here


        }
    }

    @Override
    public double scoreSplit(final ClassificationCounter a, final ClassificationCounter b) {
        ClassificationCounter parent = ClassificationCounter.merge(a, b);
        double parentMSE = getTotalError(parent) / parent.getTotal();
        double splitMSE = (getTotalError(a) + getTotalError(b)) / (a.getTotal() + b.getTotal());
        return parentMSE - splitMSE;
    }
View Full Code Here

        }
        return trainingDataList;
    }

    private void validateData(Iterable<? extends Instance<AttributesMap>> trainingData) {
        ClassificationCounter classificationCounter = ClassificationCounter.countAll(trainingData);
        Preconditions.checkArgument(classificationCounter.getCounts().keySet().size() <= 2, "trainingData must contain only 2 classifications, but it had %s", classificationCounter.getCounts().keySet().size());
    }
View Full Code Here

    * Add data to each split data set based on the desired cross data values. Maintain the same ratio of classifications in the split set by
    * selecting that ratio from outside sets. Only keep the attributes in the supporting instances that are in the white list
    * */
    private void crossPollinateData(Map<Serializable, ArrayList<Instance<AttributesMap>>> splitTrainingData, ArrayList<Instance<AttributesMap>> allData) {
        for(Map.Entry<Serializable, ArrayList<Instance<AttributesMap>>> entry : splitTrainingData.entrySet()) {
            ClassificationCounter splitClassificationCounter = ClassificationCounter.countAll(entry.getValue());
            long amountCrossData = (long) Math.max(splitClassificationCounter.getTotal() * percentCrossData, minimumAmountTotalCrossData);
            Set<Instance<AttributesMap>> crossData = new HashSet<>();
            ClassificationCounter crossDataCount = new ClassificationCounter();
            for(int i = allData.size()-1; i >= 0; i--) {
                Instance<AttributesMap>instance = allData.get(i);
                double classificationRatio = splitClassificationCounter.getCount(instance.getLabel()) / splitClassificationCounter.getTotal();
                double targetCount = Math.max(classificationRatio * amountCrossData, minimumAmountCrossDataPerClassification);
                if(shouldAddInstance(entry.getKey(), instance, crossDataCount, targetCount)) {
                    crossData.add(cleanSupportingData(instance));
                    crossDataCount.addClassification(instance.getLabel(), instance.getWeight());
                }
                if(crossDataCount.getTotal() >= amountCrossData) {
                    break;
                }
            }
            //cross pollinate data
            entry.getValue().addAll(crossData);
View Full Code Here

*/
public class ClassificationCounterTest {

    @Test
    public void testAdd() {
        ClassificationCounter a = new ClassificationCounter();
        a.addClassification("dog", 1.0);
        a.addClassification("cat", 0.5);
        ClassificationCounter b = new ClassificationCounter();
        b.addClassification("dog", 0.5);
        b.addClassification("cat", 1.0);
        ClassificationCounter c = a.add(b);
        Assert.assertEquals(c.getCount("dog"), 1.5);
        Assert.assertEquals(c.getCount("cat"), 1.5);
    }
View Full Code Here

        Assert.assertEquals(c.getCount("cat"), 1.5);
    }

    @Test
    public void testSubtract() {
        ClassificationCounter a = new ClassificationCounter();
        a.addClassification("dog", 1.0);
        a.addClassification("cat", 2.5);
        ClassificationCounter b = new ClassificationCounter();
        b.addClassification("dog", 0.5);
        b.addClassification("cat", 1.0);
        ClassificationCounter c = a.subtract(b);
        Assert.assertEquals(c.getCount("dog"), 0.5);
        Assert.assertEquals(c.getCount("cat"), 1.5);
    }
View Full Code Here

        Assert.assertEquals(c.getCount("cat"), 1.5);
    }

    @Test
    public void testMerge() {
        ClassificationCounter a = new ClassificationCounter();
        a.addClassification("dog", 1.0);
        a.addClassification("cat", 0.5);
        ClassificationCounter b = new ClassificationCounter();
        b.addClassification("dog", 0.5);
        b.addClassification("cat", 1.0);
        ClassificationCounter merged = ClassificationCounter.merge(a, b);
        Assert.assertEquals(merged.getTotal(), 3.0);
        Assert.assertEquals(merged.getCount("dog"), 1.5);
        Assert.assertEquals(merged.getCount("cat"), 1.5);
    }
View Full Code Here

* Created by ian on 2/27/14.
*/
public class MSEScorerTest {
    @Test
    public void simpleTest() {
        ClassificationCounter a = new ClassificationCounter();
        a.addClassification("a", 4);
        a.addClassification("b", 9);
        a.addClassification("c", 1);
        ClassificationCounter b = new ClassificationCounter();
       b.addClassification("a", 5);
       b.addClassification("b", 9);
       b.addClassification("c", 6);
        MSEScorer mseScorer = new MSEScorer(MSEScorer.CrossValidationCorrection.FALSE);
        Assert.assertTrue(Math.abs(mseScorer.scoreSplit(a, b)- 0.021776929) < 0.000000001);
    }
View Full Code Here

public class InformationGainScorerTest {

    @Test
    public void sameClassificationTest() {
        ClassificationCounter a = new ClassificationCounter();
        a.addClassification("a", 4);
        ClassificationCounter b = new ClassificationCounter();
        b.addClassification("a", 4);
        InformationGainScorer scorer = new InformationGainScorer();
        Assert.assertEquals(scorer.scoreSplit(a, b), 0.0);
    }
View Full Code Here

        Assert.assertEquals(scorer.scoreSplit(a, b), 0.0);
    }

    @Test
    public void diffClassificationTest() {
        ClassificationCounter a = new ClassificationCounter();
        a.addClassification("a", 4);
        ClassificationCounter b = new ClassificationCounter();
        b.addClassification("b", 4);
        InformationGainScorer scorer = new InformationGainScorer();
        Assert.assertEquals(scorer.scoreSplit(a, b), 1.0);
    }
View Full Code Here

TOP

Related Classes of quickml.supervised.classifier.decisionTree.tree.ClassificationCounter

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.