Package org.apache.commons.math.stat.inference

Examples of org.apache.commons.math.stat.inference.ChiSquareTest


            expected[i] = (poissonDistribution.cumulativeProbability(binBounds.get(i) - 1) -
                poissonDistribution.cumulativeProbability(binBounds.get(i - 1) -1)) * sampleSize;
        }

        // Use chisquare test to verify that generated values are poisson(mean)-distributed
        ChiSquareTest chiSquareTest = new ChiSquareTestImpl();
        try {
            // Fail if we can reject null hypothesis that distributions are the same
            assertFalse(chiSquareTest.chiSquareTest(expected, observed, alpha));
        } catch (AssertionFailedError ex) {
            StringBuffer msgBuffer = new StringBuffer();
            DecimalFormat df = new DecimalFormat("#.##");
            msgBuffer.append("Chisquare test failed for mean = ");
            msgBuffer.append(mean);
            msgBuffer.append(" p-value = ");
            msgBuffer.append(chiSquareTest.chiSquareTest(expected, observed));
            msgBuffer.append(" chisquare statistic = ");
            msgBuffer.append(chiSquareTest.chiSquare(expected, observed));
            msgBuffer.append(". \n");
            msgBuffer.append("bin\t\texpected\tobserved\n");
            for (int i = 0; i < expected.length; i++) {
                msgBuffer.append("[");
                msgBuffer.append(i == 0 ? 1: binBounds.get(i - 1));
View Full Code Here


        //}
      }
     
      double ratio = trues.size() / (0.0 + trues.size() + falses.size());
      ChiSquaredDistribution csd = new ChiSquaredDistributionImpl(1);
      ChiSquareTest cst = new ChiSquareTestImpl();
     
      Bag<String> combined = new Bag<String>();
      combined.addAll(onlyTrues);
      combined.addAll(onlyFalses);
      Bag<String> tpf = new Bag<String>();
      tpf.addAll(trues);
      tpf.addAll(falses);
      tpf.discardInfrequent(5);
      //combined.discardInfrequent(8);
      Map<String,Double> mcNemarScores = new HashMap<String,Double>();
      for(String s : tpf.getList()) {
        int b = onlyTrues.getCount(s);
        int c = onlyFalses.getCount(s);
        double score = Math.pow(b-c, 2) / (b+c);
        int t = trues.getCount(s);
        int f = falses.getCount(s);
        double et = (t + f) * ratio;
        double ef = (t + f) * (1.0 - ratio);
        long [] obsArray = new long[]{t, f};
        double [] expectArray = new double[]{et, ef};
        double cs = cst.chiSquare(expectArray, obsArray);
        //score = cs;
        if(Double.isNaN(score)) score = 0.0;
        mcNemarScores.put(s, score);
      }

      int ss = mcNemarScores.size();
      int count = 0;
     
      boolean beforeCutOff = true;
      for(String s : StringTools.getSortedList(mcNemarScores)) {
        count++;
        double foo = count * 1.0 / ss;
        int b = onlyTrues.getCount(s);
        int c = onlyFalses.getCount(s);
       
        int t = trues.getCount(s);
        int f = falses.getCount(s);
        double et = (t + f) * ratio;
        double ef = (t + f) * (1.0 - ratio);
        long [] obsArray = new long[]{t, f};
        double [] expectArray = new double[]{et, ef};
        double cs = cst.chiSquare(expectArray, obsArray);
        if(beforeCutOff && ((1.0 - csd.cumulativeProbability(mcNemarScores.get(s))) / foo) > 0.05) {
          System.out.println(count - 1);
          beforeCutOff = false;
          //break;
        }
View Full Code Here

    Set<String> words = new HashSet<String>();
    words.addAll(wordsWithReact.getSet());
    words.addAll(wordsWithoutReact.getSet());
   
    ChiSquaredDistribution csd = new ChiSquaredDistributionImpl(1);
    ChiSquareTest cst = new ChiSquareTestImpl();
   
   
    for(String word : wordsWithReact.getList()) {
      int observed = wordsWithReact.getCount(word);
      int observedElsewhere = wordsWithoutReact.getCount(word);
      //if(observed + observedElsewhere < 20) continue;
      double expected = overallRatio * (observed + observedElsewhere);
      double expectedElsewhere = (1.0 - overallRatio) * (observed + observedElsewhere);
      double g = 0.0;
      if(observed > 0) g += 2 * observed * Math.log(observed/expected);
      if(observedElsewhere > 0) g += 2 * observedElsewhere * Math.log(observedElsewhere/expectedElsewhere);
     
      long [] obsArray = new long[]{observed, observedElsewhere};
      double [] expectArray = new double[]{expected, expectedElsewhere};
      double cs = cst.chiSquare(expectArray, obsArray);
     
      double manualcs = 0.0;
      manualcs += Math.pow(observed - expected, 2.0) / expected;
      manualcs += Math.pow(observedElsewhere - expectedElsewhere, 2.0) / expectedElsewhere;

View Full Code Here

        //}
      }
     
      double ratio = trues.size() / (0.0 + trues.size() + falses.size());
      ChiSquaredDistribution csd = new ChiSquaredDistributionImpl(1);
      ChiSquareTest cst = new ChiSquareTestImpl();
     
      Bag<String> combined = new Bag<String>();
      combined.addAll(onlyTrues);
      combined.addAll(onlyFalses);
      Bag<String> tpf = new Bag<String>();
      tpf.addAll(trues);
      tpf.addAll(falses);
      tpf.discardInfrequent(5);
      //combined.discardInfrequent(8);
      Map<String,Double> mcNemarScores = new HashMap<String,Double>();
      for(String s : tpf.getList()) {
        int b = onlyTrues.getCount(s);
        int c = onlyFalses.getCount(s);
        double score = Math.pow(b-c, 2) / (b+c);
        int t = trues.getCount(s);
        int f = falses.getCount(s);
        double et = (t + f) * ratio;
        double ef = (t + f) * (1.0 - ratio);
        long [] obsArray = new long[]{t, f};
        double [] expectArray = new double[]{et, ef};
        double cs = cst.chiSquare(expectArray, obsArray);
        //score = cs;
        if(Double.isNaN(score)) score = 0.0;
        mcNemarScores.put(s, score);
      }

      int ss = mcNemarScores.size();
      int count = 0;
     
      boolean beforeCutOff = true;
      for(String s : StringTools.getSortedList(mcNemarScores)) {
        count++;
        double foo = count * 1.0 / ss;
        int b = onlyTrues.getCount(s);
        int c = onlyFalses.getCount(s);
       
        int t = trues.getCount(s);
        int f = falses.getCount(s);
        double et = (t + f) * ratio;
        double ef = (t + f) * (1.0 - ratio);
        long [] obsArray = new long[]{t, f};
        double [] expectArray = new double[]{et, ef};
        double cs = cst.chiSquare(expectArray, obsArray);
        if(beforeCutOff && ((1.0 - csd.cumulativeProbability(mcNemarScores.get(s))) / foo) > 0.05) {
          System.out.println(count - 1);
          beforeCutOff = false;
          //break;
        }
View Full Code Here

     * @param expected expected counts
     * @param observed observed counts
     * @param alpha significance level of the test
     */
    public static void assertChiSquareAccept(String[] valueLabels, double[] expected, long[] observed, double alpha) throws Exception {
        ChiSquareTest chiSquareTest = new ChiSquareTestImpl();
        try {
            // Fail if we can reject null hypothesis that distributions are the same
            Assert.assertFalse(chiSquareTest.chiSquareTest(expected, observed, alpha));
        } catch (AssertionFailedError ex) {
            StringBuilder msgBuffer = new StringBuilder();
            DecimalFormat df = new DecimalFormat("#.##");
            msgBuffer.append("Chisquare test failed");
            msgBuffer.append(" p-value = ");
            msgBuffer.append(chiSquareTest.chiSquareTest(expected, observed));
            msgBuffer.append(" chisquare statistic = ");
            msgBuffer.append(chiSquareTest.chiSquare(expected, observed));
            msgBuffer.append(". \n");
            msgBuffer.append("value\texpected\tobserved\n");
            for (int i = 0; i < expected.length; i++) {
                msgBuffer.append(valueLabels[i]);
                msgBuffer.append("\t");
View Full Code Here

            expected[i] = (poissonDistribution.cumulativeProbability(binBounds.get(i) - 1) -
                poissonDistribution.cumulativeProbability(binBounds.get(i - 1) -1)) * sampleSize;
        }

        // Use chisquare test to verify that generated values are poisson(mean)-distributed
        ChiSquareTest chiSquareTest = new ChiSquareTestImpl();
        try {
            // Fail if we can reject null hypothesis that distributions are the same
            assertFalse(chiSquareTest.chiSquareTest(expected, observed, alpha));
        } catch (AssertionFailedError ex) {
            StringBuilder msgBuffer = new StringBuilder();
            DecimalFormat df = new DecimalFormat("#.##");
            msgBuffer.append("Chisquare test failed for mean = ");
            msgBuffer.append(mean);
            msgBuffer.append(" p-value = ");
            msgBuffer.append(chiSquareTest.chiSquareTest(expected, observed));
            msgBuffer.append(" chisquare statistic = ");
            msgBuffer.append(chiSquareTest.chiSquare(expected, observed));
            msgBuffer.append(". \n");
            msgBuffer.append("bin\t\texpected\tobserved\n");
            for (int i = 0; i < expected.length; i++) {
                msgBuffer.append("[");
                msgBuffer.append(i == 0 ? 1: binBounds.get(i - 1));
View Full Code Here

TOP

Related Classes of org.apache.commons.math.stat.inference.ChiSquareTest

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.