Package org.apache.mahout.math.stats

Examples of org.apache.mahout.math.stats.OnlineSummarizer


    System.out.printf("speedup,q1,q2,q3\n");

    for (int i = 0; i < 12; i++) {
      double strategy = (i - 1.0) / 10.0;
      cut.setRaiseHashLimitStrategy(strategy);
      OnlineSummarizer t1 = evaluateStrategy(testData, ref, cut);
      int evals = cut.resetEvaluationCount();
      final double speedup = 10e6 / evals;
      System.out.printf("%.1f,%.2f,%.2f,%.2f\n", speedup, t1.getQuartile(1),
          t1.getQuartile(2), t1.getQuartile(3));
      Assert.assertTrue(t1.getQuartile(2) > 0.45);
      Assert.assertTrue(speedup > 4 || t1.getQuartile(2) > 0.9);
      Assert.assertTrue(speedup > 15 || t1.getQuartile(2) > 0.8);
    }
  }
View Full Code Here


    }
  }

  private OnlineSummarizer evaluateStrategy(Matrix testData, BruteSearch ref,
                                            LocalitySensitiveHashSearch cut) {
    OnlineSummarizer t1 = new OnlineSummarizer();

    for (int i = 0; i < 100; i++) {
      final Vector q = testData.viewRow(i);
      List<WeightedThing<Vector>> v1 = cut.search(q, 150);
      BitSet b1 = new BitSet();
      for (WeightedThing<Vector> v : v1) {
        b1.set(((WeightedVector)v.getValue()).getIndex());
      }

      List<WeightedThing<Vector>> v2 = ref.search(q, 100);
      BitSet b2 = new BitSet();
      for (WeightedThing<Vector> v : v2) {
        b2.set(((WeightedVector)v.getValue()).getIndex());
      }

      b1.and(b2);
      t1.add(b1.cardinality());
    }
    return t1;
  }
View Full Code Here

    // and
    // limitCount >= searchSize && limitCount - hashCount[hashLimit-1] < searchSize

    OnlineSummarizer[] distribution = new OnlineSummarizer[BITS + 1];
    for (int i = 0; i < BITS + 1; i++) {
      distribution[i] = new OnlineSummarizer();
    }

    int hashLimit = BITS;
    int limitCount = 0;
    double distanceLimit = Double.POSITIVE_INFINITY;
View Full Code Here

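   * A quick and dirty hack to compute the median of a vector. Vectors with fewer than
   * 100 elements are not summarized; {@code zSum() / size()} is returned for them instead.
   * @param v the vector whose element values are summarized
   * @return {@code v.zSum() / v.size()} when {@code v} has fewer than 100 elements,
   *         otherwise the median reported by an {@code OnlineSummarizer} fed every element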
   */
  private static double median(Vector v) {
    OnlineSummarizer med = new OnlineSummarizer();
    if (v.size() < 100) {
      return v.zSum() / v.size();
    }
    for (Vector.Element e : v) {
      med.add(e.get());
    }
    return med.getMedian();
  }
View Full Code Here

 
  private int incorrectlyClassified;
 
  public ResultAnalyzer(Collection<String> labelSet, String defaultLabel) {
    confusionMatrix = new ConfusionMatrix(labelSet, defaultLabel);
    summarizer = new OnlineSummarizer();
  }
View Full Code Here

     
      if (lmp.getTargetCategories().size() <=2 ) {
        collector = new Auc();
      }
     
      OnlineSummarizer slh = new OnlineSummarizer();
      ConfusionMatrix cm = new ConfusionMatrix(lmp.getTargetCategories(), defaultCategory);

      State<Wrapper, CrossFoldLearner> best = lr.getBest();
      if (best == null) {
        output.printf("%s\n",
            "AdaptiveLogisticRegression has not be trained probably.");
        return;
      }
      CrossFoldLearner learner = best.getPayload().getLearner();

      BufferedReader in = TrainLogistic.open(inputFile);
      String line = in.readLine();
      csv.firstLine(line);
      line = in.readLine();
      if (showScores) {
        output.printf(Locale.ENGLISH, "\"%s\", \"%s\", \"%s\", \"%s\"\n",
            "target", "model-output", "log-likelihood", "average-likelihood");
      }
      while (line != null) {
        Vector v = new SequentialAccessSparseVector(lmp.getNumFeatures());
        //TODO: How to avoid extra target values not shown in the training process.
        int target = csv.processLine(line, v);
        double likelihood = learner.logLikelihood(target, v);
        double score = learner.classifyFull(v).maxValue();
       
        slh.add(likelihood);
        cm.addInstance(csv.getTargetString(line), csv.getTargetLabel(target));       
       
        if (showScores) {
          output.printf(Locale.ENGLISH, "%8d, %.12f, %.13f, %.13f\n", target,
              score, learner.logLikelihood(target, v), slh.getMean());
        }
        if (collector != null) {
          collector.add(target, score);
        }
        line = in.readLine();
      }
     
      output.printf(Locale.ENGLISH,"\nLog-likelihood:");
      output.printf(Locale.ENGLISH, "Min=%.2f, Max=%.2f, Mean=%.2f, Median=%.2f\n",
          slh.getMin(), slh.getMax(), slh.getMean(), slh.getMedian());

      if (collector != null) {       
        output.printf(Locale.ENGLISH, "\nAUC = %.2f\n", collector.auc());       
      }
     
View Full Code Here

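   * A quick and dirty hack to compute the median of a vector. Vectors with fewer than
   * 100 elements are not summarized; {@code zSum() / size()} is returned for them instead.
   * @param v the vector whose element values are summarized
   * @return {@code v.zSum() / v.size()} when {@code v} has fewer than 100 elements,
   *         otherwise the median reported by an {@code OnlineSummarizer} fed every element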
   */
  private static double median(Vector v) {
    OnlineSummarizer med = new OnlineSummarizer();
    if (v.size() < 100) {
      return v.zSum() / v.size();
    }
    for (Vector.Element e : v) {
      med.add(e.get());
    }
    return med.getMedian();
  }
View Full Code Here

   */
  private static double median(Vector v) {
    if (v.size() < 100) {
      return v.zSum() / v.size();
    }
    OnlineSummarizer med = new OnlineSummarizer();
    for (Vector.Element e : v.all()) {
      med.add(e.get());
    }
    return med.getMedian();
  }
View Full Code Here

    List<OnlineSummarizer> summarizers = Lists.newArrayList();
    if (searcher.size() == 0) {
      return summarizers;
    }
    for (int i = 0; i < searcher.size(); ++i) {
      summarizers.add(new OnlineSummarizer());
    }
    for (Vector v : datapoints) {
      Centroid closest = (Centroid)searcher.search(v,  1).get(0).getValue();
      OnlineSummarizer summarizer = summarizers.get(closest.getIndex());
      summarizer.add(distanceMeasure.distance(v, closest));
    }
    return summarizers;
  }
View Full Code Here

  private int correctlyClassified;
  private int incorrectlyClassified;
 
  public ResultAnalyzer(Collection<String> labelSet, String defaultLabel) {
    confusionMatrix = new ConfusionMatrix(labelSet, defaultLabel);
    summarizer = new OnlineSummarizer();
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.math.stats.OnlineSummarizer

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.