Package org.apache.mahout.df.callback

Examples of org.apache.mahout.df.callback.ForestPredictions


    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
    treeBuilder.setM(m);
   
    Dataset dataset = Dataset.load(getConf(), datasetPath);
   
    ForestPredictions callback = isOob ? new ForestPredictions(dataset.nbInstances(), dataset.nblabels())
        : null;
   
    Builder forestBuilder;
   
    if (isPartial) {
      log.info("Partial Mapred implementation");
      forestBuilder = new PartialBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
    } else {
      log.info("InMem Mapred implementation");
      forestBuilder = new InMemBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
    }
   
    log.info("Building the forest...");
    long time = System.currentTimeMillis();
   
    DecisionForest forest = forestBuilder.build(nbTrees, callback);
   
    time = System.currentTimeMillis() - time;
    log.info("Build Time: {}", DFUtils.elapsedTime(time));
   
    if (isOob) {
      Random rng;
      if (seed != null) {
        rng = RandomUtils.getRandom(seed);
      } else {
        rng = RandomUtils.getRandom();
      }
     
      FileSystem fs = dataPath.getFileSystem(getConf());
      int[] labels = Data.extractLabels(dataset, fs, dataPath);
     
      log.info("oob error estimate : "
                           + ErrorEstimate.errorRate(labels, callback.computePredictions(rng)));
    }
   
    return forest;
  }
View Full Code Here


    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
    treeBuilder.setM(m);
   
    Dataset dataset = Dataset.load(getConf(), datasetPath);
   
    ForestPredictions callback = isOob ? new ForestPredictions(dataset.nbInstances(), dataset.nblabels())
        : null;
   
    Builder forestBuilder;
   
    if (isPartial) {
      log.info("Partial Mapred implementation");
      forestBuilder = new PartialBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
    } else {
      log.info("InMem Mapred implementation");
      forestBuilder = new InMemBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
    }
    log.info("Building the forest...");
    long time = System.currentTimeMillis();
   
    DecisionForest forest = forestBuilder.build(nbTrees, callback);
   
    time = System.currentTimeMillis() - time;
    log.info("Build Time: {}", DFUtils.elapsedTime(time));
   
    if (isOob) {
      Random rng;
      if (seed != null) {
        rng = RandomUtils.getRandom(seed);
      } else {
        rng = RandomUtils.getRandom();
      }
     
      FileSystem fs = dataPath.getFileSystem(getConf());
      int[] labels = Data.extractLabels(dataset, fs, dataPath);
     
      log.info("oob error estimate : "
                           + ErrorEstimate.errorRate(labels, callback.computePredictions(rng)));
    }
   
    return forest;
  }
View Full Code Here

    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
    treeBuilder.setM(m);
   
    Dataset dataset = Dataset.load(getConf(), datasetPath);
   
    ForestPredictions callback = isOob ? new ForestPredictions(dataset.nbInstances(), dataset.nblabels())
        : null;
   
    Builder forestBuilder;
   
    if (isPartial) {
      log.info("Partial Mapred implementation");
      forestBuilder = new PartialBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
    } else {
      log.info("InMem Mapred implementation");
      forestBuilder = new InMemBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
    }
   
    log.info("Building the forest...");
    long time = System.currentTimeMillis();
   
    DecisionForest forest = forestBuilder.build(nbTrees, callback);
   
    time = System.currentTimeMillis() - time;
    log.info("Build Time: {}", DFUtils.elapsedTime(time));
   
    if (isOob) {
      Random rng = seed == null ? RandomUtils.getRandom() : RandomUtils.getRandom(seed);

      FileSystem fs = dataPath.getFileSystem(getConf());
      int[] labels = Data.extractLabels(dataset, fs, dataPath);
     
      log.info("oob error estimate : {}",
               ErrorEstimate.errorRate(labels, callback.computePredictions(rng)));
    }
   
    return forest;
  }
View Full Code Here

    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
   
    SequentialBuilder forestBuilder = new SequentialBuilder(rng, treeBuilder, train);
   
    // grow a forest with m = log2(M)+1
    ForestPredictions errorM = new ForestPredictions(train.size(), nblabels); // oob error when using m =
                                                                              // log2(M)+1
    treeBuilder.setM(m);
   
    long time = System.currentTimeMillis();
    log.info("Growing a forest with m={}", m);
    DecisionForest forestM = forestBuilder.build(nbtrees, errorM);
    sumTimeM += System.currentTimeMillis() - time;
    numNodesM += forestM.nbNodes();
   
    double oobM = ErrorEstimate.errorRate(trainLabels, errorM.computePredictions(rng)); // oob error estimate
                                                                                        // when m = log2(M)+1
   
    // grow a forest with m=1
    ForestPredictions errorOne = new ForestPredictions(train.size(), nblabels); // oob error when using m = 1
    treeBuilder.setM(1);
   
    time = System.currentTimeMillis();
    log.info("Growing a forest with m=1");
    DecisionForest forestOne = forestBuilder.build(nbtrees, errorOne);
    sumTimeOne += System.currentTimeMillis() - time;
    numNodesOne += forestOne.nbNodes();
   
    double oobOne = ErrorEstimate.errorRate(trainLabels, errorOne.computePredictions(rng)); // oob error
                                                                                            // estimate when m
                                                                                            // = 1
   
    // compute the test set error (Selection Error), and mean tree error (One Tree Error),
    // using the lowest oob error forest
    ForestPredictions testError = new ForestPredictions(test.size(), nblabels); // test set error
    MeanTreeCollector treeError = new MeanTreeCollector(test, nbtrees); // mean tree error
   
    // compute the test set error using m=1 (Single Input Error)
    errorOne = new ForestPredictions(test.size(), nblabels);
   
    if (oobM < oobOne) {
      forestM.classify(test, new MultiCallback(testError, treeError));
      forestOne.classify(test, errorOne);
    } else {
      forestOne.classify(test, new MultiCallback(testError, treeError, errorOne));
    }
   
    sumTestErr += ErrorEstimate.errorRate(testLabels, testError.computePredictions(rng));
    sumOneErr += ErrorEstimate.errorRate(testLabels, errorOne.computePredictions(rng));
    sumTreeErr += treeError.meanTreeError();
  }
View Full Code Here

    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
    treeBuilder.setM(m);
   
    Dataset dataset = Dataset.load(getConf(), datasetPath);
   
    ForestPredictions callback = isOob ? new ForestPredictions(dataset.nbInstances(), dataset.nblabels())
        : null;
   
    Builder forestBuilder;
   
    if (isPartial) {
      log.info("Partial Mapred implementation");
      forestBuilder = new PartialBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
    } else {
      log.info("InMem Mapred implementation");
      forestBuilder = new InMemBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
    }

    forestBuilder.setOutputDirName(outputPath.getName());
   
    log.info("Building the forest...");
    long time = System.currentTimeMillis();
   
    DecisionForest forest = forestBuilder.build(nbTrees, callback);
   
    time = System.currentTimeMillis() - time;
    log.info("Build Time: {}", DFUtils.elapsedTime(time));
   
    if (isOob) {
      Random rng;
      if (seed != null) {
        rng = RandomUtils.getRandom(seed);
      } else {
        rng = RandomUtils.getRandom();
      }
     
      FileSystem fs = dataPath.getFileSystem(getConf());
      int[] labels = Data.extractLabels(dataset, fs, dataPath);
     
      log.info("oob error estimate : "
                           + ErrorEstimate.errorRate(labels, callback.computePredictions(rng)));
    }

    // store the decision forest in the output path
    Path forestPath = new Path(outputPath, "forest.seq");
    log.info("Storing the forest in: " + forestPath);
View Full Code Here

    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
   
    SequentialBuilder forestBuilder = new SequentialBuilder(rng, treeBuilder, train);

    // grow a forest with m = log2(M)+1
    ForestPredictions errorM = new ForestPredictions(dataSize, nblabels); // oob error when using m = log2(M)+1
    treeBuilder.setM(m);

    long time = System.currentTimeMillis();
    log.info("Growing a forest with m=" + m);
    DecisionForest forestM = forestBuilder.build(nbtrees, errorM);
    sumTimeM += System.currentTimeMillis() - time;

    double oobM = ErrorEstimate.errorRate(trainLabels, errorM.computePredictions(rng)); // oob error estimate when m = log2(M)+1

    // grow a forest with m=1
    ForestPredictions errorOne = new ForestPredictions(dataSize, nblabels); // oob error when using m = 1
    treeBuilder.setM(1);

    time = System.currentTimeMillis();
    log.info("Growing a forest with m=1");
    DecisionForest forestOne = forestBuilder.build(nbtrees, errorOne);
    sumTimeOne += System.currentTimeMillis() - time;

    double oobOne = ErrorEstimate.errorRate(trainLabels, errorOne.computePredictions(rng)); // oob error estimate when m = 1

    // compute the test set error (Selection Error), and mean tree error (One Tree Error),
    // using the lowest oob error forest
    ForestPredictions testError = new ForestPredictions(dataSize, nblabels); // test set error
    MeanTreeCollector treeError = new MeanTreeCollector(train, nbtrees); // mean tree error

    // compute the test set error using m=1 (Single Input Error)
    errorOne = new ForestPredictions(dataSize, nblabels);

    if (oobM < oobOne) {
      forestM.classify(test, new MultiCallback(testError, treeError));
      forestOne.classify(test, errorOne);
    } else {
      forestOne.classify(test,
          new MultiCallback(testError, treeError, errorOne));
    }

    sumTestErr += ErrorEstimate.errorRate(testLabels, testError.computePredictions(rng));
    sumOneErr += ErrorEstimate.errorRate(testLabels, errorOne.computePredictions(rng));
    sumTreeErr += treeError.meanTreeError();
  }
View Full Code Here

    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
    treeBuilder.setM(m);

   Dataset dataset = Dataset.load(getConf(), datasetPath);

    ForestPredictions callback = (isOob) ? new ForestPredictions(dataset
        .nbInstances(), dataset.nblabels()) : null;

    Builder forestBuilder;

    if (isPartial) {
      log.info("Partial Mapred implementation");
      forestBuilder = new PartialBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
    } else {
      log.info("InMem Mapred implementation");
      forestBuilder = new InMemBuilder(treeBuilder, dataPath, datasetPath,
          seed, getConf());
    }
    log.info("Building the forest...");
    long time = System.currentTimeMillis();

    DecisionForest forest = forestBuilder.build(nbTrees, callback);

    time = System.currentTimeMillis() - time;
    log.info("Build Time: " + DFUtils.elapsedTime(time));

    if (isOob) {
      Random rng;
      if (seed != null)
        rng = new Random(seed);
      else
        rng = new Random();

      FileSystem fs = dataPath.getFileSystem(getConf());
      int[] labels = Data.extractLabels(dataset, fs, dataPath);

      log.info("oob error estimate : "
          + ErrorEstimate.errorRate(labels, callback.computePredictions(rng)));
    }

    return forest;
  }
View Full Code Here

    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
    treeBuilder.setM(m);

    Dataset dataset = Dataset.load(getConf(), datasetPath);

    ForestPredictions callback = (isOob) ? new ForestPredictions(dataset
        .nbInstances(), dataset.nblabels()) : null;

    Builder forestBuilder;

    if (isPartial) {
      log.info("Partial Mapred implementation");
      forestBuilder = new PartialBuilder(treeBuilder, dataPath, datasetPath,
          seed, getConf());
    } else {
      log.info("InMem Mapred implementation");
      forestBuilder = new InMemBuilder(treeBuilder, dataPath, datasetPath,
          seed, getConf());
    }

    log.info("Building the forest...");
    long time = System.currentTimeMillis();

    DecisionForest forest = forestBuilder.build(nbTrees, callback);

    time = System.currentTimeMillis() - time;
    log.info("Build Time: " + DFUtils.elapsedTime(time));

    if (isOob) {
      Random rng;
      if (seed != null)
        rng = new Random(seed);
      else
        rng = new Random();

      FileSystem fs = dataPath.getFileSystem(getConf());
      int[] labels = Data.extractLabels(dataset, fs, dataPath);
     
      log.info("oob error estimate : "
          + ErrorEstimate.errorRate(labels, callback.computePredictions(rng)));
    }

    return forest;
  }
View Full Code Here

    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
    treeBuilder.setM(m);
   
    Dataset dataset = Dataset.load(getConf(), datasetPath);
   
    ForestPredictions callback = isOob ? new ForestPredictions(dataset.nbInstances(), dataset.nblabels())
        : null;
   
    Builder forestBuilder;
   
    if (isPartial) {
      log.info("Partial Mapred implementation");
      forestBuilder = new PartialBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
    } else {
      log.info("InMem Mapred implementation");
      forestBuilder = new InMemBuilder(treeBuilder, dataPath, datasetPath, seed, getConf());
    }

    forestBuilder.setOutputDirName(outputPath.getName());
   
    log.info("Building the forest...");
    long time = System.currentTimeMillis();
   
    DecisionForest forest = forestBuilder.build(nbTrees, callback);
   
    time = System.currentTimeMillis() - time;
    log.info("Build Time: {}", DFUtils.elapsedTime(time));
    log.info("Forest num Nodes: {}", forest.nbNodes());
    log.info("Forest mean num Nodes: {}", forest.meanNbNodes());
    log.info("Forest mean max Depth: {}", forest.meanMaxDepth());

    if (isOob) {
      Random rng;
      if (seed != null) {
        rng = RandomUtils.getRandom(seed);
      } else {
        rng = RandomUtils.getRandom();
      }
     
      FileSystem fs = dataPath.getFileSystem(getConf());
      int[] labels = Data.extractLabels(dataset, fs, dataPath);
     
      log.info("oob error estimate : "
                           + ErrorEstimate.errorRate(labels, callback.computePredictions(rng)));
    }

    // store the decision forest in the output path
    Path forestPath = new Path(outputPath, "forest.seq");
    log.info("Storing the forest in: " + forestPath);
View Full Code Here

TOP

Related Classes of org.apache.mahout.df.callback.ForestPredictions

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.