Package org.apache.mahout.df.data

Examples of org.apache.mahout.df.data.Data


      log.warn("attribute {} already selected in a parent node", best.getAttr());
    }
   
    Node childNode;
    if (data.getDataset().isNumerical(best.getAttr())) {
      Data loSubset = data.subset(Condition.lesser(best.getAttr(), best.getSplit()));
      Node loChild = build(rng, loSubset);
     
      Data hiSubset = data.subset(Condition.greaterOrEquals(best.getAttr(), best.getSplit()));
      Node hiChild = build(rng, hiSubset);
     
      childNode = new NumericalNode(best.getAttr(), best.getSplit(), loChild, hiChild);
    } else { // CATEGORICAL attribute
      selected[best.getAttr()] = true;
     
      double[] values = data.values(best.getAttr());
      Node[] children = new Node[values.length];
     
      for (int index = 0; index < values.length; index++) {
        Data subset = data.subset(Condition.equals(best.getAttr(), values[index]));
        children[index] = build(rng, subset);
      }
     
      childNode = new CategoricalNode(best.getAttr(), values, children);
     
View Full Code Here


  protected double numericalIg(Data data, int attr, double split) {
    double hy = entropy(data);
    double invDataSize = 1.0 / data.size();
   
    // LO subset
    Data subset = data.subset(Condition.lesser(attr, split));
    hy -= subset.size() * invDataSize * entropy(subset);
   
    // HI subset
    subset = data.subset(Condition.greaterOrEquals(attr, split));
    hy -= subset.size() * invDataSize * entropy(subset);
   
    return hy;
  }
View Full Code Here

    // store the data into a file
    String[] sData = Utils.double2String(source);
    Path dataPath = Utils.writeDataToTestFile(sData);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    Data data = DataLoader.loadData(dataset, sData);

    Configuration conf = new Configuration();
    Step0JobTest.setMaxSplitSize(conf, dataPath, NUM_MAPS);

    // prepare a custom TreeBuilder that will classify each
View Full Code Here

    // store the data into a file
    String[] sData = Utils.double2String(source);
    Path dataPath = Utils.writeDataToTestFile(sData);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    Data data = DataLoader.loadData(dataset, sData);

    Configuration conf = new Configuration();
    Step0JobTest.setMaxSplitSize(conf, dataPath, numMaps);

    // prepare a custom TreeBuilder that will classify each
View Full Code Here

   * @throws RuntimeException if the data is not set
   */
  public Node build(int treeId, Random rng, PredictionCallback callback) {
    log.debug("Bagging...");
    Arrays.fill(sampled, false);
    Data bag = data.bagging(rng, sampled);

    log.debug("Building...");
    Node tree = treeBuilder.build(rng, bag);

    // predict the label for the out-of-bag elements
View Full Code Here

  }

  protected Data loadData(Configuration conf, Path dataPath, Dataset dataset) throws IOException {
    log.info("Loading the data...");
    FileSystem fs = dataPath.getFileSystem(conf);
    Data data = DataLoader.loadData(dataset, fs, dataPath);
    log.info("Data Loaded");

    return data;
  }
View Full Code Here

    int dataSize = data.size();
    int nblabels = data.getDataset().nblabels();

    Random rng = RandomUtils.getRandom();

    Data train = data.clone();
    Data test = train.rsplit(rng, (int) (data.size() * 0.1));
   
    int[] trainLabels = train.extractLabels();
    int[] testLabels = test.extractLabels();
   
    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
   
    SequentialBuilder forestBuilder = new SequentialBuilder(rng, treeBuilder, train);

View Full Code Here

    }
   
    // load the data
    FileSystem fs = dataPath.getFileSystem(new Configuration());
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    Data data = DataLoader.loadData(dataset, fs, dataPath);

    // take m to be the first integer less than log2(M) + 1, where M is the
    // number of inputs
    int m = (int) Math.floor(Maths.log(2, data.getDataset().nbAttributes()) + 1);

    for (int iteration = 0; iteration < nbIterations; iteration++) {
      log.info("Iteration " + iteration);
      runIteration(data, m, nbTrees);
    }
View Full Code Here

      if (best == null || best.ig < split.ig)
        best = split;
    }

    if (data.getDataset().isNumerical(best.attr)) {
      Data loSubset = data.subset(Condition.lesser(best.attr, best.split));
      Node loChild = build(rng, loSubset);

      Data hiSubset = data.subset(Condition.greaterOrEquals(best.attr,
          best.split));
      Node hiChild = build(rng, hiSubset);

      return new NumericalNode(best.attr, best.split, loChild, hiChild);
    } else { // CATEGORICAL attribute
      double[] values = data.values(best.attr);
      Node[] childs = new Node[values.length];

      for (int index = 0; index < values.length; index++) {
        Data subset = data.subset(Condition.equals(best.attr, values[index]));
        childs[index] = build(rng, subset);
      }

      return new CategoricalNode(best.attr, values, childs);
    }
View Full Code Here

  protected static Data loadData(Configuration conf, Path dataPath, Dataset dataset)
      throws Exception {
    log.info("Loading the data...");
    FileSystem fs = dataPath.getFileSystem(conf);
    Data data = DataLoader.loadData(dataset, fs, dataPath);
    log.info("Data Loaded");

    return data;
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.df.data.Data

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.