Package org.apache.mahout.df.data

Examples of org.apache.mahout.df.data.Data


    // store the data into a file
    String[] sData = Utils.double2String(source);
    Path dataPath = Utils.writeDataToTestFile(sData);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    Data data = DataLoader.loadData(dataset, sData);

    Configuration conf = new Configuration();
    Step0JobTest.setMaxSplitSize(conf, dataPath, NUM_MAPS);

    // prepare a custom TreeBuilder that will classify each
View Full Code Here


    TreeBuilder builder = new DefaultTreeBuilder();

    String[] source = Utils.double2String(dData);
    String descriptor = "N N N N N N N N L";
    Dataset dataset = DataLoader.generateDataset(descriptor, source);
    Data data = DataLoader.loadData(dataset, source);

    builder.build(rng, data);
  }
View Full Code Here

   
    Node childNode;
    if (data.getDataset().isNumerical(best.getAttr())) {
      boolean[] temp = null;

      Data loSubset = data.subset(Condition.lesser(best.getAttr(), best.getSplit()));
      Data hiSubset = data.subset(Condition.greaterOrEquals(best.getAttr(), best.getSplit()));

      if (loSubset.isEmpty() || hiSubset.isEmpty()) {
        // the selected attribute did not change the data, avoid using it in the child nodes
        selected[best.getAttr()] = true;
      } else {
        // the data changed, so we can unselect all previously selected NUMERICAL attributes
        temp = selected;
        selected = cloneCategoricalAttributes(data.getDataset(), selected);
      }

      Node loChild = build(rng, loSubset);
      Node hiChild = build(rng, hiSubset);

      // restore the selection state of the attributes
      if (temp != null) {
        selected = temp;
      } else {
        selected[best.getAttr()] = alreadySelected;
      }

      childNode = new NumericalNode(best.getAttr(), best.getSplit(), loChild, hiChild);
    } else { // CATEGORICAL attribute
      selected[best.getAttr()] = true;
     
      double[] values = data.values(best.getAttr());
      Node[] children = new Node[values.length];
     
      for (int index = 0; index < values.length; index++) {
        Data subset = data.subset(Condition.equals(best.getAttr(), values[index]));
        children[index] = build(rng, subset);
      }

      selected[best.getAttr()] = alreadySelected;
     
View Full Code Here

TOP

Related Classes of org.apache.mahout.df.data.Data

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.