Package org.apache.mahout.classifier.df.node

Examples of org.apache.mahout.classifier.df.node.Node
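
The snippets on this page exercise the abstract Node type from Mahout's decision-forest code: DecisionTreeBuilder produces a Node tree, Bagging builds one tree per map task, and TreeVisualizer walks a tree to print it or trace a prediction. As a quick orientation, here is a minimal, hedged sketch of the traversal idea those snippets rely on. The Toy* class names and the double[] instance type are illustrative stand-ins, not Mahout's API; the real Node hierarchy classifies Mahout Instance objects and also supports categorical splits.

// Hedged sketch: a toy binary tree mirroring the numerical-split traversal
// used by the snippets below. Class names and the double[] instance type
// are illustrative assumptions, not the Mahout API.
abstract class ToyNode {
  abstract double classify(double[] instance);
}

class ToyLeaf extends ToyNode {
  final double label;
  ToyLeaf(double label) { this.label = label; }
  @Override
  double classify(double[] instance) { return label; }
}

class ToyNumericalNode extends ToyNode {
  final int attr;        // index of the tested attribute
  final double split;    // split point
  final ToyNode loChild; // taken when instance[attr] <  split
  final ToyNode hiChild; // taken when instance[attr] >= split

  ToyNumericalNode(int attr, double split, ToyNode loChild, ToyNode hiChild) {
    this.attr = attr;
    this.split = split;
    this.loChild = loChild;
    this.hiChild = hiChild;
  }

  @Override
  double classify(double[] instance) {
    return instance[attr] < split
        ? loChild.classify(instance)
        : hiChild.classify(instance);
  }
}

The instance[attr] < split comparison is the same test that appears as instance.get(attr) < split in the predictTrace snippet further down.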


    if (alreadySelected) {
      // attribute already selected
      log.warn("attribute {} already selected in a parent node", best.getAttr());
    }

    Node childNode;
    if (data.getDataset().isNumerical(best.getAttr())) {
      boolean[] temp = null;

      Data loSubset = data.subset(Condition.lesser(best.getAttr(), best.getSplit()));
      Data hiSubset = data.subset(Condition.greaterOrEquals(best.getAttr(), best.getSplit()));

      if (loSubset.isEmpty() || hiSubset.isEmpty()) {
        // the selected attribute did not change the data, avoid using it in the child nodes
        selected[best.getAttr()] = true;
      } else {
        // the data changed, so we can unselect all previously selected NUMERICAL attributes
        temp = selected;
        selected = cloneCategoricalAttributes(data.getDataset(), selected);
      }

      // size of the subset is less than minSplitNum
      if (loSubset.size() < minSplitNum || hiSubset.size() < minSplitNum) {
        // branch is not split
        double label;
        if (data.getDataset().isNumerical(data.getDataset().getLabelId())) {
          label = sum / data.size();
        } else {
          label = data.majorityLabel(rng);
        }
        log.debug("branch is not split Leaf({})", label);
        return new Leaf(label);
      }

      Node loChild = build(rng, loSubset);
      Node hiChild = build(rng, hiSubset);

      // restore the selection state of the attributes
      if (temp != null) {
        selected = temp;
      } else {
View Full Code Here
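
The DecisionTreeBuilder snippet above is cut off right after the two subtrees are built. As a hedged guess at how such a builder typically finishes for a numerical attribute (not the verbatim Mahout source), the two children are combined into a parent node keyed by the chosen attribute and split point; the constructor shape is an assumption inferred from the attr/split/loChild/hiChild fields read reflectively in the TreeVisualizer snippets below.

      // Hedged sketch only, not the verbatim continuation: wrap the two
      // recursively built subtrees in a parent node for the chosen attribute
      // and split point. The constructor signature is an assumption inferred
      // from NumericalNode's attr/split/loChild/hiChild fields shown in the
      // TreeVisualizer snippets.
      childNode = new NumericalNode(best.getAttr(), best.getSplit(), loChild, hiChild);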


   
    log.debug("Building {} trees", nbTrees);
    for (int treeId = 0; treeId < nbTrees; treeId++) {
      log.debug("Building tree number : {}", treeId);
     
      Node tree = bagging.build(rng);
     
      key.set(partition, firstTreeId + treeId);
     
      if (isOutput()) {
        MapredOutput emOut = new MapredOutput(tree);
View Full Code Here

  void map(IntWritable key, Context context) throws IOException, InterruptedException {
   
    initRandom((InMemInputSplit) context.getInputSplit());
   
    log.debug("Building...");
    Node tree = bagging.build(rng);
   
    if (isOutput()) {
      log.debug("Outputing...");
      MapredOutput mrOut = new MapredOutput(tree);
     
View Full Code Here

        }
      } else if (node instanceof NumericalNode) {
        NumericalNode nnode = (NumericalNode) node;
        int attr = (Integer) fields.get("NumericalNode.attr").get(nnode);
        double split = (Double) fields.get("NumericalNode.split").get(nnode);
        Node loChild = (Node) fields.get("NumericalNode.loChild").get(nnode);
        Node hiChild = (Node) fields.get("NumericalNode.hiChild").get(nnode);
        buff.append('\n');
        for (int j = 0; j < layer; j++) {
          buff.append("|   ");
        }
        buff.append(attrNames == null ? attr : attrNames[attr]).append(" < ")
View Full Code Here
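
The TreeVisualizer snippets above and below read NumericalNode's attr, split, loChild and hiChild fields via reflection and indent each tree layer with "|   ". Here is a hedged sketch of that indentation scheme written against the Toy* classes from the sketch at the top of this page; it is illustrative only, since the real visualizer also handles categorical nodes, formats the split value via a doubleToString helper, and maps numeric labels back to their string values.

// Hedged sketch of the "|   " per-layer indentation, reusing the ToyNode
// classes defined in the sketch near the top of this page (illustrative only).
class ToyTreePrinter {

  static void printToy(ToyNode node, int layer, String[] attrNames, StringBuilder buff) {
    if (node instanceof ToyLeaf) {
      // leaves are rendered as " : <label>"
      buff.append(" : ").append(((ToyLeaf) node).label);
      return;
    }
    ToyNumericalNode n = (ToyNumericalNode) node;
    String name = attrNames == null ? String.valueOf(n.attr) : attrNames[n.attr];

    // low branch: attr < split
    buff.append('\n');
    for (int j = 0; j < layer; j++) {
      buff.append("|   ");
    }
    buff.append(name).append(" < ").append(n.split);
    printToy(n.loChild, layer + 1, attrNames, buff);

    // high branch: attr >= split
    buff.append('\n');
    for (int j = 0; j < layer; j++) {
      buff.append("|   ");
    }
    buff.append(name).append(" >= ").append(n.split);
    printToy(n.hiChild, layer + 1, attrNames, buff);
  }
}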

        }
      } else if (node instanceof NumericalNode) {
        NumericalNode nnode = (NumericalNode) node;
        int attr = (Integer) fields.get("NumericalNode.attr").get(nnode);
        double split = (Double) fields.get("NumericalNode.split").get(nnode);
        Node loChild = (Node) fields.get("NumericalNode.loChild").get(nnode);
        Node hiChild = (Node) fields.get("NumericalNode.hiChild").get(nnode);
       
        if (instance.get(attr) < split) {
          buff.append('(').append(attrNames == null ? attr : attrNames[attr])
              .append(" = ").append(doubleToString(instance.get(attr)))
              .append(") < ").append(doubleToString(split));
View Full Code Here

  @Test
  public void testPredictTrace() throws Exception {
    // build tree
    DecisionTreeBuilder builder = new DecisionTreeBuilder();
    builder.setM(data.getDataset().nbAttributes() - 1);
    Node tree = builder.build(rng, data);
   
    String[] prediction = TreeVisualizer.predictTrace(tree, testData,
        ATTR_NAMES);
    Assert.assertArrayEquals(new String[] {
        "outlook = rainy -> windy = TRUE -> no", "outlook = overcast -> yes",
View Full Code Here

    // build tree
    DecisionTreeBuilder builder = new DecisionTreeBuilder();
    builder.setM(data.getDataset().nbAttributes() - 1);
    builder.setMinSplitNum(0);
    builder.setComplemented(false);
    Node tree = builder.build(rng, lessData);

    assertEquals("\noutlook = sunny\n|   humidity < 85 : yes\n|   humidity >= 85 : no\noutlook = overcast : yes", TreeVisualizer.toString(tree, data.getDataset(), ATTR_NAMES));
  }
View Full Code Here
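
For comparison with the expected string in the test above, a tiny usage example of the toy printer (hypothetical attribute index, split and label values):

    // Usage example for the toy sketches above (illustrative values only).
    ToyNode tree = new ToyNumericalNode(0, 85.0,
        new ToyLeaf(1.0),   // humidity <  85 : one label code
        new ToyLeaf(0.0));  // humidity >= 85 : another label code
    StringBuilder buff = new StringBuilder();
    ToyTreePrinter.printToy(tree, 1, new String[] {"humidity"}, buff);
    // buff now holds "\n|   humidity < 85.0 : 1.0\n|   humidity >= 85.0 : 0.0";
    // the real TreeVisualizer additionally trims the split to "85" and maps
    // the label codes back to "yes"/"no" via the Dataset.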

  public void testEmpty() throws Exception {
    Data emptyData = new Data(data.getDataset());
   
    // build tree
    DecisionTreeBuilder builder = new DecisionTreeBuilder();
    Node tree = builder.build(rng, emptyData);

    assertEquals(" : unknown", TreeVisualizer.toString(tree, data.getDataset(), ATTR_NAMES));
  }
View Full Code Here

    if (alreadySelected) {
      // attribute already selected
      log.warn("attribute {} already selected in a parent node", best.getAttr());
    }

    Node childNode;
    if (data.getDataset().isNumerical(best.getAttr())) {
      boolean[] temp = null;

      Data loSubset = data.subset(Condition.lesser(best.getAttr(), best.getSplit()));
      Data hiSubset = data.subset(Condition.greaterOrEquals(best.getAttr(), best.getSplit()));

      if (loSubset.isEmpty() || hiSubset.isEmpty()) {
        // the selected attribute did not change the data, avoid using it in the child nodes
        selected[best.getAttr()] = true;
      } else {
        // the data changed, so we can unselect all previously selected NUMERICAL attributes
        temp = selected;
        selected = cloneCategoricalAttributes(data.getDataset(), selected);
      }

      Node loChild = build(rng, loSubset);
      Node hiChild = build(rng, hiSubset);

      // restore the selection state of the attributes
      if (temp != null) {
        selected = temp;
      } else {
View Full Code Here
