Package org.apache.mahout.df.mapreduce.partial

Examples of org.apache.mahout.df.mapreduce.partial.TreeID

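TreeID is the Writable key the partial decision-forest implementation uses to tag each tree with the map partition that built it and the tree's id inside the forest. A minimal usage sketch, assuming only the methods that actually appear in the snippets below (set, partition, treeId, clone):

    TreeID key = new TreeID();       // empty key, filled in later
    key.set(1, 42);                  // partition 1, tree id 42
    int partition = key.partition(); // -> 1
    int treeId = key.treeId();       // -> 42

    // Hadoop reuses key instances while reading sequence files,
    // so callers keep a copy before storing the key in an array:
    TreeID stored = key.clone();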

    FileSystem fs = outputPath.getFileSystem(job);

    Path[] outfiles = DFUtils.listOutputFiles(fs, outputPath);

    // read all the outputs
    TreeID key = new TreeID();
    MapredOutput value = new MapredOutput();

    int index = 0;
    for (Path path : outfiles) {
      Reader reader = new Reader(fs, path, job);

      try {
        while (reader.next(key, value)) {
          if (keys != null) {
            // the reader reuses the key instance, so store a copy
            keys[index] = key.clone();
          }

          if (trees != null) {
            trees[index] = value.getTree();
          }

          index++;
        }
      } finally {
        reader.close();
      }
    }
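The key.clone() call matters: a SequenceFile reader hands back the same key instance on every next(), so storing the key directly would leave every array slot pointing at one object. A small illustration of the pitfall this avoids (hypothetical values):

    TreeID shared = new TreeID();
    TreeID[] out = new TreeID[2];

    shared.set(0, 1);
    out[0] = shared;   // aliases the reused instance
    shared.set(0, 2);
    out[1] = shared;

    // out[0].treeId() is now 2, not 1: both slots reference the same
    // object, which is why the loop above stores key.clone() instead.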


    log.debug("partition: {} numInstances: {}", partition, instances.size());
   
    Data data = new Data(getDataset(), instances);
    Bagging bagging = new Bagging(getTreeBuilder(), data);
   
    TreeID key = new TreeID();
   
    log.debug("Building {} trees", nbTrees);
    SingleTreePredictions callback = null;
    int[] predictions = null;
    for (int treeId = 0; treeId < nbTrees; treeId++) {
      log.debug("Building tree number: {}", treeId);
      if (isOobEstimate() && !isNoOutput()) {
        callback = new SingleTreePredictions(data.size());
        predictions = callback.getPredictions();
      }
     
      Node tree = bagging.build(treeId, rng, callback);
     
      key.set(partition, firstTreeId + treeId);
     
      if (!isNoOutput()) {
        MapredOutput emOut = new MapredOutput(tree, predictions);
        output.collect(key, emOut);
      }
View Full Code Here
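Here key.set(partition, firstTreeId + treeId) gives every tree a forest-wide id. The snippets never show how firstTreeId is computed; a plausible sketch, assuming tree ids are assigned contiguously partition by partition (numMaps and numTrees are hypothetical job parameters):

    int firstTreeId = 0;
    for (int p = 0; p < partition; p++) {
      // Step1Mapper.nbTrees(...) is the same split helper the tests below use
      firstTreeId += Step1Mapper.nbTrees(numMaps, numTrees, p);
    }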

  }

  @Override
  public void close() throws IOException {
    // emit, for every tree this mapper handled, the predictions it
    // computed over its own partition of the data
    for (int index = 0; index < keys.length; index++) {
      TreeID key = new TreeID(partition, keys[index].treeId());
      output.collect(key, new MapredOutput(callbacks[index].getPredictions()));
    }
  }
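This close() runs once the mapper has consumed its whole split. Note the key it emits pairs this mapper's own partition with the original tree id, which is exactly what the Step2MapperTest further down asserts about the returned keys.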

  /**
   * Merges the outputs of the map tasks into a single DecisionForest,
   * forwarding every stored prediction to the callback.
   */
  DecisionForest processOutput(TreeID[] keys, MapredOutput[] values, PredictionCallback callback) {
    List<Node> trees = new ArrayList<Node>();

    for (int index = 0; index < keys.length; index++) {
      TreeID key = keys[index];
      MapredOutput value = values[index];

      trees.add(value.getTree());

      int[] predictions = value.getPredictions();
      for (int id = 0; id < predictions.length; id++) {
        // firstIds[p] is the global index of partition p's first instance
        callback.prediction(key.treeId(), firstIds[key.partition()] + id,
            predictions[id]);
      }
    }

    return new DecisionForest(trees);
  }
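The id arithmetic above turns a partition-local instance index into a global one. A tiny worked example with hypothetical offsets:

    int[] firstIds = {0, 100, 250}; // hypothetical: partition 0 holds 100
                                    // instances, partition 1 holds 150
    int globalId = firstIds[1] + 5; // local instance 5 of partition 1 -> 105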

      int nbTrees = Step1Mapper.nbTrees(numMaps, numTrees, partition);

      for (int treeId = 0; treeId < nbTrees; treeId++) {
        // a random leaf stands in for a real tree in this test
        Node tree = new Leaf(rng.nextInt(100));

        keys[index] = new TreeID(partition, treeId);
        values[index] = new MapredOutput(tree, nextIntArray(rng, numInstances));

        index++;
      }
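The test leans on Step1Mapper.nbTrees(numMaps, numTrees, partition) to decide how many trees each partition builds. Whatever the split rule, the per-partition counts have to add up to the requested total for the keys and values arrays to be filled exactly; a quick sanity check along those lines:

    int total = 0;
    for (int p = 0; p < numMaps; p++) {
      total += Step1Mapper.nbTrees(numMaps, numTrees, p);
    }
    // total must equal numTrees, or index would over- or under-run the arrays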

    @Override
    public void prediction(int treeId, int instanceId, int prediction) {
      // every partition holds exactly numInstances instances in this test,
      // so the partition can be recovered from the global instance id
      int partition = instanceId / numInstances;

      TreeID key = new TreeID(partition, treeId);
      int index = ArrayUtils.indexOf(keys, key);
      assertTrue("key not found", index >= 0);

      assertEquals(values[index].getPredictions()[instanceId % numInstances],
          prediction);
    }
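The ArrayUtils.indexOf lookup only works because two TreeIDs built from the same (partition, treeId) pair compare equal, i.e. TreeID's equality is presumably value-based rather than identity-based.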

    int treeIndex = 0;
    for (int partition = 0; partition < nbMappers; partition++) {
      int nbMapTrees = Step1Mapper.nbTrees(nbMappers, nbTrees, partition);

      for (int tree = 0; tree < nbMapTrees; tree++, treeIndex++) {
        keys[treeIndex] = new TreeID(partition, treeIndex);
        // put the partition in the leaf's label
        // this way we can track the outputs
        trees[treeIndex] = new Leaf(partition);
      }
     
      sizes[partition] = splits[partition].length;
    }

    // store the first step outputs in a file
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path forestPath = new Path("testdata/Step2MapperTest.forest");
    InterResults.store(fs, forestPath, keys, trees, sizes);

    LongWritable key = new LongWritable();
    Text value = new Text();

    for (int partition = 0; partition < nbMappers; partition++) {
      String[] split = splits[partition];

      // number of trees that will be handled by the mapper
      int nbConcerned = Step2Mapper.nbConcerned(nbMappers, nbTrees, partition);

      PartialOutputCollector output = new PartialOutputCollector(nbConcerned);

      // load the current mapper's (key, tree) pairs
      TreeID[] curKeys = new TreeID[nbConcerned];
      Node[] curTrees = new Node[nbConcerned];
      InterResults.load(fs, forestPath, nbMappers, nbTrees, partition, curKeys, curTrees);

      // simulate the job
      MockStep2Mapper mapper = new MockStep2Mapper(partition, dataset, curKeys, curTrees, split.length);

      for (int index = 0; index < split.length; index++) {
        key.set(index);
        value.set(split[index]);
        mapper.map(key, value, output, Reporter.NULL);
      }

      mapper.close();

      // make sure the mapper did not return its own trees
      assertEquals(nbConcerned, output.nbOutputs());

      // check the returned results
      int current = 0;
      for (int index = 0; index < nbTrees; index++) {
        if (keys[index].partition() == partition) {
          // should not be part of the results
          continue;
        }

        TreeID k = output.getKeys()[current];

        // the tree should receive the partition's index
        assertEquals(partition, k.partition());

        // make sure all the trees of the other partitions are handled in the
        // correct order
        assertEquals(index, k.treeId());

        int[] predictions = output.getValues()[current].getPredictions();

        // all the instances of the partition should be classified
        assertEquals(split.length, predictions.length);

        current++;
      }
    }
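The assertion that exactly nbConcerned pairs come back, none of them from the mapper's own partition, pins down what Step2Mapper.nbConcerned must mean: the number of trees built by all the other mappers. A hedged sketch of that relationship:

    // presumably: every tree except the ones this partition built itself
    int nbConcerned = nbTrees - Step1Mapper.nbTrees(nbMappers, nbTrees, partition);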

    log.debug("partition: " + partition + "numInstances: " + instances.size());
   
    Data data = new Data(getDataset(), instances);
    Bagging bagging = new Bagging(getTreeBuilder(), data);

    TreeID key = new TreeID();

    log.debug("Building " + nbTrees + " trees");
    SingleTreePredictions callback = null;
    int[] predictions = null;
    for (int treeId = 0; treeId < nbTrees; treeId++) {
      log.debug("Building tree N° : " + treeId);
      if (isOobEstimate() && !isNoOutput()) {
        callback = new SingleTreePredictions(data.size());
        predictions = callback.getPredictions();
      }

      Node tree = bagging.build(treeId, rng, callback);

      key.set(partition, firstTreeId + treeId);

      if (!isNoOutput()) {
        MapredOutput emOut = new MapredOutput(tree, predictions);
        output.collect(key, emOut);
      }
View Full Code Here

  }

  @Override
  public void close() throws IOException {
    for (int index = 0; index < keys.length; index++) {
      TreeID key = new TreeID(partition, keys[index].treeId());
      output.collect(key, new MapredOutput(callbacks[index].getPredictions()));
    }
  }
View Full Code Here

    FileSystem fs = outputPath.getFileSystem(job);

    Path[] outfiles = DFUtils.listOutputFiles(fs, outputPath);

    // read all the outputs
    TreeID key = new TreeID();
    MapredOutput value = new MapredOutput();
   
    int index = 0;
    for (Path path : outfiles) {
      Reader reader = new Reader(fs, path, job);

      try {
        while (reader.next(key, value)) {
          if (keys != null) {
            keys[index] = key.clone();
          }
         
          if (trees != null) {
            trees[index] = value.getTree();
          }
View Full Code Here

TOP

Related Classes of org.apache.mahout.df.mapreduce.partial.TreeID

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.