Package org.apache.mahout.df.mapreduce.partial

Examples of org.apache.mahout.df.mapreduce.partial.TreeID

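TreeID is the Writable key the partial decision-forest implementation uses to tag each tree with the map partition that built it and the tree's id inside the forest. A minimal usage sketch, assuming only the methods that actually appear in the snippets below (set, partition, treeId, clone):

    TreeID key = new TreeID();       // empty key, filled in later
    key.set(1, 42);                  // partition 1, tree id 42
    int partition = key.partition(); // -> 1
    int treeId = key.treeId();       // -> 42

    // Hadoop reuses key instances while reading sequence files,
    // so callers keep a copy before storing the key in an array:
    TreeID stored = key.clone();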

    FileSystem fs = outputPath.getFileSystem(job);

    Path[] outfiles = DFUtils.listOutputFiles(fs, outputPath);

    // read all the outputs
    TreeID key = new TreeID();
    MapredOutput value = new MapredOutput();

    int index = 0;
    for (Path path : outfiles) {
      Reader reader = new Reader(fs, path, job);

      try {
        while (reader.next(key, value)) {
          if (keys != null) {
            // the reader reuses the key instance, so store a copy
            keys[index] = key.clone();
          }

          if (trees != null) {
            trees[index] = value.getTree();
          }

          index++;
        }
      } finally {
        reader.close();
      }
    }
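The key.clone() call matters: a SequenceFile reader hands back the same key instance on every next(), so storing the key directly would leave every array slot pointing at one object. A small illustration of the pitfall this avoids (hypothetical values):

    TreeID shared = new TreeID();
    TreeID[] out = new TreeID[2];

    shared.set(0, 1);
    out[0] = shared;   // aliases the reused instance
    shared.set(0, 2);
    out[1] = shared;

    // out[0].treeId() is now 2, not 1: both slots reference the same
    // object, which is why the loop above stores key.clone() instead.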


    log.debug("partition: {} numInstances: {}", partition, instances.size());
   
    Data data = new Data(getDataset(), instances);
    Bagging bagging = new Bagging(getTreeBuilder(), data);
   
    TreeID key = new TreeID();
   
    log.debug("Building {} trees", nbTrees);
    SingleTreePredictions callback = null;
    int[] predictions = null;
    for (int treeId = 0; treeId < nbTrees; treeId++) {
      log.debug("Building tree number: {}", treeId);
      if (isOobEstimate() && !isNoOutput()) {
        callback = new SingleTreePredictions(data.size());
        predictions = callback.getPredictions();
      }
     
      Node tree = bagging.build(treeId, rng, callback);
     
      key.set(partition, firstTreeId + treeId);
     
      if (!isNoOutput()) {
        MapredOutput emOut = new MapredOutput(tree, predictions);
        output.collect(key, emOut);
      }
View Full Code Here
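Here key.set(partition, firstTreeId + treeId) gives every tree a forest-wide id. The snippets never show how firstTreeId is computed; a plausible sketch, assuming tree ids are assigned contiguously partition by partition (numMaps and numTrees are hypothetical job parameters):

    int firstTreeId = 0;
    for (int p = 0; p < partition; p++) {
      // Step1Mapper.nbTrees(...) is the same split helper the tests below use
      firstTreeId += Step1Mapper.nbTrees(numMaps, numTrees, p);
    }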

  }

  @Override
  public void close() throws IOException {
    // emit, for every tree this mapper handled, the predictions it
    // computed over its own partition of the data
    for (int index = 0; index < keys.length; index++) {
      TreeID key = new TreeID(partition, keys[index].treeId());
      output.collect(key, new MapredOutput(callbacks[index].getPredictions()));
    }
  }
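This close() runs once the mapper has consumed its whole split. Note the key it emits pairs this mapper's own partition with the original tree id, which is exactly what the Step2MapperTest further down asserts about the returned keys.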

  /**
   * Merges the outputs of the map tasks into a single DecisionForest,
   * forwarding every stored prediction to the callback.
   */
  DecisionForest processOutput(TreeID[] keys, MapredOutput[] values, PredictionCallback callback) {
    List<Node> trees = new ArrayList<Node>();

    for (int index = 0; index < keys.length; index++) {
      TreeID key = keys[index];
      MapredOutput value = values[index];

      trees.add(value.getTree());

      int[] predictions = value.getPredictions();
      for (int id = 0; id < predictions.length; id++) {
        // firstIds[p] is the global index of partition p's first instance
        callback.prediction(key.treeId(), firstIds[key.partition()] + id,
            predictions[id]);
      }
    }

    return new DecisionForest(trees);
  }
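The id arithmetic above turns a partition-local instance index into a global one. A tiny worked example with hypothetical offsets:

    int[] firstIds = {0, 100, 250}; // hypothetical: partition 0 holds 100
                                    // instances, partition 1 holds 150
    int globalId = firstIds[1] + 5; // local instance 5 of partition 1 -> 105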

      int nbTrees = Step1Mapper.nbTrees(numMaps, numTrees, partition);

      for (int treeId = 0; treeId < nbTrees; treeId++) {
        // a random leaf stands in for a real tree in this test
        Node tree = new Leaf(rng.nextInt(100));

        keys[index] = new TreeID(partition, treeId);
        values[index] = new MapredOutput(tree, nextIntArray(rng, numInstances));

        index++;
      }
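The test leans on Step1Mapper.nbTrees(numMaps, numTrees, partition) to decide how many trees each partition builds. Whatever the split rule, the per-partition counts have to add up to the requested total for the keys and values arrays to be filled exactly; a quick sanity check along those lines:

    int total = 0;
    for (int p = 0; p < numMaps; p++) {
      total += Step1Mapper.nbTrees(numMaps, numTrees, p);
    }
    // total must equal numTrees, or index would over- or under-run the arrays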

    @Override
    public void prediction(int treeId, int instanceId, int prediction) {
      // every partition holds exactly numInstances instances in this test,
      // so the partition can be recovered from the global instance id
      int partition = instanceId / numInstances;

      TreeID key = new TreeID(partition, treeId);
      int index = ArrayUtils.indexOf(keys, key);
      assertTrue("key not found", index >= 0);

      assertEquals(values[index].getPredictions()[instanceId % numInstances],
          prediction);
    }
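The ArrayUtils.indexOf lookup only works because two TreeIDs built from the same (partition, treeId) pair compare equal, i.e. TreeID's equality is presumably value-based rather than identity-based.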

    int treeIndex = 0;
    for (int partition = 0; partition < nbMappers; partition++) {
      int nbMapTrees = Step1Mapper.nbTrees(nbMappers, nbTrees, partition);

      for (int tree = 0; tree < nbMapTrees; tree++, treeIndex++) {
        keys[treeIndex] = new TreeID(partition, treeIndex);
        // put the partition in the leaf's label
        // this way we can track the outputs
        trees[treeIndex] = new Leaf(partition);
      }
     
      sizes[partition] = splits[partition].length;
    }

    // store the first step outputs in a file
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path forestPath = new Path("testdata/Step2MapperTest.forest");
    InterResults.store(fs, forestPath, keys, trees, sizes);

    LongWritable key = new LongWritable();
    Text value = new Text();

    for (int partition = 0; partition < nbMappers; partition++) {
      String[] split = splits[partition];

      // number of trees that will be handled by the mapper
      int nbConcerned = Step2Mapper.nbConcerned(nbMappers, nbTrees, partition);

      PartialOutputCollector output = new PartialOutputCollector(nbConcerned);

      // load the current mapper's (key, tree) pairs
      TreeID[] curKeys = new TreeID[nbConcerned];
      Node[] curTrees = new Node[nbConcerned];
      InterResults.load(fs, forestPath, nbMappers, nbTrees, partition, curKeys, curTrees);

      // simulate the job
      MockStep2Mapper mapper = new MockStep2Mapper(partition, dataset, curKeys, curTrees, split.length);

      for (int index = 0; index < split.length; index++) {
        key.set(index);
        value.set(split[index]);
        mapper.map(key, value, output, Reporter.NULL);
      }

      mapper.close();

      // make sure the mapper did not return its own trees
      assertEquals(nbConcerned, output.nbOutputs());

      // check the returned results
      int current = 0;
      for (int index = 0; index < nbTrees; index++) {
        if (keys[index].partition() == partition) {
          // should not be part of the results
          continue;
        }

        TreeID k = output.getKeys()[current];

        // the tree should receive the partition's index
        assertEquals(partition, k.partition());

        // make sure all the trees of the other partitions are handled in the
        // correct order
        assertEquals(index, k.treeId());

        int[] predictions = output.getValues()[current].getPredictions();

        // all the instances of the partition should be classified
        assertEquals(split.length, predictions.length);

        current++;
      }
    }
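The assertion that exactly nbConcerned pairs come back, none of them from the mapper's own partition, pins down what Step2Mapper.nbConcerned must mean: the number of trees built by all the other mappers. A hedged sketch of that relationship:

    // presumably: every tree except the ones this partition built itself
    int nbConcerned = nbTrees - Step1Mapper.nbTrees(nbMappers, nbTrees, partition);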

    log.debug("partition: " + partition + "numInstances: " + instances.size());
   
    Data data = new Data(getDataset(), instances);
    Bagging bagging = new Bagging(getTreeBuilder(), data);

    TreeID key = new TreeID();

    log.debug("Building " + nbTrees + " trees");
    SingleTreePredictions callback = null;
    int[] predictions = null;
    for (int treeId = 0; treeId < nbTrees; treeId++) {
      log.debug("Building tree N° : " + treeId);
      if (isOobEstimate() && !isNoOutput()) {
        callback = new SingleTreePredictions(data.size());
        predictions = callback.getPredictions();
      }

      Node tree = bagging.build(treeId, rng, callback);

      key.set(partition, firstTreeId + treeId);

      if (!isNoOutput()) {
        MapredOutput emOut = new MapredOutput(tree, predictions);
        output.collect(key, emOut);
      }
View Full Code Here

  }

  @Override
  public void close() throws IOException {
    for (int index = 0; index < keys.length; index++) {
      TreeID key = new TreeID(partition, keys[index].treeId());
      output.collect(key, new MapredOutput(callbacks[index].getPredictions()));
    }
  }
View Full Code Here

    FileSystem fs = outputPath.getFileSystem(job);

    Path[] outfiles = DFUtils.listOutputFiles(fs, outputPath);

    // read all the outputs
    TreeID key = new TreeID();
    MapredOutput value = new MapredOutput();
   
    int index = 0;
    for (Path path : outfiles) {
      Reader reader = new Reader(fs, path, job);

      try {
        while (reader.next(key, value)) {
          if (keys != null) {
            keys[index] = key.clone();
          }
         
          if (trees != null) {
            trees[index] = value.getTree();
          }
View Full Code Here

TOP

Related Classes of org.apache.mahout.df.mapreduce.partial.TreeID

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.