Package org.apache.mahout.df.mapred.partial

Source Code of org.apache.mahout.df.mapred.partial.Step2MapperTest$MockStep2Mapper

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mahout.df.mapred.partial;

import java.util.Random;

import junit.framework.TestCase;

import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.data.DataLoader;
import org.apache.mahout.df.data.Dataset;
import org.apache.mahout.df.data.Utils;
import org.apache.mahout.df.mapreduce.partial.InterResults;
import org.apache.mahout.df.mapreduce.partial.TreeID;
import org.apache.mahout.df.node.Leaf;
import org.apache.mahout.df.node.Node;

public class Step2MapperTest extends TestCase {

  /**
   * Special Step2Mapper that can be configured without using a Configuration
   *
   */
  private static class MockStep2Mapper extends Step2Mapper {
    private MockStep2Mapper(int partition, Dataset dataset, TreeID[] keys,
        Node[] trees, int numInstances) {
      configure(partition, dataset, keys, trees, numInstances);
    }

  }

  /** nb attributes per generated data instance */
  protected static final int nbAttributes = 4;

  /** nb generated data instances */
  protected static final int nbInstances = 100;

  /** nb trees to build */
  protected static final int nbTrees = 11;

  /** nb mappers to use */
  protected static final int nbMappers = 5;

  @Override
  protected void setUp() throws Exception {
    super.setUp();
    RandomUtils.useTestSeed();
  }
              
  public void testMapper() throws Exception {
    Random rng = RandomUtils.getRandom();

    // prepare the data
    String descriptor = Utils.randomDescriptor(rng, nbAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
    String[] sData = Utils.double2String(source);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    String[][] splits = Utils.splitData(sData, nbMappers);

    // prepare first step output
    TreeID[] keys = new TreeID[nbTrees];
    Node[] trees = new Node[nbTrees];
    int[] sizes = new int[nbMappers];
   
    int treeIndex = 0;
    for (int partition = 0; partition < nbMappers; partition++) {
      int nbMapTrees = Step1Mapper.nbTrees(nbMappers, nbTrees, partition);

      for (int tree = 0; tree < nbMapTrees; tree++, treeIndex++) {
        keys[treeIndex] = new TreeID(partition, treeIndex);
        // put the partition in the leaf's label
        // this way we can track the outputs
        trees[treeIndex] = new Leaf(partition);
      }
     
      sizes[partition] = splits[partition].length;
    }

    // store the first step outputs in a file
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path forestPath = new Path("testdata/Step2MapperTest.forest");
    InterResults.store(fs, forestPath, keys, trees, sizes);

    LongWritable key = new LongWritable();
    Text value = new Text();

    for (int partition = 0; partition < nbMappers; partition++) {
      String[] split = splits[partition];

      // number of trees that will be handled by the mapper
      int nbConcerned = Step2Mapper.nbConcerned(nbMappers, nbTrees, partition);

      PartialOutputCollector output = new PartialOutputCollector(nbConcerned);

      // load the current mapper's (key, tree) pairs
      TreeID[] curKeys = new TreeID[nbConcerned];
      Node[] curTrees = new Node[nbConcerned];
      InterResults.load(fs, forestPath, nbMappers, nbTrees, partition, curKeys, curTrees);

      // simulate the job
      MockStep2Mapper mapper = new MockStep2Mapper(partition, dataset, curKeys, curTrees, split.length);

      for (int index = 0; index < split.length; index++) {
        key.set(index);
        value.set(split[index]);
        mapper.map(key, value, output, Reporter.NULL);
      }

      mapper.close();

      // make sure the mapper did not return its own trees
      assertEquals(nbConcerned, output.nbOutputs());

      // check the returned results
      int current = 0;
      for (int index = 0; index < nbTrees; index++) {
        if (keys[index].partition() == partition) {
          // should not be part of the results
          continue;
        }

        TreeID k = output.getKeys()[current];

        // the tree should receive the partition's index
        assertEquals(partition, k.partition());

        // make sure all the trees of the other partitions are handled in the
        // correct order
        assertEquals(index, k.treeId());

        int[] predictions = output.getValues()[current].getPredictions();

        // all the instances of the partition should be classified
        assertEquals(split.length, predictions.length);
        assertEquals(
            "at least one instance of the partition was not classified", -1,
            ArrayUtils.indexOf(predictions, -1));

        // the tree must not belong to the mapper's partition
        int treePartition = predictions[0];
        assertFalse("Step2Mapper returned a tree from its own partition",
            partition == treePartition);

        current++;
      }
    }
  }
}
TOP

Related Classes of org.apache.mahout.df.mapred.partial.Step2MapperTest$MockStep2Mapper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.