Package: org.apache.mahout.df.data

Examples of org.apache.mahout.df.data.Dataset


    // prepare the data
    String descriptor = Utils.randomDescriptor(rng, nbAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
    String[] sData = Utils.double2String(source);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    String[][] splits = Utils.splitData(sData, nbMappers);

    MockTreeBuilder treeBuilder = new MockTreeBuilder();

    LongWritable key = new LongWritable();
View Full Code Here


    }

    // store the data into a file
    String[] sData = Utils.double2String(source);
    Path dataPath = Utils.writeDataToTestFile(sData);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    Data data = DataLoader.loadData(dataset, sData);

    Configuration conf = new Configuration();
    Step0JobTest.setMaxSplitSize(conf, dataPath, numMaps);
View Full Code Here

  private DecisionForest buildForest() throws IOException, ClassNotFoundException, InterruptedException {
    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
    treeBuilder.setM(m);

   Dataset dataset = Dataset.load(getConf(), datasetPath);

    ForestPredictions callback = (isOob) ? new ForestPredictions(dataset
        .nbInstances(), dataset.nblabels()) : null;

    Builder forestBuilder;

    if (isPartial) {
      log.info("Partial Mapred implementation");
View Full Code Here

      return -1;
    }
   
    // load the data
    FileSystem fs = dataPath.getFileSystem(new Configuration());
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    Data data = DataLoader.loadData(dataset, fs, dataPath);

    // take m to be the first integer less than log2(M) + 1, where M is the
    // number of inputs
    int m = (int) Math.floor(Maths.log(2, data.getDataset().nbAttributes()) + 1);
View Full Code Here

  private DecisionForest buildForest() throws IOException {
    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
    treeBuilder.setM(m);

    Dataset dataset = Dataset.load(getConf(), datasetPath);

    ForestPredictions callback = (isOob) ? new ForestPredictions(dataset
        .nbInstances(), dataset.nblabels()) : null;

    Builder forestBuilder;

    if (isPartial) {
      log.info("Partial Mapred implementation");
View Full Code Here

    if (files == null || files.length < 2) {
      throw new IllegalArgumentException("missing paths from the DistributedCache");
    }

    Path datasetPath = new Path(files[0].getPath());
    Dataset dataset = Dataset.load(conf, datasetPath);

    int numMaps = Builder.getNumMaps(conf);
    int p = conf.getInt("mapred.task.partition", -1);

    // total number of trees in the forest
View Full Code Here

    String descriptor = DescriptorUtils.generateDescriptor(description);

    Path fPath = validateOutput(filePath);
   
    log.info("generating the dataset...");
    Dataset dataset = generateDataset(descriptor, dataPath);

    log.info("storing the dataset description");
    storeWritable(new Configuration(), fPath, dataset);
  }
View Full Code Here

    if (files == null || files.length < 2) {
      throw new IllegalArgumentException("missing paths from the DistributedCache");
    }

    Dataset dataset;
    try {
      Path datasetPath = new Path(files[0].getPath());
      dataset = Dataset.load(job, datasetPath);
    } catch (IOException e) {
      throw new IllegalStateException("Exception while loading the dataset : ", e);
View Full Code Here

    }

    // store the data into a file
    String[] sData = Utils.double2String(source);
    Path dataPath = Utils.writeDataToTestFile(sData);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    Data data = DataLoader.loadData(dataset, sData);

    JobConf jobConf = new JobConf();
    jobConf.setNumMapTasks(numMaps);
View Full Code Here

    }

    // store the data into a file
    String[] sData = Utils.double2String(source);
    Path dataPath = Utils.writeDataToTestFile(sData);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    Data data = DataLoader.loadData(dataset, sData);

    Configuration conf = new Configuration();
    Step0JobTest.setMaxSplitSize(conf, dataPath, numMaps);
View Full Code Here

TOP

Related Classes of org.apache.mahout.df.data.Dataset

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware@gmail.com.