Package: org.apache.mahout.df.data

Examples of org.apache.mahout.df.data.Dataset


    // prepare the data
    String descriptor = Utils.randomDescriptor(rng, nbAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
    String[] sData = Utils.double2String(source);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    String[][] splits = Utils.splitData(sData, nbMappers);

    MockTreeBuilder treeBuilder = new MockTreeBuilder();

    LongWritable key = new LongWritable();
View Full Code Here


    }

    // store the data into a file
    String[] sData = Utils.double2String(source);
    Path dataPath = Utils.writeDataToTestFile(sData);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    Data data = DataLoader.loadData(dataset, sData);

    Configuration conf = new Configuration();
    Step0JobTest.setMaxSplitSize(conf, dataPath, numMaps);
View Full Code Here

  private DecisionForest buildForest() throws IOException, ClassNotFoundException, InterruptedException {
    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
    treeBuilder.setM(m);

   Dataset dataset = Dataset.load(getConf(), datasetPath);

    ForestPredictions callback = (isOob) ? new ForestPredictions(dataset
        .nbInstances(), dataset.nblabels()) : null;

    Builder forestBuilder;

    if (isPartial) {
      log.info("Partial Mapred implementation");
View Full Code Here

      return -1;
    }
   
    // load the data
    FileSystem fs = dataPath.getFileSystem(new Configuration());
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    Data data = DataLoader.loadData(dataset, fs, dataPath);

    // take m to be the first integer less than log2(M) + 1, where M is the
    // number of inputs
    int m = (int) Math.floor(Maths.log(2, data.getDataset().nbAttributes()) + 1);
View Full Code Here

  private DecisionForest buildForest() throws IOException {
    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
    treeBuilder.setM(m);

    Dataset dataset = Dataset.load(getConf(), datasetPath);

    ForestPredictions callback = (isOob) ? new ForestPredictions(dataset
        .nbInstances(), dataset.nblabels()) : null;

    Builder forestBuilder;

    if (isPartial) {
      log.info("Partial Mapred implementation");
View Full Code Here

    if (files == null || files.length < 2) {
      throw new IllegalArgumentException("missing paths from the DistributedCache");
    }

    Path datasetPath = new Path(files[0].getPath());
    Dataset dataset = Dataset.load(conf, datasetPath);

    int numMaps = Builder.getNumMaps(conf);
    int p = conf.getInt("mapred.task.partition", -1);

    // total number of trees in the forest
View Full Code Here

    String descriptor = DescriptorUtils.generateDescriptor(description);

    Path fPath = validateOutput(filePath);
   
    log.info("generating the dataset...");
    Dataset dataset = generateDataset(descriptor, dataPath);

    log.info("storing the dataset description");
    storeWritable(new Configuration(), fPath, dataset);
  }
View Full Code Here

    if (files == null || files.length < 2) {
      throw new IllegalArgumentException("missing paths from the DistributedCache");
    }

    Dataset dataset;
    try {
      Path datasetPath = new Path(files[0].getPath());
      dataset = Dataset.load(job, datasetPath);
    } catch (IOException e) {
      throw new IllegalStateException("Exception while loading the dataset : ", e);
View Full Code Here

    }

    // store the data into a file
    String[] sData = Utils.double2String(source);
    Path dataPath = Utils.writeDataToTestFile(sData);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    Data data = DataLoader.loadData(dataset, sData);

    JobConf jobConf = new JobConf();
    jobConf.setNumMapTasks(numMaps);
View Full Code Here

    }

    // store the data into a file
    String[] sData = Utils.double2String(source);
    Path dataPath = Utils.writeDataToTestFile(sData);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    Data data = DataLoader.loadData(dataset, sData);

    Configuration conf = new Configuration();
    Step0JobTest.setMaxSplitSize(conf, dataPath, numMaps);
View Full Code Here

TOP

Related Classes of org.apache.mahout.df.data.Dataset

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware@gmail.com.