Package org.apache.mahout.df.data

Examples of org.apache.mahout.df.data.Dataset


    String descriptor = DescriptorUtils.generateDescriptor(description);
   
    Path fPath = validateOutput(filePath);
   
    log.info("generating the dataset...");
    Dataset dataset = generateDataset(descriptor, dataPath);
   
    log.info("storing the dataset description");
    storeWritable(new Configuration(), fPath, dataset);
  }
View Full Code Here


   
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      Configuration conf = context.getConfiguration();
     
      Dataset dataset = Builder.loadDataset(conf);
      setup(dataset);
    }
View Full Code Here

    private int nblabels;
   
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      Configuration conf = context.getConfiguration();
      Dataset dataset = Builder.loadDataset(conf);
      setup(dataset.nblabels());
    }
View Full Code Here

    if ((files == null) || (files.length < 2)) {
      throw new IllegalArgumentException("missing paths from the DistributedCache");
    }
   
    Path datasetPath = new Path(files[0].getPath());
    Dataset dataset = Dataset.load(conf, datasetPath);
   
    int numMaps = Builder.getNumMaps(conf);
    int p = conf.getInt("mapred.task.partition", -1);
   
    // total number of trees in the forest
View Full Code Here

      partPaths[p] = new Path(partsPath, String.format("part.%03d", p));
      files[p] = pfs.create(partPaths[p]);
    }
   
    Path datasetPath = new Path(datasetStr);
    Dataset dataset = Dataset.load(conf, datasetPath);
   
    // currents[label] = next partition file where to place the tuple
    int[] currents = new int[dataset.nblabels()];
   
    // currents is initialized randomly in the range [0, numPartitions)
    Random random = RandomUtils.getRandom();
    for (int c = 0; c < currents.length; c++) {
      currents[c] = random.nextInt(numPartitions);
    }
   
    // for each tuple of the data
    Path dataPath = new Path(dataStr);
    FileSystem ifs = dataPath.getFileSystem(conf);
    FSDataInputStream input = ifs.open(dataPath);
    Scanner scanner = new Scanner(input);
    DataConverter converter = new DataConverter(dataset);
    int nbInstances = dataset.nbInstances();
   
    int id = 0;
    while (scanner.hasNextLine()) {
      if (id % 1000 == 0) {
        log.info(String.format("progress : %d / %d", id, nbInstances));
View Full Code Here

   
    if ((files == null) || (files.length < 2)) {
      throw new IllegalArgumentException("missing paths from the DistributedCache");
    }
   
    Dataset dataset;
    try {
      Path datasetPath = new Path(files[0].getPath());
      dataset = Dataset.load(job, datasetPath);
    } catch (IOException e) {
      throw new IllegalStateException("Exception while loading the dataset : ", e);
View Full Code Here

    int label = Utils.findLabel(descriptor);

    // all the vectors have the same label (0)
    double[][] temp = Utils.randomDoublesWithSameLabel(rng, descriptor, 100, 0);
    String[] sData = Utils.double2String(temp);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    Data data = DataLoader.loadData(dataset, sData);
    DefaultIgSplit iG = new DefaultIgSplit();

    double expected = 0.0 - 1.0 * Math.log(1.0) / Math.log(2.0);
    assertEquals(expected, iG.entropy(data));
View Full Code Here

    }

    // store the data into a file
    String[] sData = Utils.double2String(source);
    Path dataPath = Utils.writeDataToTestFile(sData);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    Data data = DataLoader.loadData(dataset, sData);

    JobConf jobConf = new JobConf();
    jobConf.setNumMapTasks(numMaps);
View Full Code Here

 
  private DecisionForest buildForest() throws IOException {
    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
    treeBuilder.setM(m);
   
    Dataset dataset = Dataset.load(getConf(), datasetPath);
   
    ForestPredictions callback = isOob ? new ForestPredictions(dataset.nbInstances(), dataset.nblabels())
        : null;
   
    Builder forestBuilder;
   
    if (isPartial) {
View Full Code Here

 
  private DecisionForest buildForest() throws IOException, ClassNotFoundException, InterruptedException {
    DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
    treeBuilder.setM(m);
   
    Dataset dataset = Dataset.load(getConf(), datasetPath);
   
    ForestPredictions callback = isOob ? new ForestPredictions(dataset.nbInstances(), dataset.nblabels())
        : null;
   
    Builder forestBuilder;
   
    if (isPartial) {
View Full Code Here

TOP

Related Classes of org.apache.mahout.df.data.Dataset

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.