Package org.apache.mahout.df.data

Examples of org.apache.mahout.df.data.Dataset, collected from Mahout's decision-forest (random forest) code and tests


    log.info("DistributedCache.getCacheFiles(): {}", ArrayUtils.toString(files));
   
    Preconditions.checkArgument(files != null && files.length >= 2, "missing paths from the DistributedCache" );
   
    Path datasetPath = new Path(files[0].getPath());
    Dataset dataset = Dataset.load(conf, datasetPath);
   
    int numMaps = Builder.getNumMaps(conf);
    int p = conf.getInt("mapred.task.partition", -1);
   
    // total number of trees in the forest
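
For context, a minimal, self-contained sketch of the surrounding pattern: a mapper whose setup() pulls the cached files out of the DistributedCache and loads the Dataset from the first one. The mapper class and its type parameters are hypothetical; Dataset.load and DistributedCache.getCacheFiles are the same calls used above.

    import java.io.IOException;
    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.filecache.DistributedCache;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.mahout.df.data.Dataset;

    // hypothetical mapper illustrating the loading pattern above
    public class DatasetAwareMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

      private Dataset dataset;

      @Override
      protected void setup(Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();

        // files registered on the client side with DistributedCache.addCacheFile(...)
        URI[] files = DistributedCache.getCacheFiles(conf);
        if (files == null || files.length < 1) {
          throw new IOException("missing paths from the DistributedCache");
        }

        // by the convention used above, files[0] points at the serialized Dataset
        dataset = Dataset.load(conf, new Path(files[0].getPath()));
      }
    }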


    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);

    // prepare a data converter
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    DataConverter converter = new DataConverter(dataset);

    Job job = new Job();
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, dataPath);
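
DataLoader.generateDataset pairs an attribute descriptor with the raw CSV lines. A minimal, self-contained sketch with made-up values, assuming the usual df descriptor tokens: N (numerical), C (categorical), L (label), I (ignored).

    import org.apache.mahout.df.data.DataConverter;
    import org.apache.mahout.df.data.DataLoader;
    import org.apache.mahout.df.data.Dataset;

    public class DescriptorExample {
      public static void main(String[] args) throws Exception {
        // two numerical columns, one categorical column, then the label column
        String descriptor = "N N C L";
        String[] sData = { "1.0,2.5,red,yes", "0.3,4.1,blue,no" };

        // build the dataset schema from the descriptor and the sample lines,
        // then a converter that turns raw lines into instances
        Dataset dataset = DataLoader.generateDataset(descriptor, sData);
        DataConverter converter = new DataConverter(dataset);
      }
    }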

    String descriptor = DescriptorUtils.generateDescriptor(description);
   
    Path fPath = validateOutput(filePath);
   
    log.info("generating the dataset...");
    Dataset dataset = generateDataset(descriptor, dataPath);
   
    log.info("storing the dataset description");
    DFUtils.storeWritable(new Configuration(), fPath, dataset);
  }
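
Here the descriptor itself comes from DescriptorUtils.generateDescriptor, which expands a shorthand description; assuming the convention that a count repeats the token that follows it:

    // "4 N C L" reads as four numerical attributes, one categorical, one label;
    // under the assumed convention it expands to "N N N N C L"
    String descriptor = DescriptorUtils.generateDescriptor("4 N C L");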

   
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      Configuration conf = context.getConfiguration();
     
      Dataset dataset = Builder.loadDataset(conf);
      setup(dataset);
    }
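
The override above delegates to a setup(Dataset) overload whose body is not shown. A plausible sketch, with hypothetical field names:

    private Dataset dataset;
    private DataConverter converter;

    // hedged sketch: cache the dataset and a converter for later use in map()
    protected void setup(Dataset dataset) {
      this.dataset = dataset;
      this.converter = new DataConverter(dataset);
    }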

    private int nblabels;
   
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      Configuration conf = context.getConfiguration();
      Dataset dataset = Builder.loadDataset(conf);
      setup(dataset.nblabels());
    }
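
The nblabels variant only needs the label count, typically to size per-label accumulators (the partitioning snippet further down allocates new int[dataset.nblabels()] the same way). A hedged sketch of the delegated overload:

    // hedged sketch: remember how many distinct label values exist
    protected void setup(int nblabels) {
      this.nblabels = nblabels;
      // downstream code can now allocate one slot per label,
      // e.g. int[] counts = new int[nblabels];
    }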

      if ((files == null) || (files.length < 2)) {
        throw new IOException("not enough paths in the DistributedCache");
      }

      Dataset dataset = Dataset.load(conf, new Path(files[0].getPath()));

      converter = new DataConverter(dataset);

      forest = DecisionForest.load(conf, new Path(files[1].getPath()));
      if (forest == null) {
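
The snippet is cut off inside the null check; the original error handling is elided, but a plausible completion simply fails fast:

      // hedged completion of the truncated check above
      if (forest == null) {
        throw new IOException("DecisionForest not found in the DistributedCache");
      }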

      partPaths[p] = new Path(partsPath, String.format(Locale.ENGLISH, "part.%03d", p));
      files[p] = pfs.create(partPaths[p]);
    }
   
    Path datasetPath = new Path(datasetStr);
    Dataset dataset = Dataset.load(conf, datasetPath);
   
    // currents[label] = index of the next partition file to receive a tuple with that label
    int[] currents = new int[dataset.nblabels()];

    // currents is initialized randomly in the range [0, numPartitions)
    Random random = RandomUtils.getRandom();
    for (int c = 0; c < currents.length; c++) {
      currents[c] = random.nextInt(numPartitions);
    }

    // for each tuple of the data
    Path dataPath = new Path(dataStr);
    FileSystem ifs = dataPath.getFileSystem(conf);
    FSDataInputStream input = ifs.open(dataPath);
    Scanner scanner = new Scanner(input);
    DataConverter converter = new DataConverter(dataset);
    int nbInstances = dataset.nbInstances();
   
    int id = 0;
    while (scanner.hasNextLine()) {
      if (id % 1000 == 0) {
        log.info("progress : {} / {}", id, nbInstances);
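
The loop is cut off after the progress log. A hedged sketch of the dispatch it leads into, with currents[] as the per-label cursor; labelOf(...) is a hypothetical stand-in, and the DataConverter.convert(id, line) signature is an assumption here, not a confirmed API:

    // hedged sketch of the full read/dispatch loop (the original body is elided)
    int id = 0;
    while (scanner.hasNextLine()) {
      String line = scanner.nextLine();
      if (line.isEmpty()) {
        continue; // skip blank lines
      }

      // read the label of this tuple (labelOf is a hypothetical stand-in)
      int label = labelOf(converter.convert(id, line));

      // append the raw line to the partition file its label currently points at
      int partition = currents[label];
      files[partition].writeBytes(line);
      files[partition].writeChar('\n');

      // round-robin: the next tuple with this label goes to the next partition
      currents[label] = (currents[label] + 1) % numPartitions;

      id++;
    }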

    }

    // store the data into a file
    String[] sData = Utils.double2String(source);
    Path dataPath = Utils.writeDataToTestFile(sData);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    Data data = DataLoader.loadData(dataset, sData);

    Configuration conf = new Configuration();
    Step0JobTest.setMaxSplitSize(conf, dataPath, NUM_MAPS);
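
Step0JobTest.setMaxSplitSize is a test helper. A hedged sketch of what such a helper can do: cap the split size so the input file yields roughly NUM_MAPS splits (mapred.max.split.size is the property consulted by the pre-2.x new-API FileInputFormat):

    // hedged sketch of a split-size helper for tests
    static void setMaxSplitSize(Configuration conf, Path dataPath, int numMaps) throws IOException {
      FileSystem fs = dataPath.getFileSystem(conf);
      long fileSize = fs.getFileStatus(dataPath).getLen();
      // slight over-division so the file yields about numMaps splits
      conf.setLong("mapred.max.split.size", fileSize / numMaps + 1);
    }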


    // prepare the data
    String descriptor = Utils.randomDescriptor(rng, nbAttributes);
    double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
    String[] sData = Utils.double2String(source);
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    String[][] splits = Utils.splitData(sData, nbMappers);

    // prepare first step output
    TreeID[] keys = new TreeID[nbTrees];
    Node[] trees = new Node[nbTrees];
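
The keys and trees arrays then receive synthetic first-step output. A hedged sketch, assuming TreeID pairs a partition with a per-tree id and that a single Leaf node can stand in for a whole tree (both assumptions about the partial-builder classes these tests exercise):

    // hedged sketch: fabricate one key/tree pair per expected tree
    for (int treeId = 0; treeId < nbTrees; treeId++) {
      int partition = treeId % nbMappers;          // spread the trees across the mappers
      keys[treeId] = new TreeID(partition, treeId);
      trees[treeId] = new Leaf(-1);                // placeholder single-leaf tree
    }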
