Package org.apache.mahout.classifier.df.data

Examples of org.apache.mahout.classifier.df.data.DataConverter


   
    /**
     * Useful when testing
     */
    void setup(Dataset dataset) {
      converter = new DataConverter(dataset);
    }
View Full Code Here


        throw new IOException("not enough paths in the DistributedCache");
      }
     
      dataset = Dataset.load(conf, new Path(files[0].getPath()));

      converter = new DataConverter(dataset);

      forest = DecisionForest.load(conf, new Path(files[1].getPath()));
      if (forest == null) {
        throw new InterruptedException("DecisionForest not found!");
      }
View Full Code Here

   *          number of running map tasks
   * @param numTrees
   *          total number of trees in the forest
   */
  protected void configure(Long seed, int partition, int numMapTasks, int numTrees) {
    converter = new DataConverter(getDataset());
   
    // prepare random-number generator
    log.debug("seed : {}", seed);
    if (seed == null) {
      rng = RandomUtils.getRandom();
View Full Code Here

    // foreach tuple of the data
    Path dataPath = new Path(dataStr);
    FileSystem ifs = dataPath.getFileSystem(conf);
    FSDataInputStream input = ifs.open(dataPath);
    Scanner scanner = new Scanner(input, "UTF-8");
    DataConverter converter = new DataConverter(dataset);
   
    int id = 0;
    while (scanner.hasNextLine()) {
      if (id % 1000 == 0) {
        log.info("progress : {}", id);
      }
     
      String line = scanner.nextLine();
      if (line.isEmpty()) {
        continue; // skip empty lines
      }
     
      // write the tuple in files[tuple.label]
      Instance instance = converter.convert(line);
      int label = (int) dataset.getLabel(instance);
      files[currents[label]].writeBytes(line);
      files[currents[label]].writeChar('\n');
     
      // update currents
View Full Code Here

   
    /**
     * Useful when testing
     */
    void setup(Dataset dataset) {
      converter = new DataConverter(dataset);
    }
View Full Code Here

      if (files.length < 2) {
        throw new IOException("not enough paths in the DistributedCache");
      }
      dataset = Dataset.load(conf, files[0]);
      converter = new DataConverter(dataset);

      forest = DecisionForest.load(conf, files[1]);
      if (forest == null) {
        throw new InterruptedException("DecisionForest not found!");
      }
View Full Code Here

      return;
    }

    // load the dataset
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    DataConverter converter = new DataConverter(dataset);

    log.info("Sequential classification...");
    long time = System.currentTimeMillis();

    Random rng = RandomUtils.getRandom();
View Full Code Here

   *          number of running map tasks
   * @param numTrees
   *          total number of trees in the forest
   */
  protected void configure(Long seed, int partition, int numMapTasks, int numTrees) {
    converter = new DataConverter(getDataset());
   
    // prepare random-number generator
    log.debug("seed : {}", seed);
    if (seed == null) {
      rng = RandomUtils.getRandom();
View Full Code Here

   *          number of running map tasks
   * @param numTrees
   *          total number of trees in the forest
   */
  protected void configure(Long seed, int partition, int numMapTasks, int numTrees) {
    converter = new DataConverter(getDataset());
   
    // prepare random-number generator
    log.debug("seed : {}", seed);
    if (seed == null) {
      rng = RandomUtils.getRandom();
View Full Code Here

        throw new IOException("not enough paths in the DistributedCache");
      }
     
      dataset = Dataset.load(conf, new Path(files[0].getPath()));

      converter = new DataConverter(dataset);

      forest = DecisionForest.load(conf, new Path(files[1].getPath()));
      if (forest == null) {
        throw new InterruptedException("DecisionForest not found!");
      }
View Full Code Here

TOP

Related Classes of org.apache.mahout.classifier.df.data.DataConverter

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.