Package org.apache.hama.bsp

Examples of org.apache.hama.bsp.BSPJob


      conf.set("layerSizeArray", layerSizeArraySb.toString());
    }

    HamaConfiguration hamaConf = new HamaConfiguration(conf);

    BSPJob job = new BSPJob(hamaConf, SmallMLPTrainer.class);
    job.setJobName("Small scale MLP training");
    job.setJarByClass(SmallMLPTrainer.class);
    job.setBspClass(SmallMLPTrainer.class);
    job.setInputPath(dataInputPath);
    job.setInputFormat(org.apache.hama.bsp.SequenceFileInputFormat.class);
    job.setInputKeyClass(LongWritable.class);
    job.setInputValueClass(VectorWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setOutputFormat(org.apache.hama.bsp.NullOutputFormat.class);

    int numTasks = conf.getInt("tasks", 1);
    job.setNumBspTask(numTasks);
    job.waitForCompletion(true);

    // reload learned model
    Log.info(String.format("Reload model from %s.",
        trainingParams.get("modelPath")));
    this.modelPath = trainingParams.get("modelPath");
View Full Code Here


  /**
   * Method which actually starts SpMV.
   */
  private static void startTask(HamaConfiguration conf) throws IOException,
      InterruptedException, ClassNotFoundException {
    BSPJob bsp = new BSPJob(conf, SpMV.class);
    bsp.setJobName("Sparse matrix vector multiplication");
    bsp.setBspClass(SpMVBSP.class);
    /*
     * Input matrix is presented as pairs of integer and SparseVectorWritable.
     * Output is pairs of integer and double
     */
    bsp.setInputFormat(SequenceFileInputFormat.class);
    bsp.setInputKeyClass(IntWritable.class);
    bsp.setInputValueClass(SparseVectorWritable.class);
    bsp.setOutputKeyClass(IntWritable.class);
    bsp.setOutputValueClass(DoubleWritable.class);
    bsp.setOutputFormat(SequenceFileOutputFormat.class);
    bsp.setInputPath(new Path(conf.get(inputMatrixPathString)));

    FileOutputFormat.setOutputPath(bsp, new Path(conf.get(outputPathString)));

    BSPJobClient jobClient = new BSPJobClient(conf);
    ClusterStatus cluster = jobClient.getClusterStatus(true);

    int requestedTasks = conf.getInt(requestedBspTasksString, -1);
    if (requestedTasks != -1) {
      bsp.setNumBspTask(requestedTasks);
    } else {
      bsp.setNumBspTask(cluster.getMaxTasks());
    }

    long startTime = System.currentTimeMillis();
    if (bsp.waitForCompletion(true)) {
      LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime)
          / 1000.0 + " seconds.");
      convertToDenseVector(conf);
      LOG.info("Result is in " + getResultPath());
    } else {
View Full Code Here

    this.writeModelToFile();

    HamaConfiguration hamaConf = new HamaConfiguration(conf);

    // create job
    BSPJob job = new BSPJob(hamaConf, SmallLayeredNeuralNetworkTrainer.class);
    job.setJobName("Small scale Neural Network training");
    job.setJarByClass(SmallLayeredNeuralNetworkTrainer.class);
    job.setBspClass(SmallLayeredNeuralNetworkTrainer.class);
    job.setInputPath(dataInputPath);
    job.setInputFormat(org.apache.hama.bsp.SequenceFileInputFormat.class);
    job.setInputKeyClass(LongWritable.class);
    job.setInputValueClass(VectorWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setOutputFormat(org.apache.hama.bsp.NullOutputFormat.class);

    int numTasks = conf.getInt("tasks", 1);
    Log.info(String.format("Number of tasks: %d\n", numTasks));
    job.setNumBspTask(numTasks);
    job.waitForCompletion(true);

    // reload learned model
    Log.info(String.format("Reload model from %s.", this.modelPath));
    this.readFromModel();
View Full Code Here

   * {@inheritDoc}
   */
  @Override
  public boolean train() {
    try {
      BSPJob job = setupJob();
      boolean res = job.waitForCompletion(true);
      return res;
    } catch (IOException e) {
      e.printStackTrace();
    } catch (InterruptedException e) {
      e.printStackTrace();
View Full Code Here

    }
    return false;
  }

  private BSPJob setupJob() throws IOException {
    BSPJob job = new BSPJob(conf, OnlineCF.class);

    String input = conf.get(OnlineCF.Settings.CONF_INPUT_PATH, null);
    String output = conf.get(OnlineCF.Settings.CONF_OUTPUT_PATH, null);
    Path in = new Path(input);
    Path out = new Path(output);

    if (conf.getInt(OnlineCF.Settings.CONF_MATRIX_RANK, -1) == -1) {
      conf.setInt(OnlineCF.Settings.CONF_MATRIX_RANK, OnlineCF.Settings.DFLT_MATRIX_RANK);
    }
   
    if (conf.getInt(OnlineCF.Settings.CONF_ITERATION_COUNT, -1) == -1) {
      conf.setInt(OnlineCF.Settings.CONF_ITERATION_COUNT, OnlineCF.Settings.DFLT_ITERATION_COUNT);
    }

    if (conf.getInt(OnlineCF.Settings.CONF_SKIP_COUNT, -1) == -1) {
      conf.setInt(OnlineCF.Settings.CONF_SKIP_COUNT, OnlineCF.Settings.DFLT_SKIP_COUNT);
    }
   
    if (conf.getClass(OnlineCF.Settings.CONF_ONLINE_UPDATE_FUNCTION, null) == null) {
      conf.setClass(OnlineCF.Settings.CONF_ONLINE_UPDATE_FUNCTION, OnlineCF.Settings.DFLT_UPDATE_FUNCTION,
              OnlineUpdate.Function.class);
    }
    conf.set(OnlineCF.Settings.CONF_MODEL_USER_DELIM, OnlineCF.Settings.DFLT_MODEL_USER_DELIM);
    conf.set(OnlineCF.Settings.CONF_MODEL_USER_FEATURE_DELIM, OnlineCF.Settings.DFLT_MODEL_USER_MTX_FEATURES_DELIM);
    conf.set(OnlineCF.Settings.CONF_MODEL_ITEM_DELIM, OnlineCF.Settings.DFLT_MODEL_ITEM_DELIM);
    conf.set(OnlineCF.Settings.CONF_MODEL_ITEM_FEATURE_DELIM, OnlineCF.Settings.DFLT_MODEL_ITEM_MTX_FEATURES_DELIM);

    job.setJobName("Online CF");
    job.setBoolean(Constants.ENABLE_RUNTIME_PARTITIONING, true);
    job.setPartitioner(HashPartitioner.class);
    job.setBspClass(OnlineTrainBSP.class);

    job.setInputPath(in);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setInputKeyClass(Text.class);
    job.setInputValueClass(VectorWritable.class);

    job.setOutputPath(out);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setNumBspTask(conf.getInt(OnlineCF.Settings.CONF_TASK_COUNT, job.getNumBspTask()));
    return job;
  }
View Full Code Here

   * Creates a basic job with sequencefiles as in and output.
   */
  public static BSPJob createJob(Configuration cnf, Path in, Path out,
      boolean textOut) throws IOException {
    HamaConfiguration conf = new HamaConfiguration(cnf);
    BSPJob job = new BSPJob(conf, KMeansBSP.class);
    job.setJobName("KMeans Clustering");
    job.setJarByClass(KMeansBSP.class);
    job.setBspClass(KMeansBSP.class);
    job.setInputPath(in);
    job.setOutputPath(out);
    job.setInputFormat(org.apache.hama.bsp.SequenceFileInputFormat.class);
    if (textOut)
      job.setOutputFormat(org.apache.hama.bsp.TextOutputFormat.class);
    else
      job.setOutputFormat(org.apache.hama.bsp.SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    return job;
  }
View Full Code Here

    conf.set("bsp.local.tasks.maximum", ""
        + Runtime.getRuntime().availableProcessors());
    // deactivate (set to false) if you want to iterate over disk, else it will
    // cache the input vectors in memory
    conf.setBoolean(CACHING_ENABLED_KEY, true);
    BSPJob job = createJob(conf, in, out, false);

    LOG.info("N: " + count + " k: " + k + " Dimension: " + dimension
        + " Iterations: " + iterations);

    FileSystem fs = FileSystem.get(conf);
    // prepare the input, like deleting old versions and creating centers
    prepareInput(count, k, dimension, conf, in, center, out, fs);
    if (args.length == 7) {
      job.setNumBspTask(Integer.parseInt(args[6]));
    }

    // just submit the job
    job.waitForCompletion(true);
  }
View Full Code Here

  public static void main(String[] args) throws InterruptedException,
      IOException, ClassNotFoundException {
    // BSP job configuration
    HamaConfiguration conf = new HamaConfiguration();

    BSPJob bsp = new BSPJob(conf, PiEstimator.class);
    bsp.setCompressionCodec(SnappyCompressor.class);
    bsp.setCompressionThreshold(40);
   
    // Set the job name
    bsp.setJobName("Pi Estimation Example");
    bsp.setBspClass(MyEstimator.class);
    bsp.setInputFormat(NullInputFormat.class);
    bsp.setOutputKeyClass(Text.class);
    bsp.setOutputValueClass(DoubleWritable.class);
    bsp.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(bsp, TMP_OUTPUT);

    BSPJobClient jobClient = new BSPJobClient(conf);
    ClusterStatus cluster = jobClient.getClusterStatus(true);

    if (args.length > 0) {
      bsp.setNumBspTask(Integer.parseInt(args[0]));
    } else {
      // Set to maximum
      bsp.setNumBspTask(cluster.getMaxTasks());
    }

    long startTime = System.currentTimeMillis();
    if (bsp.waitForCompletion(true)) {
      printOutput(conf);
      System.out.println("Job Finished in "
          + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    }
  }
View Full Code Here

      }
      // Set the last argument to TRUE if first column is required to be the key
      in = KMeansBSP.prepareInputText(k, conf, in, center, out, fs, true);
    }

    BSPJob job = KMeansBSP.createJob(conf, in, out, true);

    long startTime = System.currentTimeMillis();
    // just submit the job
    if (job.waitForCompletion(true)) {
      System.out.println("Job Finished in "
          + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    }

    System.out.println("\nHere are a few lines of output:");
View Full Code Here

    cleanup(fs, inputPath);
    cleanup(fs, outputPath);
  }

  static BSPJob getSummationJob(HamaConfiguration conf) throws IOException {
    BSPJob bsp = new BSPJob(conf);
    bsp.setInputFormat(KeyValueTextInputFormat.class);
    bsp.setInputKeyClass(Text.class);
    bsp.setInputValueClass(Text.class);
    bsp.setOutputFormat(SequenceFileOutputFormat.class);
    bsp.setOutputKeyClass(NullWritable.class);
    bsp.setOutputValueClass(DoubleWritable.class);
    bsp.setMessageClass(DoubleWritable.class);
    return bsp;
  }
View Full Code Here

TOP

Related Classes of org.apache.hama.bsp.BSPJob

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.