Package org.apache.hama.bsp

Examples of org.apache.hama.bsp.BSPJob


  /**
   * Creates a basic job that reads sequence files as input and writes either
   * text or sequence files as output, depending on {@code textOut}.
   */
  public static BSPJob createJob(Configuration cnf, Path in, Path out,
      boolean textOut) throws IOException {
    HamaConfiguration conf = new HamaConfiguration(cnf);
    BSPJob job = new BSPJob(conf, KMeansBSP.class);
    job.setJobName("KMeans Clustering");
    job.setJarByClass(KMeansBSP.class);
    job.setBspClass(KMeansBSP.class);
    job.setInputPath(in);
    job.setOutputPath(out);
    job.setInputFormat(org.apache.hama.bsp.SequenceFileInputFormat.class);
    if (textOut)
      job.setOutputFormat(org.apache.hama.bsp.TextOutputFormat.class);
    else
      job.setOutputFormat(org.apache.hama.bsp.SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    return job;
  }
View Full Code Here


    conf.set("bsp.local.tasks.maximum", ""
        + Runtime.getRuntime().availableProcessors());
    // deactivate (set to false) if you want to iterate over disk, else it will
    // cache the input vectors in memory
    conf.setBoolean(CACHING_ENABLED_KEY, true);
    BSPJob job = createJob(conf, in, out, false);

    LOG.info("N: " + count + " k: " + k + " Dimension: " + dimension
        + " Iterations: " + iterations);

    FileSystem fs = FileSystem.get(conf);
    // prepare the input, like deleting old versions and creating centers
    prepareInput(count, k, dimension, conf, in, center, out, fs);
    if (args.length == 7) {
      job.setNumBspTask(Integer.parseInt(args[6]));
    }

    // just submit the job
    job.waitForCompletion(true);
  }
View Full Code Here

      bw.write(sb.toString());
      bw.close();

      in = KMeansBSP.prepareInputText(k, conf, in, center, out, fs);

      BSPJob job = KMeansBSP.createJob(conf, in, out, true);

      // just submit the job
      boolean result = job.waitForCompletion(true);

      assertEquals(true, result);

      HashMap<Integer, DoubleVector> centerMap = KMeansBSP.readOutput(conf,
          out, centerOut, fs);
View Full Code Here

  public static void main(String[] args) throws InterruptedException,
      IOException, ClassNotFoundException {
    // BSP job configuration
    HamaConfiguration conf = new HamaConfiguration();

    BSPJob bsp = new BSPJob(conf, CombineExample.class);
    // Set the job name
    bsp.setJobName("Combine Example");
    bsp.setBspClass(MyBSP.class);
    bsp.setCombinerClass(SumCombiner.class);
    bsp.setInputFormat(NullInputFormat.class);
    bsp.setOutputKeyClass(Text.class);
    bsp.setOutputValueClass(IntWritable.class);
    bsp.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(bsp, TMP_OUTPUT);
    bsp.setNumBspTask(2);

    long startTime = System.currentTimeMillis();
    if (bsp.waitForCompletion(true)) {
      printOutput(conf);
      System.out.println("Job Finished in "
          + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    }
View Full Code Here

          args);
      // get other arguments
      CommandLine results = parser.parse(cli.options,
          genericParser.getRemainingArgs());

      BSPJob job = new BSPJob(getConf());

      if (results.hasOption("input")) {
        FileInputFormat.setInputPaths(job, results.getOptionValue("input"));
      }
      if (results.hasOption("output")) {
        FileOutputFormat.setOutputPath(job,
            new Path(results.getOptionValue("output")));
      }
      if (results.hasOption("jar")) {
        job.setJar(results.getOptionValue("jar"));
      }

      if (results.hasOption("jobname")) {
        job.setJobName(results.getOptionValue("jobname"));
      }

      if (results.hasOption("inputformat")) {
        setIsJavaRecordReader(job.getConfiguration(), true);
        job.setInputFormat(getClass(results, "inputformat", conf,
            InputFormat.class));
      }

      if (results.hasOption("partitioner")) {
        job.setPartitioner(getClass(results, "partitioner", conf,
            Partitioner.class));
      }

      if (results.hasOption("outputformat")) {
        setIsJavaRecordWriter(job.getConfiguration(), true);
        job.setOutputFormat(getClass(results, "outputformat", conf,
            OutputFormat.class));
      }

      if (results.hasOption("streaming")) {
        LOG.info("Streaming enabled!");
        job.set("hama.streaming.enabled", "true");
      }

      if (results.hasOption("jobconf")) {
        LOG.warn("-jobconf option is deprecated, please use -D instead.");
        String options = results.getOptionValue("jobconf");
        StringTokenizer tokenizer = new StringTokenizer(options, ",");
        while (tokenizer.hasMoreTokens()) {
          String keyVal = tokenizer.nextToken().trim();
          String[] keyValSplit = keyVal.split("=", 2);
          job.set(keyValSplit[0], keyValSplit[1]);
        }
      }

      if (results.hasOption("bspTasks")) {
        int optionValue = Integer.parseInt(results.getOptionValue("bspTasks"));
        conf.setInt("bsp.local.tasks.maximum", optionValue);
        conf.setInt("bsp.peers.num", optionValue);
      }

      if (results.hasOption("program")) {
        String executablePath = results.getOptionValue("program");
        setExecutable(job.getConfiguration(), executablePath);
        DistributedCache.addCacheFile(new Path(executablePath).toUri(), conf);
      }

      if (results.hasOption("interpreter")) {
        job.getConfiguration().set("hama.pipes.executable.interpretor",
            results.getOptionValue("interpreter"));
      }

      if (results.hasOption("programArgs")) {
        job.getConfiguration().set("hama.pipes.executable.args",
            Joiner.on(" ").join(results.getOptionValues("programArgs")));
        // job.getConfiguration().set("hama.pipes.resolve.executable.args",
        // "true");
      }

      if (results.hasOption("cachefiles")) {
        FileSystem fs = FileSystem.get(getConf());
        String[] optionValues = results.getOptionValues("cachefiles");
        for (String s : optionValues) {
          Path path = new Path(s);
          FileStatus[] globStatus = fs.globStatus(path);
          for (FileStatus f : globStatus) {
            if (!f.isDir()) {
              DistributedCache.addCacheFile(f.getPath().toUri(),
                  job.getConfiguration());
            } else {
              LOG.info("Ignoring directory " + f.getPath() + " while globbing.");
            }
          }
        }
      }

      // if they gave us a jar file, include it into the class path
      String jarFile = job.getJar();
      if (jarFile != null) {
        @SuppressWarnings("deprecation")
        final URL[] urls = new URL[] { FileSystem.getLocal(conf)
            .pathToFile(new Path(jarFile)).toURL() };
        // FindBugs complains that creating a URLClassLoader should be
View Full Code Here

    } else {
      KMeansBSP.prepareInputText(k, conf, in, center, out, fs);
      in = new Path(args[0], "textinput/in.seq");
    }

    BSPJob job = KMeansBSP.createJob(conf, in, out, true);

    // just submit the job
    job.waitForCompletion(true);
  }
View Full Code Here

  /**
   * Method which actually starts SpMV.
   */
  private static void startTask(HamaConfiguration conf) throws IOException,
      InterruptedException, ClassNotFoundException {
    BSPJob bsp = new BSPJob(conf, SpMV.class);
    bsp.setJobName("Sparse matrix vector multiplication");
    bsp.setBspClass(SpMVBSP.class);
    /*
     * Input matrix is presented as pairs of integer and SparseVectorWritable.
     * Output is pairs of integer and double
     */
    bsp.setInputFormat(SequenceFileInputFormat.class);
    bsp.setInputKeyClass(IntWritable.class);
    bsp.setInputValueClass(SparseVectorWritable.class);
    bsp.setOutputKeyClass(IntWritable.class);
    bsp.setOutputValueClass(DoubleWritable.class);
    bsp.setOutputFormat(SequenceFileOutputFormat.class);
    bsp.setInputPath(new Path(conf.get(inputMatrixPathString)));

    FileOutputFormat.setOutputPath(bsp, new Path(conf.get(outputPathString)));

    BSPJobClient jobClient = new BSPJobClient(conf);
    ClusterStatus cluster = jobClient.getClusterStatus(true);

    int requestedTasks = conf.getInt(requestedBspTasksString, -1);
    if (requestedTasks != -1) {
      bsp.setNumBspTask(requestedTasks);
    } else {
      bsp.setNumBspTask(cluster.getMaxTasks());
    }

    long startTime = System.currentTimeMillis();
    if (bsp.waitForCompletion(true)) {
      LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime)
          / 1000.0 + " seconds.");
      convertToDenseVector(conf);
      LOG.info("Result is in " + getResultPath());
    } else {
View Full Code Here

    HamaConfiguration conf = new HamaConfiguration();

    conf.setInt(SIZE_OF_MATRIX, Integer.parseInt(args[0]));
    conf.setInt(DENSITY, Integer.parseInt(args[1]));

    BSPJob bsp = new BSPJob(conf, CombineExample.class);
    // Set the job name
    bsp.setJobName("Random Symmetric Matrix Generator");
    bsp.setBspClass(SymmetricMatrixGenBSP.class);
    bsp.setInputFormat(NullInputFormat.class);
    bsp.setOutputKeyClass(Text.class);
    bsp.setOutputValueClass(TextArrayWritable.class);
    bsp.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(bsp, new Path(args[2]));
    bsp.setNumBspTask(Integer.parseInt(args[3]));

    long startTime = System.currentTimeMillis();
    if (bsp.waitForCompletion(true)) {
      System.out.println("Job Finished in "
          + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    }
  }
View Full Code Here

            "unsupported RegressionModel").append(args[1])
            .append(", use 'logistic' or 'linear'").toString());
      }
    }

    BSPJob bsp = new BSPJob(conf, GradientDescentExample.class);
    // Set the job name
    bsp.setJobName("Gradient Descent Example");
    bsp.setBspClass(GradientDescentBSP.class);
    bsp.setInputFormat(VectorDoubleFileInputFormat.class);
    bsp.setInputPath(new Path(args[0]));
    bsp.setInputKeyClass(VectorWritable.class);
    bsp.setInputValueClass(DoubleWritable.class);
    bsp.setOutputKeyClass(VectorWritable.class);
    bsp.setOutputValueClass(DoubleWritable.class);
    bsp.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(bsp, TMP_OUTPUT);

    long startTime = System.currentTimeMillis();
    if (bsp.waitForCompletion(true)) {
      printOutput(conf);
      System.out.println("Job Finished in "
          + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    }
View Full Code Here

    HamaConfiguration conf = new HamaConfiguration();

    conf.setInt(SIZE_OF_MATRIX, Integer.parseInt(args[0]));
    conf.setInt(MAX_EDGES, Integer.parseInt(args[1]));

    BSPJob bsp = new BSPJob(conf, FastGraphGenBSP.class);
    // Set the job name
    bsp.setJobName("Random Fast Matrix Generator");
    bsp.setBspClass(FastGraphGenBSP.class);
    bsp.setInputFormat(NullInputFormat.class);
    bsp.setOutputKeyClass(Text.class);
    bsp.setOutputValueClass(TextArrayWritable.class);
    bsp.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(bsp, new Path(args[2]));
    bsp.setNumBspTask(Integer.parseInt(args[3]));

    long startTime = System.currentTimeMillis();
    if (bsp.waitForCompletion(true)) {
      System.out.println("Job Finished in "
          + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    }
  }
View Full Code Here

TOP

Related Classes of org.apache.hama.bsp.BSPJob

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.