Package org.apache.spark.api.java

Examples of org.apache.spark.api.java.JavaSparkContext


    if (args.length != 4) {
      System.err.println("Usage: JavaLR <master> <input_dir> <step_size> <niters>");
      System.exit(1);
    }

    JavaSparkContext sc = new JavaSparkContext(args[0], "JavaLR",
        System.getenv("SPARK_HOME"), JavaSparkContext.jarOfClass(JavaLR.class));
    JavaRDD<String> lines = sc.textFile(args[1]);
    JavaRDD<LabeledPoint> points = lines.map(new ParsePoint()).cache();
    double stepSize = Double.parseDouble(args[2]);
    int iterations = Integer.parseInt(args[3]);

    // Another way to configure LogisticRegression
View Full Code Here


    if (args.length < 3) {
      System.err.println("Usage: JavaHdfsLR <master> <file> <iters>");
      System.exit(1);
    }

    JavaSparkContext sc = new JavaSparkContext(args[0], "JavaHdfsLR",
        System.getenv("SPARK_HOME"), JavaSparkContext.jarOfClass(JavaHdfsLR.class));
    JavaRDD<String> lines = sc.textFile(args[1]);
    JavaRDD<DataPoint> points = lines.map(new ParsePoint()).cache();
    int ITERATIONS = Integer.parseInt(args[2]);

    // Initialize w to a random value
    double[] w = new double[D];
View Full Code Here

    if (args.length >= 5) {
      runs = Integer.parseInt(args[4]);
    }

    JavaSparkContext sc = new JavaSparkContext(args[0], "JavaKMeans",
        System.getenv("SPARK_HOME"), JavaSparkContext.jarOfClass(JavaKMeans.class));
    JavaRDD<String> lines = sc.textFile(inputFile);

    JavaRDD<Vector> points = lines.map(new ParsePoint());

    KMeansModel model = KMeans.train(points.rdd(), k, iterations, runs, KMeans.K_MEANS_PARALLEL());
View Full Code Here

    if (args.length == 0) {
      System.err.println("Usage: JavaTC <host> [<slices>]");
      System.exit(1);
    }

    JavaSparkContext sc = new JavaSparkContext(args[0], "JavaTC",
        System.getenv("SPARK_HOME"), JavaSparkContext.jarOfClass(JavaTC.class));
    Integer slices = (args.length > 1) ? Integer.parseInt(args[1]): 2;
    JavaPairRDD<Integer, Integer> tc = sc.parallelizePairs(generateGraph(), slices).cache();

    // Linear transitive closure: each round grows paths by one edge,
    // by joining the graph's edges with the already-discovered paths.
    // e.g. join the path (y, z) from the TC with the edge (x, y) from
    // the graph to obtain the path (x, z).
View Full Code Here

    if (args.length == 0) {
      System.err.println("Usage: JavaLogQuery <master> [logFile]");
      System.exit(1);
    }

    JavaSparkContext jsc = new JavaSparkContext(args[0], "JavaLogQuery",
      System.getenv("SPARK_HOME"), JavaSparkContext.jarOfClass(JavaLogQuery.class));

    JavaRDD<String> dataSet = (args.length == 2) ? jsc.textFile(args[1]) : jsc.parallelize(exampleApacheLogs);

    JavaPairRDD<Tuple3<String, String, String>, Stats> extracted = dataSet.mapToPair(new PairFunction<String, Tuple3<String, String, String>, Stats>() {
      @Override
      public Tuple2<Tuple3<String, String, String>, Stats> call(String s) {
        return new Tuple2<Tuple3<String, String, String>, Stats>(extractKey(s), extractStats(s));
View Full Code Here

    if (args.length < 3) {
      System.err.println("Usage: JavaPageRank <master> <file> <number_of_iterations>");
      System.exit(1);
    }

    JavaSparkContext ctx = new JavaSparkContext(args[0], "JavaPageRank",
      System.getenv("SPARK_HOME"), JavaSparkContext.jarOfClass(JavaPageRank.class));

    // Loads in input file. It should be in format of:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = ctx.textFile(args[1], 1);

    // Loads all URLs from input file and initialize their neighbors.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
      @Override
      public Tuple2<String, String> call(String s) {
View Full Code Here

    if (args.length == 0) {
      System.err.println("Usage: JavaSparkPi <master> [slices]");
      System.exit(1);
    }

    JavaSparkContext jsc = new JavaSparkContext(args[0], "JavaSparkPi",
      System.getenv("SPARK_HOME"), JavaSparkContext.jarOfClass(JavaSparkPi.class));

    int slices = (args.length == 2) ? Integer.parseInt(args[1]) : 2;
    int n = 100000 * slices;
    List<Integer> l = new ArrayList<Integer>(n);
    for (int i = 0; i < n; i++) {
      l.add(i);
    }

    JavaRDD<Integer> dataSet = jsc.parallelize(l, slices);

    int count = dataSet.map(new Function<Integer, Integer>() {
      @Override
      public Integer call(Integer integer) {
        double x = Math.random() * 2 - 1;
View Full Code Here

    int blocks = -1;
    if (args.length == 6) {
      blocks = Integer.parseInt(args[5]);
    }

    JavaSparkContext sc = new JavaSparkContext(args[0], "JavaALS",
        System.getenv("SPARK_HOME"), JavaSparkContext.jarOfClass(JavaALS.class));
    JavaRDD<String> lines = sc.textFile(args[1]);

    JavaRDD<Rating> ratings = lines.map(new ParseRating());

    MatrixFactorizationModel model = ALS.train(ratings.rdd(), rank, iterations, 0.01, blocks);
View Full Code Here

public class JavaNaiveBayesSuite implements Serializable {
  private transient JavaSparkContext sc;

  @Before
  public void setUp() {
    sc = new JavaSparkContext("local", "JavaNaiveBayesSuite");
  }
View Full Code Here

public class JavaALSSuite implements Serializable {
  private transient JavaSparkContext sc;

  @Before
  public void setUp() {
    sc = new JavaSparkContext("local", "JavaALS");
  }
View Full Code Here

TOP

Related Classes of org.apache.spark.api.java.JavaSparkContext

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.