Package io.prediction.examples.java.parallel

Source Code of io.prediction.examples.java.parallel.DataSource

package io.prediction.examples.java.parallel;

import io.prediction.controller.java.EmptyParams;
import io.prediction.controller.java.PJavaDataSource;

import java.util.List;
import java.util.ArrayList;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import scala.Tuple2;
import scala.Tuple3;

public class DataSource extends PJavaDataSource<
  EmptyParams, Object, JavaPairRDD<String, Float>, Query, Object> {

  final static Logger logger = LoggerFactory.getLogger(DataSource.class);

  public DataSource() {
  }

  @Override
  public Iterable<Tuple3<Object, JavaPairRDD<String, Float>, JavaPairRDD<Query, Object>>>
      read(JavaSparkContext jsc) {
    JavaPairRDD<String, Float> readings = jsc.textFile("../data/helloworld/data.csv")
      .mapToPair(new PairFunction<String, String, Float>() {
        @Override
        public Tuple2 call(String line) {
          String[] tokens = line.split("[\t,]");
          Tuple2 reading = null;
          try {
            reading = new Tuple2(
              tokens[0],
              Float.parseFloat(tokens[1]));
          } catch (Exception e) {
            logger.error("Can't parse reading file. Caught Exception: " + e.getMessage());
            System.exit(1);
          }
          return reading;
        }
      });

    List<Tuple3<Object, JavaPairRDD<String, Float>, JavaPairRDD<Query, Object>>> data =
      new ArrayList<>();

    data.add(new Tuple3(
      null,
      readings,
      jsc.parallelizePairs(new ArrayList<Tuple2<Query, Object>>())
    ));

    return data;
  }
}
TOP

Related Classes of io.prediction.examples.java.parallel.DataSource

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.