String outFile = arg(args, "-out", null);
String sparkhome = arg(args, "-spark", System.getenv("SPARK_HOME"));
String[] jars = arg(args, "-jars", "AR.jar:ARApp.jar:ARExt.jar").split(":");
boolean partition = Boolean.parseBoolean(arg(args, "-partitions", "true"));
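
// A minimal sketch of the arg(...) helper used above (hypothetical; the real helper is defined
// elsewhere in this class): scan the argument array for the flag and return the token after it.
//   private static String arg(String[] args, String flag, String fallback) {
//       for (int i = 0; i < args.length - 1; i++) {
//           if (args[i].equalsIgnoreCase(flag)) {return args[i + 1];}
//       }
//       return fallback;
//   }

// Connect to the Spark master named by 'host' (parsed earlier), shipping the AR jars to the workers.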
JavaSparkContext ctx = new JavaSparkContext(host, "Abstract-Rendering", sparkhome, jars);
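
// Resolve the dataset named by -config reflectively; OptionDataset exposes each preset
// configuration as a public static field, so the field name doubles as the configuration name.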
OptionDataset<G,I> dataset;
try {
    dataset = (OptionDataset<G,I>) OptionDataset.class.getField(config).get(null);
} catch (IllegalAccessException | IllegalArgumentException
        | NoSuchFieldException | NullPointerException | SecurityException e) {
    throw new IllegalArgumentException("Could not find the dataset indicated by -config: " + config, e);
}
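
// Build the base RDD of Indexed records: files not ending in .csv are read as binary HBIN
// via a Hadoop input format; .csv files are parsed as delimited text.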
JavaRDD<Indexed> base;
File sourceFile = dataset.sourceFile;
if (!sourceFile.getName().endsWith(".csv")) {
    JavaPairRDD<LongWritable, DataInputRecord> source =
            ctx.hadoopFile(sourceFile.getPath(), HBINInputFormat.class, LongWritable.class, DataInputRecord.class);
    // Keep only the record halves of the (offset, record) pairs; DataInputRecord is itself Indexed,
    // so no raw-type casts are needed.
    base = source.map(new Function<Tuple2<LongWritable, DataInputRecord>, Indexed>() {
        public Indexed call(Tuple2<LongWritable, DataInputRecord> pair) {return pair._2;}
    });
} else {
    JavaRDD<String> source = ctx.textFile(sourceFile.getPath());
    base = source.map(new StringToIndexed("\\s*,\\s*"));
}
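
// Pair the dataset's shaper and valuer into a single function that turns each Indexed record
// into a glyph, then wrap the mapped RDD as a glyphset (repartitioning if -partitions was set).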
Glypher<G,I> glypher = new Glypher<>(dataset.shaper, dataset.valuer);
GlyphsetRDD<G, I> glyphs = new GlyphsetRDD<>(base.map(glypher), true, partition);
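
// From here the glyphset presumably feeds the rest of the abstract-rendering pipeline
// (fit a view transform, aggregate, transfer, and write the result to outFile).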