boolean sequentialAccess) throws IOException {
if (normPower != NO_NORMALIZING && normPower < 0) {
throw new IllegalArgumentException("normPower must either be -1 or >= 0");
}
Configurable client = new JobClient();
JobConf conf = new JobConf(PartialVectorMerger.class);
conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
+ "org.apache.hadoop.io.serializer.WritableSerialization");
// the "io.serializations" parameter above needs to be set to enable serialisation of conf values
conf.setJobName("PartialVectorMerger::MergePartialVectors");
conf.setBoolean(SEQUENTIAL_ACCESS, sequentialAccess);
conf.setInt(DIMENSION, dimension);
conf.setFloat(NORMALIZATION_POWER, normPower);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(VectorWritable.class);
FileInputFormat.setInputPaths(conf, getCommaSeparatedPaths(partialVectorPaths));
Path outputPath = new Path(output);
FileOutputFormat.setOutputPath(conf, outputPath);
conf.setMapperClass(IdentityMapper.class);
conf.setInputFormat(SequenceFileInputFormat.class);
conf.setReducerClass(PartialVectorMergeReducer.class);
conf.setOutputFormat(SequenceFileOutputFormat.class);
FileSystem dfs = FileSystem.get(outputPath.toUri(), conf);
if (dfs.exists(outputPath)) {
dfs.delete(outputPath, true);
}
client.setConf(conf);
JobClient.runJob(conf);
}