printKeyOpt).create();
try {
// Parse the raw arguments against the option group assembled above.
Parser parser = new Parser();
parser.setGroup(group);
CommandLine cmdLine = parser.parse(args);
// A help request short-circuits everything else: print usage and stop.
if (cmdLine.hasOption(helpOpt)) {
printHelp(group);
return;
}
// Dump every vector of the sequence file named by seqOpt, one per line, either
// to the file named by outputOpt or to System.out. Nothing happens when seqOpt
// is absent.
if (cmdLine.hasOption(seqOpt)) {
  Path path = new Path(cmdLine.getValue(seqOpt).toString());
  System.out.println("Input Path: " + path);
  JobClient client = new JobClient();
  JobConf conf = new JobConf(Job.class);
  client.setConf(conf);
  FileSystem fs = FileSystem.get(path.toUri(), conf);

  // The dictionary format defaults to "text" unless dictTypeOpt overrides it.
  String dictionaryType = "text";
  if (cmdLine.hasOption(dictTypeOpt)) {
    dictionaryType = cmdLine.getValue(dictTypeOpt).toString();
  }

  // The dictionary is optional; it stays null when dictOpt is not supplied.
  String[] dictionary = null;
  if (cmdLine.hasOption(dictOpt)) {
    if (dictionaryType.equals("text")) {
      dictionary = VectorHelper.loadTermDictionary(new File(cmdLine.getValue(dictOpt).toString()));
    } else if (dictionaryType.equals("sequencefile")) {
      dictionary = VectorHelper.loadTermDictionary(conf, fs, cmdLine.getValue(dictOpt).toString());
    } else {
      // Any other dictionary type is rejected as a bad option value.
      throw new OptionException(dictTypeOpt);
    }
  }

  boolean useJSON = cmdLine.hasOption(centroidJSonOpt);
  boolean printKey = cmdLine.hasOption(printKeyOpt);
  boolean writeToFile = cmdLine.hasOption(outputOpt);

  SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
  try {
    SequenceFileVectorIterable vectorIterable =
        new SequenceFileVectorIterable(reader, cmdLine.hasOption(vectorAsKeyOpt));
    Writer writer;
    if (writeToFile) {
      writer = new FileWriter(cmdLine.getValue(outputOpt).toString());
    } else {
      writer = new OutputStreamWriter(System.out);
    }

    int i = 0;
    try {
      SeqFileIterator iterator = (SeqFileIterator) vectorIterable.iterator();
      while (iterator.hasNext()) {
        Vector vector = iterator.next();
        if (printKey) {
          writer.write(iterator.key().toString());
          writer.write("\t");
        }
        String fmtStr = useJSON ? vector.asFormatString() : VectorHelper.vectorToString(vector, dictionary);
        writer.write(fmtStr);
        writer.write('\n');
        i++;
      }
    } finally {
      // Release the output even when iteration or formatting fails. Only a
      // file writer is closed; System.out is flushed and left open.
      if (writeToFile) {
        writer.close();
      } else {
        writer.flush();
      }
    }
    System.err.println("Dumped " + i + " Vectors");
  } finally {
    // The reader was previously never closed.
    // NOTE(review): assumes the iterable/iterator does not take ownership of
    // the reader - confirm against SequenceFileVectorIterable.
    reader.close();
  }
}