Package edu.isi.karma.mapreduce.function

Source Code of edu.isi.karma.mapreduce.function.CreateJSONFromSequenceFile

package edu.isi.karma.mapreduce.function;

import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;

import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.util.HelpFormatter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class CreateJSONFromSequenceFile {
  static String filePath = null;
  static String outputPath = null;
  public static void main(String[] args) throws IOException {
    Group options = createCommandLineOptions();
        Parser parser = new Parser();
        parser.setGroup(options);
        HelpFormatter hf = new HelpFormatter();
        parser.setHelpFormatter(hf);
        parser.setHelpTrigger("--help");
        CommandLine cl = parser.parseAndHelp(args);
        if (cl == null || cl.getOptions().size() == 0 || cl.hasOption("--help") || !cl.hasOption("--filepath")) {
            hf.setGroup(options);
            hf.print();
            return;
        }
    filePath = (String) cl.getValue("--filepath");
    outputPath = filePath;
    if (cl.hasOption("--outputpath")) {
      outputPath = (String) cl.getValue("--outputpath");
    }
    FileSystem hdfs = FileSystem.get(new Configuration());
    RemoteIterator<LocatedFileStatus> itr = hdfs.listFiles(new Path(filePath), true);
    while (itr.hasNext()) {
      LocatedFileStatus status = itr.next();
      String fileName = status.getPath().getName();
      if (status.getLen() > 0) {
        String outputFileName = outputPath + File.separator + fileName + ".json";
        createJSONFromSequenceFileFrom(status.getPath(), hdfs.create(new Path(outputFileName)));
      }
    }
  }

  public static void createJSONFromSequenceFileFrom(Path input, FSDataOutputStream fsDataOutputStream) throws IOException {
    Path inputPath = input;
    Configuration conf = new Configuration();
    PrintWriter fw = new PrintWriter(fsDataOutputStream);
    SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(inputPath));
    Text key = new Text();
    Text val = new Text();
    fw.write("[\n");
    boolean writtenyet = false;
    while(reader.next(key, val))
    {
      if(!writtenyet)
      {
        writtenyet=true;
      }
      else
      {
        fw.write(",\n");
      }
      fw.write(val.toString());
    }
    fw.write("\n]\n");
    fw.close();
    reader.close();
  }
 
  private static Group createCommandLineOptions() {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Group options =
        gbuilder
        .withName("options")
        .withOption(buildOption("filepath", "location of the input file directory", "filepath", obuilder, abuilder))
        .withOption(buildOption("outputpath", "location of output file directory", "outputpath", obuilder, abuilder))
        .withOption(obuilder
            .withLongName("help")
            .withDescription("print this message")
            .create())
            .create();

    return options;
  }

  public static Option buildOption(String shortName, String description, String argumentName,
      DefaultOptionBuilder obuilder, ArgumentBuilder abuilder) {
    return obuilder
        .withLongName(shortName)
        .withDescription(description)
        .withArgument(
            abuilder
            .withName(argumentName)
            .withMinimum(1)
            .withMaximum(1)
            .create())
            .create();
  }
 


}
TOP

Related Classes of edu.isi.karma.mapreduce.function.CreateJSONFromSequenceFile

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.