Package org.apache.mahout.utils.io

Examples of org.apache.mahout.utils.io.ChunkedWriter


public final class SequenceFilesFromMailArchives {

  private static final Logger log = LoggerFactory.getLogger(SequenceFilesFromMailArchives.class);

  public void createSequenceFiles(MailOptions options) throws IOException {
    ChunkedWriter writer = new ChunkedWriter(new Configuration(), options.getChunkSize(), new Path(options.getOutputDir()));
    MailProcessor processor = new MailProcessor(options, options.getPrefix(), writer);
    try {
      if (options.getInput().isDirectory()) {
        PrefixAdditionFilter filter = new PrefixAdditionFilter(processor, writer);
        options.getInput().listFiles(filter);
View Full Code Here


  public static final String BASE_INPUT_PATH         = "baseinputpath";

  private static final int MAX_JOB_SPLIT_LOCATIONS = 1000000;

  public void createSequenceFiles(MailOptions options) throws IOException {
    ChunkedWriter writer = new ChunkedWriter(getConf(), options.getChunkSize(), new Path(options.getOutputDir()));
    MailProcessor processor = new MailProcessor(options, options.getPrefix(), writer);
    try {
      if (options.getInput().isDirectory()) {
        PrefixAdditionFilter filter = new PrefixAdditionFilter(processor, writer);
        options.getInput().listFiles(filter);
View Full Code Here

  }

  @Override
  protected void process(FileStatus fst, Path current) throws IOException {
    FileSystem fs = getFs();
    ChunkedWriter writer = getWriter();
    if (fst.isDir()) {
      String dirPath = getPrefix() + Path.SEPARATOR + current.getName() + Path.SEPARATOR + fst.getPath().getName();
      fs.listStatus(fst.getPath(),
                    new PrefixAdditionFilter(getConf(), dirPath, getOptions(), writer, getCharset(), fs));
    } else {
      InputStream in = null;
      try {
        in = fs.open(fst.getPath());

        StringBuilder file = new StringBuilder();
        for (String aFit : new FileLineIterable(in, getCharset(), false)) {
          file.append(aFit).append('\n');
        }
        String name = current.getName().equals(fst.getPath().getName())
            ? current.getName()
            : current.getName() + Path.SEPARATOR + fst.getPath().getName();
        writer.write(getPrefix() + Path.SEPARATOR + name, file.toString());
      } finally {
        Closeables.close(in, false);
      }
    }
  }
View Full Code Here

    throws IOException, InterruptedException, NoSuchMethodException {
    // Running sequentially
    Charset charset = Charset.forName(getOption(CHARSET_OPTION[0]));
    String keyPrefix = getOption(KEY_PREFIX_OPTION[0]);
    FileSystem fs = FileSystem.get(input.toUri(), conf);
    ChunkedWriter writer = new ChunkedWriter(conf, Integer.parseInt(options.get(CHUNK_SIZE_OPTION[0])), output);

    try {
      SequenceFilesFromDirectoryFilter pathFilter;
      String fileFilterClassName = options.get(FILE_FILTER_CLASS_OPTION[0]);
      if (PrefixAdditionFilter.class.getName().equals(fileFilterClassName)) {
View Full Code Here

    throws IOException, InterruptedException, NoSuchMethodException {
    // Running sequentially
    Charset charset = Charset.forName(getOption(CHARSET_OPTION[0]));
    String keyPrefix = getOption(KEY_PREFIX_OPTION[0]);
    FileSystem fs = FileSystem.get(input.toUri(), conf);
    ChunkedWriter writer = new ChunkedWriter(conf, Integer.parseInt(options.get(CHUNK_SIZE_OPTION[0])), output);

    try {
      SequenceFilesFromDirectoryFilter pathFilter;
      String fileFilterClassName = options.get(FILE_FILTER_CLASS_OPTION[0]);
      if (PrefixAdditionFilter.class.getName().equals(fileFilterClassName)) {
View Full Code Here

  public static final String BASE_INPUT_PATH         = "baseinputpath";

  private static final int MAX_JOB_SPLIT_LOCATIONS = 1000000;

  public void createSequenceFiles(MailOptions options) throws IOException {
    ChunkedWriter writer = new ChunkedWriter(getConf(), options.getChunkSize(), new Path(options.getOutputDir()));
    MailProcessor processor = new MailProcessor(options, options.getPrefix(), writer);
    try {
      if (options.getInput().isDirectory()) {
        PrefixAdditionDirectoryWalker walker = new PrefixAdditionDirectoryWalker(processor, writer);
        walker.walk(options.getInput());
View Full Code Here

  private static final Logger log = LoggerFactory.getLogger(
      SequenceFilesFromMailArchives.class);

  public void createSequenceFiles(MailOptions options) throws IOException {
    ChunkedWriter writer = new ChunkedWriter(
        getConf(), options.getChunkSize(), new Path(options.getOutputDir()));
    MailProcessor processor = new MailProcessor(
        options, options.getPrefix(), writer);
    try {
      if (options.getInput().isDirectory()) {
View Full Code Here

  }

  @Override
  protected void process(FileStatus fst, Path current) throws IOException {
    FileSystem fs = getFs();
    ChunkedWriter writer = getWriter();
    if (fst.isDir()) {
      String dirPath = getPrefix() + Path.SEPARATOR + current.getName() + Path.SEPARATOR + fst.getPath().getName();
      fs.listStatus(fst.getPath(),
                    new PrefixAdditionFilter(getConf(), dirPath, getOptions(), writer, getCharset(), fs));
    } else {
      InputStream in = null;
      try {
        in = fs.open(fst.getPath());

        StringBuilder file = new StringBuilder();
        for (String aFit : new FileLineIterable(in, getCharset(), false)) {
          file.append(aFit).append('\n');
        }
        String name = current.getName().equals(fst.getPath().getName())
            ? current.getName()
            : current.getName() + Path.SEPARATOR + fst.getPath().getName();
        writer.write(getPrefix() + Path.SEPARATOR + name, file.toString());
      } finally {
        Closeables.closeQuietly(in);
      }
    }
  }
View Full Code Here

    String keyPrefix = getOption(KEY_PREFIX_OPTION[0]);

    Charset charset = Charset.forName(getOption(CHARSET_OPTION[0]));
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(input.toUri(), conf);
    ChunkedWriter writer = new ChunkedWriter(conf, Integer.parseInt(options.get(CHUNK_SIZE_OPTION[0])), output);

    try {
      SequenceFilesFromDirectoryFilter pathFilter;
      String fileFilterClassName = options.get(FILE_FILTER_CLASS_OPTION[0]);
      if (PrefixAdditionFilter.class.getName().equals(fileFilterClassName)) {
View Full Code Here

TOP

Related Classes of org.apache.mahout.utils.io.ChunkedWriter

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.