Examples of org.apache.hadoop.io.SequenceFile.CompressionType

Package org.apache.hadoop.io.SequenceFile

Examples of org.apache.hadoop.io.SequenceFile.CompressionType

org.apache.hadoop.io.SequenceFile.CompressionType

      // get the path of the temporary output file
      Path file = getDefaultWorkFile(job, "");


      FileSystem fs = file.getFileSystem(InputFormatBase.getConfiguration(job));
      CompressionCodec codec = null;
      CompressionType compressionType = CompressionType.NONE;
      if (getCompressOutput(job)) {
        // find the kind of compression to do
        compressionType = SequenceFileOutputFormat.getOutputCompressionType(job);


        // find the right codec

View Full Code Here

          if (slice == DEFAULT_SLICE) {
            wname = new Path(new Path(new Path(out, segmentName), dirName), name);
          } else {
            wname = new Path(new Path(new Path(out, segmentName + "-" + slice), dirName), name);
          }
          CompressionType compType = 
              SequenceFileOutputFormat.getOutputCompressionType(job);
          if (clazz.isAssignableFrom(ParseText.class)) {
            compType = CompressionType.RECORD;
          }
          res = new MapFile.Writer(job, fs, wname.toString(), Text.class, clazz, compType, progress);

View Full Code Here

    final Path fetch =
      new Path(new Path(out, CrawlDatum.FETCH_DIR_NAME), name);
    final Path content =
      new Path(new Path(out, Content.DIR_NAME), name);
    
    final CompressionType compType = SequenceFileOutputFormat.getOutputCompressionType(job);


    final MapFile.Writer fetchOut =
      new MapFile.Writer(job, fs, fetch.toString(), Text.class, CrawlDatum.class,
          compType, progress);

View Full Code Here

    final boolean ignoreExternalLinks = job.getBoolean("db.ignore.external.links", false);
    int maxOutlinksPerPage = job.getInt("db.max.outlinks.per.page", 100);
    final boolean isParsing = job.getBoolean("fetcher.parse", true);
    final int maxOutlinks = (maxOutlinksPerPage < 0) ? Integer.MAX_VALUE
                                                     : maxOutlinksPerPage;
    final CompressionType compType = SequenceFileOutputFormat.getOutputCompressionType(job);
    Path out = FileOutputFormat.getOutputPath(job);
    
    Path text = new Path(new Path(out, ParseText.DIR_NAME), name);
    Path data = new Path(new Path(out, ParseData.DIR_NAME), name);
    Path crawl = new Path(new Path(out, CrawlDatum.PARSE_DIR_NAME), name);

View Full Code Here

              .forName(codecStr);
          FileOutputFormat.setOutputCompressorClass(jc_output, codec);
        }
        String type = conf.getCompressType();
        if (type != null && !type.trim().equals("")) {
          CompressionType style = CompressionType.valueOf(type);
          SequenceFileOutputFormat.setOutputCompressionType(jc, style);
        }
      }
      return getRecordWriter(jc_output, hiveOutputFormat, outputClass,
          isCompressed, tableInfo.getProperties(), outPath);

View Full Code Here

   * @return output stream over the created sequencefile
   */
  public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file,
      Class<?> keyClass, Class<?> valClass, boolean isCompressed) throws IOException {
    CompressionCodec codec = null;
    CompressionType compressionType = CompressionType.NONE;
    Class codecClass = null;
    if (isCompressed) {
      compressionType = SequenceFileOutputFormat.getOutputCompressionType(jc);
      codecClass = FileOutputFormat.getOutputCompressorClass(jc, DefaultCodec.class);
      codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, jc);

View Full Code Here

      RecordReader sampleReader = inputFormat.getRecordReader(splits[0], job);


      List<SequenceFile.Writer> writers = new ArrayList<SequenceFile.Writer>(
          splits.length);


      CompressionType compressionType = getOutputCompressionType(job);
      Class<? extends CompressionCodec> outputCompressorClass = getOutputCompressorClass(
          job, null);
      CompressionCodec codec = null;
      if (outputCompressorClass != null) {
        codec = ReflectionUtils.newInstance(outputCompressorClass,

View Full Code Here

              .forName(codecStr);
          FileOutputFormat.setOutputCompressorClass(jc_output, codec);
        }
        String type = conf.getCompressType();
        if (type != null && !type.trim().equals("")) {
          CompressionType style = CompressionType.valueOf(type);
          SequenceFileOutputFormat.setOutputCompressionType(jc, style);
        }
      }
      return getRecordWriter(jc_output, hiveOutputFormat, outputClass,
          isCompressed, tableInfo.getProperties(), outPath, reporter);

View Full Code Here

   * @return output stream over the created sequencefile
   */
  public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file,
      Class<?> keyClass, Class<?> valClass, boolean isCompressed) throws IOException {
    CompressionCodec codec = null;
    CompressionType compressionType = CompressionType.NONE;
    Class codecClass = null;
    if (isCompressed) {
      compressionType = SequenceFileOutputFormat.getOutputCompressionType(jc);
      codecClass = FileOutputFormat.getOutputCompressorClass(jc, DefaultCodec.class);
      codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, jc);

View Full Code Here

  }


  @Override
  public void open(String filePath) throws IOException {
    DefaultCodec defCodec = new DefaultCodec();
    CompressionType cType = CompressionType.BLOCK;
    open(filePath, defCodec, cType);
  }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.hadoop.io.SequenceFile.CompressionType

com.asakusafw.bulkloader.extractor.DfsFileImport

com.asakusafw.bulkloader.extractor.DfsFileImportTest

com.backtype.hadoop.pail.SequenceFileFormat

com.linkedin.camus.etl.kafka.common.SequenceFileRecordWriterProvider

com.linkedin.json.JsonSequenceFileOutputFormat

com.mozilla.pig.storage.SeqFileMultiStorage$MultiStorageSequenceOutputFormat

org.apache.accumulo.server.master.LogSort$LoggerMapFileOutputFormat

org.apache.avro.mapreduce.AvroSequenceFileOutputFormat

org.apache.flume.sink.hdfs.HDFSCompressedDataStream

org.apache.hadoop.hive.ql.exec.FileSinkOperator

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.