Package org.apache.hadoop.io.compress

Examples of org.apache.hadoop.io.compress.CompressionCodecFactory
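CompressionCodecFactory maps a file name to the CompressionCodec registered for its suffix (.gz resolves to GzipCodec, .deflate to DeflateCodec, and so on), and getCodec() returns null when no suffix matches. Before the collected examples, a minimal self-contained sketch of the canonical read path; the class name and input path are hypothetical placeholders:

    // A minimal sketch (not from the snippets below): open a possibly-compressed
    // file, picking the codec, if any, from the file-name suffix.
    import java.io.IOException;
    import java.io.InputStream;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.CompressionCodecFactory;

    public class CodecFactoryExample {
      public static InputStream open(Configuration conf, Path path) throws IOException {
        FileSystem fs = path.getFileSystem(conf);
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
        return (codec == null)
            ? fs.open(path)                           // no suffix match: read as-is
            : codec.createInputStream(fs.open(path)); // wrap in a decompressing stream
      }
    }

The first two snippets, from Apache Tajo's query tests, use this suffix lookup to assert that a compressed table's output files were actually written with DeflateCodec: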


      assertEquals(2, desc.getStats().getNumRows().intValue());
    }

    // Verify that every file the query wrote resolves to DeflateCodec.
    FileSystem fs = FileSystem.get(tpch.getTestingCluster().getConfiguration());
    assertTrue(fs.exists(desc.getPath()));
    CompressionCodecFactory factory = new CompressionCodecFactory(tpch.getTestingCluster().getConfiguration());

    for (FileStatus file : fs.listStatus(desc.getPath())) {
      // getCodec() infers the codec from the file-name suffix (e.g. ".deflate").
      CompressionCodec codec = factory.getCodec(file.getPath());
      assertTrue(codec instanceof DeflateCodec);
    }
  }
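A second Tajo test applies the same check to the target directory of an INSERT OVERWRITE ... LOCATION query with compression enabled: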


    FileSystem fs = FileSystem.get(tpch.getTestingCluster().getConfiguration());
    Path path = new Path("/tajo-data/testInsertOverwriteLocationWithCompression");
    assertTrue(fs.exists(path));
    assertEquals(1, fs.listStatus(path).length);

    // Every file under the target location must resolve to DeflateCodec.
    CompressionCodecFactory factory = new CompressionCodecFactory(tpch.getTestingCluster().getConfiguration());
    for (FileStatus file : fs.listStatus(path)) {
      CompressionCodec codec = factory.getCodec(file.getPath());
      assertTrue(codec instanceof DeflateCodec);
    }
  }
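Record readers use the factory during initialization: whether a codec matches decides how the split's byte range must be read. This fragment is from a LineRecordReader-style initializer for the new mapreduce API: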

    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    // Infer the codec (if any) from the file-name suffix.
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
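One getSplits() implementation takes a different angle: it walks the input directory tree and, as soon as the factory recognizes any file as compressed, falls back to the superclass's split logic: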

          // If the path is a directory, queue it for the walk below.
          if (fStats.isDir()) {
            dirs.offer(path);
          }
          // A compressed top-level file: defer to the default split logic.
          else if ((new CompressionCodecFactory(job)).getCodec(path) != null) {
            return super.getSplits(job, numSplits);
          }

          // Walk the directory tree; any compressed file anywhere in it
          // forces the same fallback.
          while (dirs.peek() != null) {
            Path tstPath = dirs.remove();
            FileStatus[] fStatus = inpFs.listStatus(tstPath);
            for (int idx = 0; idx < fStatus.length; idx++) {
              if (fStatus[idx].isDir()) {
                dirs.offer(fStatus[idx].getPath());
              }
              else if ((new CompressionCodecFactory(job)).getCodec(fStatus[idx].getPath()) != null) {
                return super.getSplits(job, numSplits);
              }
            }
          }
        }
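The most common pattern, though, is guarding isSplitable(). An uncompressed file can always be split; a compressed one only if its codec supports splitting: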

  }
 
  @Override
  protected boolean isSplitable(JobContext context, Path file) {
    final CompressionCodec codec =
      new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    if (null == codec) {
      // No codec matched: the file is uncompressed and can always be split.
      return true;
    }
    // Compressed input is splittable only if the codec supports it (e.g. bzip2).
    return codec instanceof SplittableCompressionCodec;
  }
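When isSplitable() answers true for a compressed file, the record reader must open the split through the splittable API so the codec can align the split edges to compressed blocks. A sketch of that read path, patterned after Hadoop's LineRecordReader; fs, file, codec, start, and end are assumed to be in scope from a FileSplit:

    // Assumes codec already passed the isSplitable() check above.
    FSDataInputStream fileIn = fs.open(file);
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    SplitCompressionInputStream cIn =
        ((SplittableCompressionCodec) codec).createInputStream(
            fileIn, decompressor, start, end,
            SplittableCompressionCodec.READ_MODE.BYBLOCK);
    // The codec may move both edges to the nearest compressed block boundary.
    start = cIn.getAdjustedStart();
    end = cIn.getAdjustedEnd();

A stricter variant simply declares every compressed file non-splittable: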

  }

  @Override
  protected boolean isSplitable(JobContext context, Path file) {
    CompressionCodec codec =
        new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    // Treat any compressed file as non-splittable.
    return codec == null;
  }
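A third variant begins the same way; the original (truncated here) goes on to handle splittable codecs depending on the Hadoop version: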

  }

  @Override
  protected boolean isSplitable(JobContext context, Path file) {
    final CompressionCodec codec =
      new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    if (null == codec) {
      return true;
    }

    // Once we remove support for Hadoop < 2.0
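Another record-reader initialization follows the same sequence; the fragment begins mid-statement in the original source: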

             "position in InputSplit"));
                
    start = split.getStart();
    end = split.getLength() + start;
    final Path file = split.getPath();
    // Suffix lookup decides whether the split needs decompression.
    compressionCodecs = new CompressionCodecFactory(conf);
    codec = compressionCodecs.getCodec(file);
   
    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(conf);
    fileIn = fs.open(file);
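The constructor of the old-API (mapred) LineRecordReader is nearly identical: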

  public LineRecordReader(Configuration job, FileSplit split)
    throws IOException {
    long start = split.getStart();
    long end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
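Finally, a test for a Hive output format resolves the codec by name rather than by file suffix, then asks for block-compressed SequenceFile output: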

        tableProperties.setProperty("columns", COLUMN_NAMES_STRING);
        tableProperties.setProperty("columns.types", COLUMN_TYPES);
        serDe.initialize(new Configuration(), tableProperties);

        if (compressionCodec != null) {
            // Resolve the codec by name/alias, then request block-compressed output.
            CompressionCodec codec = new CompressionCodecFactory(new Configuration()).getCodecByName(compressionCodec);
            jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
            jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
        }

        RecordWriter recordWriter = outputFormat.getHiveRecordWriter(
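getCodecByName() complements the suffix lookup: it accepts a codec's fully-qualified class name, its simple class name, or its short alias. A small sketch; the "gzip" alias is an assumption about the default io.compression.codecs list:

    CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
    // "gzip" is the short alias for org.apache.hadoop.io.compress.GzipCodec.
    CompressionCodec gzip = factory.getCodecByName("gzip");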
