Examples of org.apache.hadoop.mapreduce.lib.input.TextInputFormat
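TextInputFormat is Hadoop's default input format for plain text: files are broken into lines, and each record is keyed by the line's byte offset (a LongWritable) with the line contents as the value (a Text). A minimal job setup using it might look like the following sketch (class name and input path are illustrative):

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

    public class TextJobSetup {
        public static void main(String[] args) throws Exception {
            Job job = Job.getInstance();
            job.setJarByClass(TextJobSetup.class);
            // TextInputFormat is the default, but setting it explicitly
            // documents the (LongWritable offset, Text line) record shape.
            job.setInputFormatClass(TextInputFormat.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            // ... mapper, reducer and output configuration would follow.
        }
    }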


        FileInputFormat.setInputPaths(job, this.location);
    }

    @Override
    public InputFormat getInputFormat() throws IOException {
        // Refuse to split files so that each input file is read in full
        // by a single mapper.
        return new TextInputFormat() {

            @Override
            protected boolean isSplitable(JobContext context, Path file) {
                return false;
            }
        };
    }
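Returning false from isSplitable, as above, forces each file to be read in its entirety by one mapper. That trades away parallelism, but it is the standard way to keep logical records that may span HDFS block boundaries, such as multi-line records, from being cut in half by split computation.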


    @SuppressWarnings("unchecked")
    public InputFormat getInputFormat() throws IOException {
        // We will use TextInputFormat, the default Hadoop input format for
        // text.  It has a LongWritable key that we will ignore, and the value
        // is a Text (a string writable) that the JSON data is in.
        return new TextInputFormat();
    }
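The comment above captures the contract: every TextInputFormat record is a (byte offset, line) pair, and most jobs ignore the offset. A minimal mapper consuming such records could look like this (a hypothetical sketch, not part of the loader above):

    import java.io.IOException;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    // Emits (line, length-of-line); the LongWritable offset key is ignored.
    public class LineLengthMapper
            extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable offset, Text line, Context context)
                throws IOException, InterruptedException {
            context.write(line, new IntWritable(line.getLength()));
        }
    }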

    public InputFormat getInputFormat() throws IOException {
        AvroStorageLog.funcCall("getInputFormat");
        // Use the Avro input format when a schema is known; otherwise fall
        // back to plain line-oriented text input.
        if (inputAvroSchema != null) {
            return new PigAvroInputFormat(inputAvroSchema);
        } else {
            return new TextInputFormat();
        }
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(1);
   
    // Set up the Hadoop Input Format
    Job job = Job.getInstance();
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
    TextInputFormat.addInputPath(job, new Path(inputPath));
   
    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
   
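Once wrapped by HadoopInputFormat, the records behave like any other Flink DataSet, so the usual operators apply. A possible continuation of the snippet above (hypothetical; it simply strips the offsets):

    // Drop the byte-offset keys, keeping only the line text.
    DataSet<String> lines = text.map(
            new MapFunction<Tuple2<LongWritable, Text>, String>() {
        @Override
        public String map(Tuple2<LongWritable, Text> record) {
            return record.f1.toString();
        }
    });
    lines.print();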

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    // For this implementation to work, mapred.map.tasks needs to be set to
    // the actual number of mappers Hadoop will use:
    TextInputFormat inputFormat = new TextInputFormat();
    List<?> splits = inputFormat.getSplits(job);
    if (splits == null || splits.isEmpty()) {
      log.warn("Unable to compute number of splits?");
    } else {
      int numSplits = splits.size();
      log.info("Setting mapred.map.tasks = {}", numSplits);
      job.getConfiguration().setInt("mapred.map.tasks", numSplits);
    }
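Note that mapred.map.tasks is only advisory in Hadoop: the actual number of map tasks is determined by the splits the input format produces. That is why the snippet derives the value from getSplits() and writes it back to the configuration, rather than the other way around.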


    @Override
    @SuppressWarnings("rawtypes")
    public InputFormat getInputFormat() throws IOException {
        return new TextInputFormat();
    }

