Package org.apache.avro.mapred

Examples of org.apache.avro.mapred.FsInput


  @Override
  public Iterator<T> read(FileSystem fs, final Path path) {
    this.mapFn.initialize();
    try {
      FsInput fsi = new FsInput(path, fs.getConf());
      final DataFileReader<T> reader = new DataFileReader<T>(fsi, recordReader);
      return new AutoClosingIterator<T>(reader, new UnmodifiableIterator<T>() {
        @Override
        public boolean hasNext() {
          return reader.hasNext();
View Full Code Here


    if(latest != null) {
      gdr.setExpected(latest);
    }

    this.reader = new DataFileReader<GenericRecord>(new FsInput(split.getPath(), job), gdr);
    this.reader.sync(split.getStart());
    this.start = reader.tell();
    this.stop = split.getStart() + split.getLength();
  }
View Full Code Here

        if (fstat == null || fstat.length == 0) {
          throw new IllegalArgumentException("No valid files found in directory: " + path);
        }
        path = fstat[0].getPath();
      }
      reader = new DataFileReader(new FsInput(path, conf), new GenericDatumReader<GenericRecord>());
      return reader.getSchema();
    } catch (IOException e) {
      throw new RuntimeException("Error reading schema from path: "  + path, e);
    } finally {
      if (reader != null) {
View Full Code Here

      throws IOException {
    Configuration conf = getConf();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
      conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FsInput fsInput = new FsInput(filename, conf);
    DatumReader<GenericRecord> datumReader =
      new GenericDatumReader<GenericRecord>();
    return new DataFileReader<GenericRecord>(fsInput, datumReader);
  }
View Full Code Here

      Schema recordSchema = AvroKeyValue.getSchema(mKeySchema, options.getValueSchema());
      DatumReader<GenericRecord> datumReader =
        model.createDatumReader(recordSchema);
      mDataFileReader =
        new DataFileReader<GenericRecord>
        (new FsInput(dataFilePath, options.getConfiguration()), datumReader);
     
    }
View Full Code Here

    private <K> NavigableMap<K, Long> loadIndexFile(
        Configuration conf, Path path, Schema keySchema) throws IOException {
      DatumReader<GenericRecord> datumReader = model.createDatumReader(
          AvroKeyValue.getSchema(keySchema, Schema.create(Schema.Type.LONG)));
      DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(
          new FsInput(path, conf), datumReader);

      NavigableMap<K, Long> index;
      if (Schema.create(Schema.Type.STRING).equals(keySchema)) {
        // Because Avro STRING types are mapped to the Java CharSequence class that does not
        // mandate the implementation of Comparable, we need to specify a special
View Full Code Here

    verify(context);

  Configuration conf = new Configuration();
  conf.set("fs.default.name", "file:///");
  Path avroFile = new Path("target/temp.avro");
  DataFileReader<GenericData.Record> avroFileReader = new DataFileReader<GenericData.Record>(new FsInput(avroFile,
      conf), new SpecificDatumReader<GenericData.Record>());
   
 
  avroFileReader.seek(pointTwo);
    // Verify that the second record was written;
View Full Code Here

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/myavro-r-00000.avro"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<GenericData.Record> reader = new DataFileReader<GenericData.Record>(
        new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
        new GenericDatumReader<GenericData.Record>(STATS_SCHEMA));
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (GenericData.Record record : reader) {
      counts.put(((Utf8) record.get("name")).toString(), (Integer) record.get("count"));
    }
    reader.close();

    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());

    outputFiles = fileSystem.globStatus(outputPath.suffix("/myavro1-r-00000.avro"));
    Assert.assertEquals(1, outputFiles.length);
    reader = new DataFileReader<GenericData.Record>(
        new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
        new GenericDatumReader<GenericData.Record>(STATS_SCHEMA_2));
    counts = new HashMap<String, Integer>();
    for (GenericData.Record record : reader) {
      counts.put(((Utf8) record.get("name1")).toString(), (Integer) record.get("count1"));
    }
    reader.close();

    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());
 
    outputFiles = fileSystem.globStatus(outputPath.suffix("/testnewwrite-r-00000.avro"));
    Assert.assertEquals(1, outputFiles.length);
    reader = new DataFileReader<GenericData.Record>(
        new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
            new GenericDatumReader<GenericData.Record>(STATS_SCHEMA));
    counts = new HashMap<String, Integer>();
    for (GenericData.Record record : reader) {
       counts.put(((Utf8) record.get("name")).toString(), (Integer) record.get("count"));
    }
    reader.close();
   
    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());
       
    outputFiles = fileSystem.globStatus(outputPath.suffix("/testnewwrite2-r-00000.avro"));
    Assert.assertEquals(1, outputFiles.length);
    reader = new DataFileReader<GenericData.Record>(
        new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
        new GenericDatumReader<GenericData.Record>(STATS_SCHEMA_2));
    counts = new HashMap<String, Integer>();
    for (GenericData.Record record : reader) {
     counts.put(((Utf8) record.get("name1")).toString(), (Integer) record.get("count1"));
    }
    reader.close();
    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());
   
    outputFiles = fileSystem.globStatus(outputPath.suffix("/testwritenonschema-r-00000.avro"));
    Assert.assertEquals(1, outputFiles.length);
    reader = new DataFileReader<GenericData.Record>(
        new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
        new GenericDatumReader<GenericData.Record>(STATS_SCHEMA));
    counts = new HashMap<String, Integer>();
    for (GenericData.Record record : reader) {
      counts.put(((Utf8) record.get("name")).toString(), (Integer) record.get("count"));
    }
View Full Code Here

    Assert.assertTrue(job.waitForCompletion(true));
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/myavro3-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<TextStats> reader = new DataFileReader<TextStats>(
        new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
        new SpecificDatumReader<TextStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (TextStats record : reader) {
      counts.put(record.name.toString(), record.count);
    }
View Full Code Here

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/myavro3-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<TextStats> reader = new DataFileReader<TextStats>(
        new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
        new SpecificDatumReader<TextStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (TextStats record : reader) {
      counts.put(record.name.toString(), record.count);
    }
View Full Code Here

TOP

Related Classes of org.apache.avro.mapred.FsInput

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.