Package org.apache.avro.mapred

Examples of org.apache.avro.mapred.FsInput


    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/myavro-r-00000.avro"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<GenericData.Record> reader = new DataFileReader<GenericData.Record>(
        new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
        new GenericDatumReader<GenericData.Record>(STATS_SCHEMA));
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (GenericData.Record record : reader) {
      counts.put(((Utf8) record.get("name")).toString(), (Integer) record.get("count"));
    }
    reader.close();

    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());

    outputFiles = fileSystem.globStatus(outputPath.suffix("/myavro1-r-00000.avro"));
    Assert.assertEquals(1, outputFiles.length);
    reader = new DataFileReader<GenericData.Record>(
        new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
        new GenericDatumReader<GenericData.Record>(STATS_SCHEMA_2));
    counts = new HashMap<String, Integer>();
    for (GenericData.Record record : reader) {
      counts.put(((Utf8) record.get("name1")).toString(), (Integer) record.get("count1"));
    }
View Full Code Here


    Assert.assertTrue(job.waitForCompletion(true));
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/myavro3-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<TextStats> reader = new DataFileReader<TextStats>(
        new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
        new SpecificDatumReader<TextStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (TextStats record : reader) {
      counts.put(record.name.toString(), record.count);
    }
View Full Code Here

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/myavro3-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<TextStats> reader = new DataFileReader<TextStats>(
        new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
        new SpecificDatumReader<TextStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (TextStats record : reader) {
      counts.put(record.name.toString(), record.count);
    }
View Full Code Here

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<TextStats> reader = new DataFileReader<TextStats>(
        new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
        new SpecificDatumReader<TextStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (TextStats record : reader) {
      counts.put(record.name.toString(), record.count);
    }
View Full Code Here

      LOG.debug("Loading the data file " + dataFilePath);
      Schema recordSchema = AvroKeyValue.getSchema(mKeySchema, options.getValueSchema());
      DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(recordSchema);
      mDataFileReader =
        new DataFileReader<GenericRecord>
        (new FsInput(dataFilePath, options.getConfiguration()), datumReader);
    }
View Full Code Here

    private static <K> NavigableMap<K, Long> loadIndexFile(
        Configuration conf, Path path, Schema keySchema) throws IOException {
      DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(
          AvroKeyValue.getSchema(keySchema, Schema.create(Schema.Type.LONG)));
      DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(
          new FsInput(path, conf), datumReader);

      NavigableMap<K, Long> index;
      if (Schema.create(Schema.Type.STRING).equals(keySchema)) {
        // Because Avro STRING types are mapped to the Java CharSequence class that does not
        // mandate the implementation of Comparable, we need to specify a special
View Full Code Here

  private DataFileReader<GenericRecord> read(Path filename) throws IOException {
    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
      conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FsInput fsInput = new FsInput(filename, conf);
    DatumReader<GenericRecord> datumReader =
      new GenericDatumReader<GenericRecord>();
    return new DataFileReader<GenericRecord>(fsInput, datumReader);
  }
View Full Code Here

  @Override
  public Iterator<T> read(FileSystem fs, final Path path) {
    this.mapFn.initialize();
    try {
      FsInput fsi = new FsInput(path, fs.getConf());
      final DataFileReader<T> reader = new DataFileReader<T>(fsi, recordReader);
      return new AutoClosingIterator<T>(reader, new UnmodifiableIterator<T>() {
        @Override
        public boolean hasNext() {
          return reader.hasNext();
View Full Code Here

      throws IOException {
    Configuration conf = getConf();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
      conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FsInput fsInput = new FsInput(filename, conf);
    DatumReader<GenericRecord> datumReader =
      new GenericDatumReader<GenericRecord>();
    return new DataFileReader<GenericRecord>(fsInput, datumReader);
  }
View Full Code Here

        if (fstat == null || fstat.length == 0) {
          throw new IllegalArgumentException("No valid files found in directory: " + path);
        }
        path = fstat[0].getPath();
      }
      reader = new DataFileReader(new FsInput(path, conf), new GenericDatumReader<GenericRecord>());
      return reader.getSchema();
    } catch (IOException e) {
      throw new RuntimeException("Error reading schema from path: "  + path, e);
    } finally {
      if (reader != null) {
View Full Code Here

TOP

Related Classes of org.apache.avro.mapred.FsInput

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.