Package org.apache.avro.file

Examples of org.apache.avro.file.DataFileReader


  public static <K, V> TableSource<K, V> avroTableFile(List<Path> paths, PTableType<K, V> tableType) {
    return new AvroTableFileSource<K, V>(paths, (AvroType<Pair<K,V>>)tableType);
  }

  static Schema getSchemaFromPath(Path path, Configuration conf) {
    DataFileReader reader = null;
    try {
      FileSystem fs = FileSystem.get(conf);
      if (!fs.isFile(path)) {
        FileStatus[] fstat = fs.listStatus(path, new PathFilter() {
          @Override
          public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && !name.startsWith(".");
          }
        });
        if (fstat == null || fstat.length == 0) {
          throw new IllegalArgumentException("No valid files found in directory: " + path);
        }
        path = fstat[0].getPath();
      }
      reader = new DataFileReader(new FsInput(path, conf), new GenericDatumReader<GenericRecord>());
      return reader.getSchema();
    } catch (IOException e) {
      throw new RuntimeException("Error reading schema from path: "  + path, e);
    } finally {
      if (reader != null) {
        try {
          reader.close();
        } catch (IOException e) {
          // ignored
        }
      }
    }
View Full Code Here


        }));
    args.addAll(Arrays.asList(extraArgs));
    run(args);

    DataFileReader<Object> reader =
      new DataFileReader(OUT_FILE, new GenericDatumReader<Object>());
   
    Iterator<Object> found = reader.iterator();
    for (Object expected :
           new RandomData(Schema.parse(SCHEMA_FILE), Integer.parseInt(COUNT)))
      assertEquals(expected, found.next());

    reader.close();
  }
View Full Code Here

  public static Source<GenericData.Record> avroFile(List<Path> paths, Configuration conf) {
    return avroFile(paths, Avros.generics(getSchemaFromPath(paths.get(0), conf)));
  }

  static Schema getSchemaFromPath(Path path, Configuration conf) {
    DataFileReader reader = null;
    try {
      FileSystem fs = FileSystem.get(conf);
      if (!fs.isFile(path)) {
        FileStatus[] fstat = fs.listStatus(path, new PathFilter() {
          @Override
          public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && !name.startsWith(".");
          }
        });
        if (fstat == null || fstat.length == 0) {
          throw new IllegalArgumentException("No valid files found in directory: " + path);
        }
        path = fstat[0].getPath();
      }
      reader = new DataFileReader(new FsInput(path, conf), new GenericDatumReader<GenericRecord>());
      return reader.getSchema();
    } catch (IOException e) {
      throw new RuntimeException("Error reading schema from path: "  + path, e);
    } finally {
      if (reader != null) {
        try {
          reader.close();
        } catch (IOException e) {
          // ignored
        }
      }
    }
View Full Code Here

  public static <K, V> TableSource<K, V> avroTableFile(List<Path> paths, PTableType<K, V> tableType) {
    return new AvroTableFileSource<K, V>(paths, (AvroType<Pair<K,V>>)tableType);
  }

  static Schema getSchemaFromPath(Path path, Configuration conf) {
    DataFileReader reader = null;
    try {
      FileSystem fs = FileSystem.get(conf);
      if (!fs.isFile(path)) {
        FileStatus[] fstat = fs.listStatus(path, new PathFilter() {
          @Override
          public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && !name.startsWith(".");
          }
        });
        if (fstat == null || fstat.length == 0) {
          throw new IllegalArgumentException("No valid files found in directory: " + path);
        }
        path = fstat[0].getPath();
      }
      reader = new DataFileReader(new FsInput(path, conf), new GenericDatumReader<GenericRecord>());
      return reader.getSchema();
    } catch (IOException e) {
      throw new RuntimeException("Error reading schema from path: "  + path, e);
    } finally {
      if (reader != null) {
        try {
          reader.close();
        } catch (IOException e) {
          // ignored
        }
      }
    }
View Full Code Here

      writer.append(record);
    }
    writer.flush();
    writer.close();

    DataFileReader<GenericData.Record> reader = new DataFileReader(new ReadAvroContainerBuilder.ForwardOnlySeekableInputStream(new ByteArrayInputStream(bout.toByteArray())), new GenericDatumReader());
    Schema schema2 = reader.getSchema();
    assertEquals(schema, schema2);
    for (GenericData.Record record : records) {
      assertTrue(reader.hasNext());
      GenericData.Record record2 = reader.next();
      assertEquals(record, record2);
    }
    assertFalse(reader.hasNext());
    reader.close();

    Record event = new Record();
    event.getFields().put(Fields.ATTACHMENT_BODY, new ByteArrayInputStream(bout.toByteArray()));
    morphline = createMorphline("test-morphlines/readAvroContainer");
    deleteAllDocuments();
    assertTrue(load(event));
    assertEquals(records.length, queryResultSetSize("*:*"));
       
    GenericDatumWriter datumWriter = new GenericDatumWriter(schema);
    bout = new ByteArrayOutputStream();
    Encoder encoder = EncoderFactory.get().binaryEncoder(bout, null);
    for (GenericData.Record record : records) {
      datumWriter.write(record, encoder);
    }
    encoder.flush();

    Decoder decoder = DecoderFactory.get().binaryDecoder(new ByteArrayInputStream(bout.toByteArray()), null);
    DatumReader<GenericData.Record> datumReader = new GenericDatumReader<GenericData.Record>(schema);
    for (int i = 0; i < records.length; i++) {
      GenericData.Record record3 = datumReader.read(null, decoder);
      assertEquals(records[i], record3);
    }
   
    event = new Record();
    event.getFields().put(Fields.ATTACHMENT_BODY, new ByteArrayInputStream(bout.toByteArray()));
    File tmp = new File("target/tmp-test-schema.avsc");
    try {
      tmp.deleteOnExit();
      Files.write(schema.toString(true), tmp, Charsets.UTF_8);
      morphline = createMorphline("test-morphlines/readAvroWithExternalSchema");
      deleteAllDocuments();   
      assertTrue(load(event));
      assertEquals(records.length, queryResultSetSize("*:*"));
    } finally {
      tmp.delete();
    }
       
    for (GenericData.Record record : records) {
      event = new Record();
      event.getFields().put(Fields.ATTACHMENT_BODY, record);
      morphline = createMorphline("test-morphlines/extractAvroTree");
      deleteAllDocuments();
      assertTrue(load(event));
      assertEquals(1, queryResultSetSize("*:*"));
    }
   
    String[] formats = new String[] {"", "AndSnappy"};
    for (String format : formats) {
      morphline = createMorphline("test-morphlines/writeAvroToByteArrayWithContainer" + format);
      event = new Record();
      event.getFields().putAll(Fields.ATTACHMENT_BODY, Arrays.asList(records));
      deleteAllDocuments();
      assertTrue(load(event));
      assertEquals(1, collector.getFirstRecord().get(Fields.ATTACHMENT_BODY).size());
      byte[] bytes = (byte[]) collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_BODY);
      assertNotNull(bytes);
      reader = new DataFileReader(new ReadAvroContainerBuilder.ForwardOnlySeekableInputStream(new ByteArrayInputStream(bytes)), new GenericDatumReader());
      assertEquals("bar", new String(reader.getMeta("foo"), Charsets.UTF_8));
      assertEquals("Nadja", new String(reader.getMeta("firstName"), Charsets.UTF_8));
      assertEquals(schema, reader.getSchema());
      for (GenericData.Record record : records) {
        assertTrue(reader.hasNext());
        GenericData.Record record2 = reader.next();
        assertEquals(record, record2);
      }
      assertFalse(reader.hasNext());
      reader.close();
    }
   
    formats = new String[] {"Binary", "JSON"};
    for (String format : formats) {
      morphline = createMorphline("test-morphlines/writeAvroToByteArrayWithContainerless" + format);
View Full Code Here

      Notifications.notifyBeginTransaction(morphline);
      assertTrue(morphline.process(record));
      assertEquals(1, collector.getNumStartEvents());
      assertEquals(2104, collector.getRecords().size());
     
      FileReader<GenericData.Record> reader = new DataFileReader(file, new GenericDatumReader());
      int i = 0;
      while (reader.hasNext()) {
        Record actual = collector.getRecords().get(i);
        GenericData.Record expected = reader.next();
        assertTweetEquals(expected, actual, fieldNames, i);
        i++;
      }   
      assertEquals(collector.getRecords().size(), i);
    }
View Full Code Here

  }
 
  private void runTweets(String morphlineConfigFile, String[] fieldNames) throws Exception {
    File file = new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433-medium.avro");
    List<GenericData.Record> expecteds = new ArrayList();
    FileReader<GenericData.Record> reader = new DataFileReader(file, new GenericDatumReader());
    Schema schema = reader.getSchema();
    while (reader.hasNext()) {
      GenericData.Record expected = reader.next();
      expecteds.add(expected);
    }   
    assertEquals(2104, expecteds.size());

    ByteArrayOutputStream bout = new ByteArrayOutputStream();
    Encoder encoder;
    if (morphlineConfigFile.contains("Json")) {
      encoder = EncoderFactory.get().jsonEncoder(schema, bout);
    } else {
      encoder = EncoderFactory.get().binaryEncoder(bout, null);
    }
    GenericDatumWriter datumWriter = new GenericDatumWriter(schema);
    for (GenericData.Record record : expecteds) {
      datumWriter.write(record, encoder);
    }
    encoder.flush();

    morphline = createMorphline(morphlineConfigFile);
    for (int j = 0; j < 3; j++) { // also test reuse of objects and low level avro buffers
      Record record = new Record();
      record.put(Fields.ATTACHMENT_BODY, bout.toByteArray());
      collector.reset();
      startSession();
      Notifications.notifyBeginTransaction(morphline);
      assertTrue(morphline.process(record));
      assertEquals(1, collector.getNumStartEvents());
      assertEquals(2104, collector.getRecords().size());
     
      reader = new DataFileReader(file, new GenericDatumReader());
      int i = 0;
      while (reader.hasNext()) {
        Record actual = collector.getRecords().get(i);
        GenericData.Record expected = reader.next();
        assertTweetEquals(expected, actual, fieldNames, i);
        i++;
      }   
      assertEquals(collector.getRecords().size(), i);
      }
View Full Code Here

    byte[] bytes;
    if (morphlineConfigFile.contains("Container")) {
      bytes = Files.toByteArray(file);
    } else {   
      List<GenericData.Record> expecteds = new ArrayList();
      FileReader<GenericData.Record> reader = new DataFileReader(file, new GenericDatumReader());
      Schema schema = reader.getSchema();
      while (reader.hasNext()) {
        GenericData.Record expected = reader.next();
        expecteds.add(expected);
      }   
      assertEquals(2, expecteds.size());
 
      ByteArrayOutputStream bout = new ByteArrayOutputStream();
View Full Code Here

      }     
    });  

    // fetch test input data and sort like solr result set
    List<GenericData.Record> records = new ArrayList();
    FileReader<GenericData.Record> reader = new DataFileReader(file, new GenericDatumReader());
    while (reader.hasNext()) {
      GenericData.Record expected = reader.next();
      records.add(expected);
    }
    assertEquals(collector.getRecords().size(), records.size());   
    Collections.sort(records, new Comparator<GenericData.Record>() {
      @Override
View Full Code Here

      DataFileReader<GenericContainer> reader = null;
      try {
        // TODO: for better performance subclass DataFileReader
        // to eliminate expensive SchemaParser.parse() on each new file in DataFileReader.initialize().
        // Instead replace the parse() with a lookup in the byte[] cache map.
        reader = new DataFileReader(new ForwardOnlySeekableInputStream(in), datumReader);
       
        byte[] writerSchemaBytes = reader.getMeta(DataFileConstants.SCHEMA);
        Preconditions.checkNotNull(writerSchemaBytes);
        ByteArrayKey writerSchemaKey = new ByteArrayKey(writerSchemaBytes);
        ResolvingDecoder resolver = resolverCache.get(writerSchemaKey); // cache for performance
View Full Code Here

TOP

Related Classes of org.apache.avro.file.DataFileReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.