Package com.cloudera.cdk.morphline.api

Examples of com.cloudera.cdk.morphline.api.Record


    documentSchema.setFields(Arrays.asList(new Field("price", unionSchema, null, null)));       

    GenericData.Record document1 = new GenericData.Record(documentSchema);
    document1.put("price", expected);   

    Record jdoc1 = new Record();    
    jdoc1.put("_dataset_descriptor_schema", documentSchema);
    jdoc1.put("price", input);
    Record expect1 = jdoc1.copy();
    expect1.put(Fields.ATTACHMENT_BODY, document1);
    processAndVerifySuccess(jdoc1, expect1, false)
  }
View Full Code Here


          Schema.applyAliases(writerSchema, readerSchema), readerSchema, null);
    }
   
    protected boolean extract(GenericContainer datum, Record inputRecord) {
      incrementNumRecords();
      Record outputRecord = inputRecord.copy();
      outputRecord.put(Fields.ATTACHMENT_BODY, datum);
       
      // pass record to next command in chain:
      return getChild().process(outputRecord);
    }
View Full Code Here

    )));   
   
    morphline = createMorphline("test-morphlines/extractAvroPaths");       
    {
      deleteAllDocuments();
      Record record = new Record();
      record.put(Fields.ATTACHMENT_BODY, document0);
      startSession();
  //    System.out.println(documentSchema.toString(true));
  //    System.out.println(document0.toString());
      assertTrue(morphline.process(record));
      assertEquals(1, collector.getRecords().size());
      List expected = Arrays.asList(Arrays.asList(Arrays.asList(1, 2, 3, 4, 5), Arrays.asList(10, 20), null, null, Arrays.asList(100, 200), null));
      //List expected2 = Arrays.asList(1, 2, 3, 4, 5, 10, 20, 100, 200);
      assertEquals(expected, collector.getFirstRecord().get("/price"));
      assertEquals(expected, collector.getFirstRecord().get("/price/[]"));
//      assertEquals(expected, collector.getFirstRecord().get("/*"));
//      assertEquals(expected2, collector.getFirstRecord().get("/*/*"));
      assertEquals(Arrays.asList(), collector.getFirstRecord().get("/unknownField"));
    }
   
    {
      deleteAllDocuments();
      Record record = new Record();
      record.put(Fields.ATTACHMENT_BODY, document1);
      startSession();
  //    System.out.println(documentSchema.toString(true));
  //    System.out.println(document1.toString());
      assertTrue(morphline.process(record));
      assertEquals(1, collector.getRecords().size());
      List expected = Arrays.asList(Arrays.asList(Arrays.asList(1000)));
      assertEquals(expected, collector.getFirstRecord().get("/price"));
      assertEquals(expected, collector.getFirstRecord().get("/price/[]"));
      assertEquals(Arrays.asList(), collector.getFirstRecord().get("/unknownField"));
    }
   
    morphline = createMorphline("test-morphlines/extractAvroPathsFlattened");       
    {
      deleteAllDocuments();
      Record record = new Record();
      record.put(Fields.ATTACHMENT_BODY, document0);
      startSession();
//      System.out.println(documentSchema.toString(true));
//      System.out.println(document0.toString());
      assertTrue(morphline.process(record));
      assertEquals(1, collector.getRecords().size());
      List expected = Arrays.asList(1, 2, 3, 4, 5, 10, 20, 100, 200);
      assertEquals(expected, collector.getFirstRecord().get("/price"));
      assertEquals(expected, collector.getFirstRecord().get("/price/[]"));
      assertEquals(Arrays.asList(), collector.getFirstRecord().get("/unknownField"));
    }
   
    ingestAndVerifyAvro(documentSchema, document0);
    ingestAndVerifyAvro(documentSchema, document0, document1);
   
    Record event = new Record();
    event.getFields().put(Fields.ATTACHMENT_BODY, document0);
    morphline = createMorphline("test-morphlines/extractAvroTree");
    deleteAllDocuments();
    System.out.println(document0);
    assertTrue(load(event));
    assertEquals(1, queryResultSetSize("*:*"));
    Record first = collector.getFirstRecord();   
    AbstractParser.removeAttachments(first);
    assertEquals(Arrays.asList(1, 2, 3, 4, 5, 10, 20, 100, 200), first.get("/price"));
    assertEquals(1, first.getFields().asMap().size());
   
    {
      morphline = createMorphline("test-morphlines/toAvro");
      Record jdoc1 = new Record();    
      jdoc1.put("_dataset_descriptor_schema", documentSchema);
      jdoc1.put("price", Arrays.asList(1000));
      Record expect1 = jdoc1.copy();
      expect1.put(Fields.ATTACHMENT_BODY, document1);
      processAndVerifySuccess(jdoc1, expect1, false);
 
      Record jdoc0 = new Record();    
      jdoc0.put("_dataset_descriptor_schema", documentSchema);
      jdoc0.getFields().putAll("price", Arrays.asList(
          Arrays.asList(1, 2, 3, 4, 5),
          Arrays.asList(10, 20),
          null,
          null,
          Arrays.asList(100, 200),
          null
        )
      );
      Record expect0 = jdoc0.copy();
      expect0.put(Fields.ATTACHMENT_BODY, document0);
      processAndVerifySuccess(jdoc0, expect0, false);
    }
  }
View Full Code Here

    document1.put("name", new GenericData.Array(documentSchema.getField("name").schema(), Arrays.asList(name4)));    
   
    morphline = createMorphline("test-morphlines/extractAvroPaths");       
    {
      deleteAllDocuments();
      Record record = new Record();
      record.put(Fields.ATTACHMENT_BODY, document0);
      startSession();
//      System.out.println(documentSchema.toString(true));
//      System.out.println(document0.toString());
      assertTrue(morphline.process(record));
      assertEquals(1, collector.getRecords().size());
      assertEquals(Arrays.asList(10), collector.getFirstRecord().get("/docId"));
      assertEquals(Arrays.asList(Arrays.asList()), collector.getFirstRecord().get("/links/backward"));
      List expected = Arrays.asList(Arrays.asList(20, 40, 60));
      assertEquals(expected, collector.getFirstRecord().get("/links/forward"));
      assertEquals(expected, collector.getFirstRecord().get("/links/forward/[]"));
      assertEquals(expected, collector.getFirstRecord().get("/links/forward[]"));
      assertEquals(Arrays.asList("en-us", "en", "en-gb"), collector.getFirstRecord().get("/name/[]/language/[]/code"));
      assertEquals(Arrays.asList("en-us", "en", "en-gb"), collector.getFirstRecord().get("/name[]/language[]/code"));
      assertEquals(Arrays.asList("us", "gb"), collector.getFirstRecord().get("/name/[]/language/[]/country"));
      assertEquals(Arrays.asList("us", "gb"), collector.getFirstRecord().get("/name[]/language[]/country"));
      assertEquals(Arrays.asList(), collector.getFirstRecord().get("/unknownField"));
    }

    morphline = createMorphline("test-morphlines/extractAvroPathsFlattened");       
    {
      deleteAllDocuments();
      Record record = new Record();
      record.put(Fields.ATTACHMENT_BODY, document0);
      startSession();
//      System.out.println(documentSchema.toString(true));
//      System.out.println(document0.toString());
      assertTrue(morphline.process(record));
      assertEquals(1, collector.getRecords().size());
      assertEquals(Arrays.asList(10), collector.getFirstRecord().get("/docId"));
      assertEquals(Arrays.asList(20, 40, 60), collector.getFirstRecord().get("/links"));   
      assertEquals(Arrays.asList(), collector.getFirstRecord().get("/links/backward"));
      List expected = Arrays.asList(20, 40, 60);
      assertEquals(expected, collector.getFirstRecord().get("/links/forward"));
      assertEquals(expected, collector.getFirstRecord().get("/links/forward/[]"));
      assertEquals(expected, collector.getFirstRecord().get("/links/forward[]"));
      assertEquals(Arrays.asList("en-us", "en", "en-gb"), collector.getFirstRecord().get("/name/[]/language/[]/code"));
      assertEquals(Arrays.asList("en-us", "en", "en-gb"), collector.getFirstRecord().get("/name[]/language[]/code"));
      assertEquals(Arrays.asList("us", "gb"), collector.getFirstRecord().get("/name/[]/language/[]/country"));
      assertEquals(Arrays.asList("us", "gb"), collector.getFirstRecord().get("/name[]/language[]/country"));
      assertEquals(Arrays.asList(), collector.getFirstRecord().get("/unknownField"));
      expected = Arrays.asList("en-us", "us", "en", "http://A", "http://B", "en-gb", "gb");
      assertEquals(expected, collector.getFirstRecord().get("/name"));
    }
   
    ingestAndVerifyAvro(documentSchema, document0);
    ingestAndVerifyAvro(documentSchema, document0, document1);
   
    Record event = new Record();
    event.getFields().put(Fields.ATTACHMENT_BODY, document0);
    morphline = createMorphline("test-morphlines/extractAvroTree");
    deleteAllDocuments();
//    System.out.println(document0);
    assertTrue(load(event));
    assertEquals(1, queryResultSetSize("*:*"));
    Record first = collector.getFirstRecord();
    assertEquals(Arrays.asList("us", "gb"), first.get("/name/language/country"));
    assertEquals(Arrays.asList("en-us", "en", "en-gb"), first.get("/name/language/code"));
    assertEquals(Arrays.asList(20, 40, 60), first.get("/links/forward"));
    assertEquals(Arrays.asList("http://A", "http://B"), first.get("/name/url"));
    assertEquals(Arrays.asList(10), first.get("/docId"));
    AbstractParser.removeAttachments(first);
    assertEquals(5, first.getFields().asMap().size());

    {
      morphline = createMorphline("test-morphlines/toAvro");
      Record jdoc1 = new Record();    
      jdoc1.put("_dataset_descriptor_schema", documentSchema);
      jdoc1.put("docId", 20);
      jdoc1.put("links",
          ImmutableMap.of(
            "backward", Arrays.asList(10, 30),
            "forward", Arrays.asList(80))
      );
      jdoc1.getFields().putAll("name",
          Arrays.asList(
            ImmutableMap.of
              "language", Arrays.asList(),
              "url", "http://C"))
      );
      Record expect1 = jdoc1.copy();
      expect1.put(Fields.ATTACHMENT_BODY, document1);
      processAndVerifySuccess(jdoc1, expect1, false);
 
      Record jdoc0 = new Record();    
      jdoc0.put("_dataset_descriptor_schema", documentSchema);
      jdoc0.put("docId", 10);
      jdoc0.put("links",
          ImmutableMap.of(
            "backward", Arrays.asList(),
            "forward", Arrays.asList(20, 40, 60))
      );
     
      jdoc0.getFields().putAll("name",
          Arrays.asList(
            ImmutableMap.of
              "language", new ArrayList(Arrays.asList(
                  ImmutableMap.of("code", "en-us", "country", "us"),
                  ImmutableMap.of("code", "en"))),
              "url", "http://A"),
            ImmutableMap.of
              "language", Arrays.asList(),
              "url", "http://B"),
            ImmutableMap.of
              "language", new ArrayList(Arrays.asList(
                  ImmutableMap.of("code", "en-gb", "country", "gb")))
               )
          )
      );
      Record expect0 = jdoc0.copy();
      expect0.put(Fields.ATTACHMENT_BODY, document0);
      processAndVerifySuccess(jdoc0, expect0, false);
    }
  }
View Full Code Here

    map.put(utf8("foo"), mapRecord);
    document0.put("mapField", map);

    morphline = createMorphline("test-morphlines/extractAvroPaths");       
    deleteAllDocuments();
    Record record = new Record();
    record.put(Fields.ATTACHMENT_BODY, document0);
    startSession();
//    System.out.println(schema.toString(true));
//    System.out.println(document0.toString());
    assertTrue(morphline.process(record));
    assertEquals(1, collector.getRecords().size());
    assertEquals(Arrays.asList("nadja"), collector.getFirstRecord().get("/mapField/foo/label"));
    assertEquals(Arrays.asList(), collector.getFirstRecord().get("/unknownField"));

    morphline = createMorphline("test-morphlines/extractAvroPathsFlattened");       
    deleteAllDocuments();
    record = new Record();
    record.put(Fields.ATTACHMENT_BODY, document0);
    startSession();
//      System.out.println(documentSchema.toString(true));
//      System.out.println(document0.toString());
    assertTrue(morphline.process(record));
    assertEquals(1, collector.getRecords().size());
    assertEquals(Arrays.asList("nadja"), collector.getFirstRecord().get("/mapField/foo/label"));
    assertEquals(Arrays.asList(), collector.getFirstRecord().get("/unknownField"));
   
    ingestAndVerifyAvro(schema, document0);
   
    Record event = new Record();
    event.getFields().put(Fields.ATTACHMENT_BODY, document0);
    morphline = createMorphline("test-morphlines/extractAvroTree");
    deleteAllDocuments();
    //System.out.println(document0);
    assertTrue(load(event));
    assertEquals(1, queryResultSetSize("*:*"));
    Record first = collector.getFirstRecord();
    assertEquals(Arrays.asList("nadja"), first.get("/mapField/foo/label"));
    AbstractParser.removeAttachments(first);
    assertEquals(1, first.getFields().asMap().size());

    {
      morphline = createMorphline("test-morphlines/toAvro");
      Record jdoc0 = new Record();    
      jdoc0.put("_dataset_descriptor_schema", schema);
      jdoc0.put("mapField", new HashMap(ImmutableMap.of(
          utf8("foo"), ImmutableMap.of("label", "nadja")
          ))
      );
      Record expect0 = jdoc0.copy();
      expect0.put(Fields.ATTACHMENT_BODY, document0);
      processAndVerifySuccess(jdoc0, expect0, false)
     
      // verify that multiple maps can't be converted to a non-array schema
      jdoc0 = new Record();    
      jdoc0.put("_dataset_descriptor_schema", schema);
      jdoc0.put("mapField", new HashMap(ImmutableMap.of(
          utf8("foo"), ImmutableMap.of("label", "nadja")
          ))
      );
      jdoc0.put("mapField", new HashMap(ImmutableMap.of(
          utf8("foo"), ImmutableMap.of("label", "nadja")
          ))
      );
      collector.reset();
      assertFalse(morphline.process(jdoc0));
     
      // verify that an exception is raised if a required field is missing
      jdoc0 = new Record();    
      jdoc0.put("_dataset_descriptor_schema", schema);
      jdoc0.put("mapField", new HashMap(ImmutableMap.of(
          utf8("foo"), ImmutableMap.of()
          ))
      );
      collector.reset();
      assertFalse(morphline.process(jdoc0));
     
      // verify that default field is used if value is missing
      Schema schema2 = new Parser().parse(new File("src/test/resources/test-avro-schemas/intero2.avsc"));
      jdoc0 = new Record();    
      jdoc0.put("_dataset_descriptor_schema", schema2);
      jdoc0.put("mapField", new HashMap(ImmutableMap.of(
          utf8("foo"), ImmutableMap.of()
          ))
      );
View Full Code Here

      assertEquals(record, record2);
    }
    assertFalse(reader.hasNext());
    reader.close();

    Record event = new Record();
    event.getFields().put(Fields.ATTACHMENT_BODY, new ByteArrayInputStream(bout.toByteArray()));
    morphline = createMorphline("test-morphlines/readAvroContainer");
    deleteAllDocuments();
    assertTrue(load(event));
    assertEquals(records.length, queryResultSetSize("*:*"));
       
    GenericDatumWriter datumWriter = new GenericDatumWriter(schema);
    bout = new ByteArrayOutputStream();
    Encoder encoder = EncoderFactory.get().binaryEncoder(bout, null);
    for (GenericData.Record record : records) {
      datumWriter.write(record, encoder);
    }
    encoder.flush();

    Decoder decoder = DecoderFactory.get().binaryDecoder(new ByteArrayInputStream(bout.toByteArray()), null);
    DatumReader<GenericData.Record> datumReader = new GenericDatumReader<GenericData.Record>(schema);
    for (int i = 0; i < records.length; i++) {
      GenericData.Record record3 = datumReader.read(null, decoder);
      assertEquals(records[i], record3);
    }
   
    event = new Record();
    event.getFields().put(Fields.ATTACHMENT_BODY, new ByteArrayInputStream(bout.toByteArray()));
    File tmp = new File("target/tmp-test-schema.avsc");
    try {
      tmp.deleteOnExit();
      Files.write(schema.toString(true), tmp, Charsets.UTF_8);
      morphline = createMorphline("test-morphlines/readAvroWithExternalSchema");
      deleteAllDocuments();   
      assertTrue(load(event));
      assertEquals(records.length, queryResultSetSize("*:*"));
    } finally {
      tmp.delete();
    }
       
    for (GenericData.Record record : records) {
      event = new Record();
      event.getFields().put(Fields.ATTACHMENT_BODY, record);
      morphline = createMorphline("test-morphlines/extractAvroTree");
      deleteAllDocuments();
      assertTrue(load(event));
      assertEquals(1, queryResultSetSize("*:*"));
    }
   
    String[] formats = new String[] {"", "AndSnappy"};
    for (String format : formats) {
      morphline = createMorphline("test-morphlines/writeAvroToByteArrayWithContainer" + format);
      event = new Record();
      event.getFields().putAll(Fields.ATTACHMENT_BODY, Arrays.asList(records));
      deleteAllDocuments();
      assertTrue(load(event));
      assertEquals(1, collector.getFirstRecord().get(Fields.ATTACHMENT_BODY).size());
      byte[] bytes = (byte[]) collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_BODY);
      assertNotNull(bytes);
      reader = new DataFileReader(new ReadAvroContainerBuilder.ForwardOnlySeekableInputStream(new ByteArrayInputStream(bytes)), new GenericDatumReader());
      assertEquals("bar", new String(reader.getMeta("foo"), Charsets.UTF_8));
      assertEquals("Nadja", new String(reader.getMeta("firstName"), Charsets.UTF_8));
      assertEquals(schema, reader.getSchema());
      for (GenericData.Record record : records) {
        assertTrue(reader.hasNext());
        GenericData.Record record2 = reader.next();
        assertEquals(record, record2);
      }
      assertFalse(reader.hasNext());
      reader.close();
    }
   
    formats = new String[] {"Binary", "JSON"};
    for (String format : formats) {
      morphline = createMorphline("test-morphlines/writeAvroToByteArrayWithContainerless" + format);
      event = new Record();
      event.getFields().putAll(Fields.ATTACHMENT_BODY, Arrays.asList(records));
      deleteAllDocuments();
      assertTrue(load(event));
      assertEquals(1, collector.getFirstRecord().get(Fields.ATTACHMENT_BODY).size());
      byte[] bytes = (byte[]) collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_BODY);
      assertNotNull(bytes);
View Full Code Here

 
  private void runTweetContainer(String morphlineConfigFile, String[] fieldNames) throws Exception {
    File file = new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433-medium.avro");
    morphline = createMorphline(morphlineConfigFile);   
    for (int j = 0; j < 3; j++) { // also test reuse of objects and low level avro buffers
      Record record = new Record();
      byte[] body = Files.toByteArray(file);   
      record.put(Fields.ATTACHMENT_BODY, body);
      collector.reset();
      startSession();
      Notifications.notifyBeginTransaction(morphline);
      assertTrue(morphline.process(record));
      assertEquals(1, collector.getNumStartEvents());
      assertEquals(2104, collector.getRecords().size());
     
      FileReader<GenericData.Record> reader = new DataFileReader(file, new GenericDatumReader());
      int i = 0;
      while (reader.hasNext()) {
        Record actual = collector.getRecords().get(i);
        GenericData.Record expected = reader.next();
        assertTweetEquals(expected, actual, fieldNames, i);
        i++;
      }   
      assertEquals(collector.getRecords().size(), i);
View Full Code Here

    }
    encoder.flush();

    morphline = createMorphline(morphlineConfigFile);
    for (int j = 0; j < 3; j++) { // also test reuse of objects and low level avro buffers
      Record record = new Record();
      record.put(Fields.ATTACHMENT_BODY, bout.toByteArray());
      collector.reset();
      startSession();
      Notifications.notifyBeginTransaction(morphline);
      assertTrue(morphline.process(record));
      assertEquals(1, collector.getNumStartEvents());
      assertEquals(2104, collector.getRecords().size());
     
      reader = new DataFileReader(file, new GenericDatumReader());
      int i = 0;
      while (reader.hasNext()) {
        Record actual = collector.getRecords().get(i);
        GenericData.Record expected = reader.next();
        assertTweetEquals(expected, actual, fieldNames, i);
        i++;
      }   
      assertEquals(collector.getRecords().size(), i);
View Full Code Here

    long start = System.currentTimeMillis();
    long duration = durationSecs * 1000;
    int iters = 0;
    while (System.currentTimeMillis() < start + duration) {
      Record record = new Record();
      record.put(Fields.ATTACHMENT_BODY, bytes);     
      collector.reset();
      startSession();
      assertEquals(1, collector.getNumStartEvents());
      assertTrue(morphline.process(record));
      iters++;
View Full Code Here

  private static Utf8 utf8(String str) {
    return new Utf8(str);
  }

  private String toString(GenericData.Record avroRecord) {
    Record record = new Record();
    for (Field field : avroRecord.getSchema().getFields()) {
      record.put(field.name(), avroRecord.get(field.pos()));
    }
    return record.toString(); // prints sorted by key for human readability
  }
View Full Code Here

TOP

Related Classes of com.cloudera.cdk.morphline.api.Record

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact coftware#gmail.com.