Package com.cloudera.cdk.morphline.api

Examples of com.cloudera.cdk.morphline.api.Record


    createRCFile(fileName, numRecords, maxColumns);
    List<Record> expected = Lists.newArrayList();
    if (rowWise) {
      // Row wise expected records
      for (int row = 0; row < numRecords; row++) {
        Record record = new Record();
        for (int column = 0; column < maxColumns; column++) {
          Text sampleText = new Text("ROW-NUM:" + row + ", COLUMN-NUM:"
              + column);
          record.put("field" + (column + 1), sampleText);
        }
        expected.add(record);
      }
    } else {
      // Column wise expected records
      for (int column = 0; column < maxColumns; column++) {
        for (int row = 0; row < numRecords; row++) {
          Record record = new Record();
          Text sampleText = new Text("ROW-NUM:" + row + ", COLUMN-NUM:"
              + column);
          record.put("field" + (column + 1), sampleText);
          expected.add(record);
        }
      }
    }
    return expected;
View Full Code Here


      return false;
    }

    if (rowWiseCheck) {
      for (int i = 0; i < actual.size(); i++) {
        Record currentExpected = expected.get(i);
        Record currentActual = actual.get(i);
        if (!areRecordColumnsEqual(currentActual, currentExpected, columnSize)) {
          return false;
        }
      }
    } else {
      for (int i = 0; i < columnSize; i++) {
        String fieldName = "field" + (i + 1);
        for (int j = 0; j < rowSize; j++) {
          Record currentExpected = expected.get((i * rowSize) + j);
          Record currentActual = actual.get((i * rowSize) + j);
          if (!isRecordColumnEqual(currentActual, currentExpected, fieldName)) {
            return false;
          }
        }
      }
View Full Code Here

    protected boolean doProcess(Record inputRecord) {
//      Preconditions.checkState(ReadAvroBuilder.AVRO_MEMORY_MIME_TYPE.equals(inputRecord.getFirstValue(Fields.ATTACHMENT_MIME_TYPE)));
      GenericContainer datum = (GenericContainer) inputRecord.getFirstValue(Fields.ATTACHMENT_BODY);
      Preconditions.checkNotNull(datum);
      Preconditions.checkNotNull(datum.getSchema());     
      Record outputRecord = inputRecord.copy();
     
      for (Map.Entry<String, Collection<String>> entry : stepMap.entrySet()) {
        String fieldName = entry.getKey();
        List<String> steps = (List<String>) entry.getValue();
        extractPath(datum, datum.getSchema(), fieldName, steps, outputRecord, 0);
View Full Code Here

    protected boolean doProcess(Record inputRecord) {
//      Preconditions.checkState(ReadAvroBuilder.AVRO_MEMORY_MIME_TYPE.equals(inputRecord.getFirstValue(Fields.ATTACHMENT_MIME_TYPE)));
      GenericContainer datum = (GenericContainer) inputRecord.getFirstValue(Fields.ATTACHMENT_BODY);
      Preconditions.checkNotNull(datum);
      Preconditions.checkNotNull(datum.getSchema());     
      Record outputRecord = inputRecord.copy();
     
      extractTree(datum, datum.getSchema(), outputRecord, outputFieldPrefix);
       
      // pass record to next command in chain:
      return getChild().process(outputRecord);
View Full Code Here

    }

    private boolean parseEntry(ArchiveInputStream archive, ArchiveEntry entry, EmbeddedExtractor extractor, Record record) {
      String name = entry.getName();
      if (archive.canReadEntryData(entry)) {
        Record entrydata = new Record(); // TODO: or pass myself?
        //Record entrydata = record.copy();
       
        // For detectors to work, we need a mark/reset supporting
        // InputStream, which ArchiveInputStream isn't, so wrap
        TemporaryResources tmp = new TemporaryResources();
View Full Code Here

        Preconditions.checkNotNull(schema);
      } else {
        schema = fixedSchema;
      }
     
      Record outputRecord = inputRecord.copy();
      AbstractParser.removeAttachments(outputRecord);
      IndexedRecord avroRecord = new GenericData.Record(schema);
     
      for (Field field : schema.getFields()) {
        String morphlineFieldName = mappings.get(field.name());
        if (morphlineFieldName == null) {
          morphlineFieldName = field.name();
        }
        List list = inputRecord.get(morphlineFieldName);
       
        Object avroResult = ERROR;
        if (field.schema().getType() == Schema.Type.ARRAY) {
          avroResult = toAvro(list, field);
        } else if (list.size() == 0) {
          try { // this will fail if there is no default value
            avroResult = ReflectData.get().getDefaultValue(field);
          } catch (AvroRuntimeException e) {
            avroResult = ERROR;
          }
        } else if (list.size() == 1) {
          avroResult = toAvro(list.get(0), field);
        }
       
        if (avroResult == ERROR) {
          LOG.debug("Cannot convert item: {} to schema: {}", list, schema);
          return false;         
        }
        avroRecord.put(field.pos(), avroResult);
      }

      outputRecord.put(Fields.ATTACHMENT_BODY, avroRecord);
       
      // pass record to next command in chain:
      return super.doProcess(outputRecord);
    }
View Full Code Here

      validateArguments();
    }
   
    @Override
    protected boolean doProcess(Record inputRecord) {     
      Record outputRecord = inputRecord.copy();
      AbstractParser.removeAttachments(outputRecord);
      ByteArrayOutputStream bout = new ByteArrayOutputStream(1024);
      if (format == Format.container) {
        writeContainer(inputRecord, bout);
      } else {
        writeContainerless(inputRecord, bout);
      }     
      outputRecord.put(Fields.ATTACHMENT_BODY, bout.toByteArray());
       
      // pass record to next command in chain:
      return super.doProcess(outputRecord);
    }
View Full Code Here

      }
    }
   
    @Override
    protected boolean doProcess(Record inputRecord, InputStream in) throws IOException {
      Record template = inputRecord.copy();
      removeAttachments(template);
      template.put(Fields.ATTACHMENT_MIME_TYPE, ReadAvroBuilder.AVRO_MEMORY_MIME_TYPE);
      Decoder decoder = prepare(in);
      try {
        while (true) {
          GenericContainer datum = datumReader.read(null, decoder);
          if (!extract(datum, template)) {
View Full Code Here

    Schema schema = new Parser().parse(new File("src/test/resources/test-avro-schemas/interop.avsc"));
    morphline = createMorphline("test-morphlines/toAvroWithSchemaFile");
   
    byte[] bytes = new byte[] {47, 13};
    byte[] fixed = new byte[16];
    Record jdoc1 = new Record();    
    jdoc1.put("_dataset_descriptor_schema", schema);
    collector.reset();
    assertFalse(morphline.process(jdoc1)); // "has no default value"

    jdoc1.put("intField", "notAnInteger");
    collector.reset();
    assertFalse(morphline.process(jdoc1)); // can't convert

    jdoc1.replaceValues("intField", "20");
    jdoc1.put("longField", "200");
    jdoc1.put("stringField", "abc");
    jdoc1.put("boolField", "true");
    jdoc1.put("floatField", "200");
    jdoc1.put("doubleField","200");
    jdoc1.put("bytesField", bytes);
    jdoc1.put("nullField", null);
    jdoc1.getFields().putAll("arrayField", Arrays.asList(10.0, 20.0));
    jdoc1.put("mapField",
        new HashMap(ImmutableMap.of("myMap",
          ImmutableMap.of("label", "car")
        ))
    );
    jdoc1.put("unionField", new ArrayList(Arrays.asList(bytes)));
    jdoc1.put("enumField", "B");
    jdoc1.put("fixedField", fixed);
    jdoc1.put("recordField",
        ImmutableMap.of
            "label", "house",
            "children", new ArrayList(Arrays.asList(bytes)))
    );   
    collector.reset();
View Full Code Here

        if (resolver == null) {
          resolver = createResolver(datumReader.getSchema(), datumReader.getExpected());
          resolverCache.put(writerSchemaKey, resolver);
          datumReader.setResolver(resolver);
        }
        Record template = inputRecord.copy();
        removeAttachments(template);
        template.put(Fields.ATTACHMENT_MIME_TYPE, ReadAvroBuilder.AVRO_MEMORY_MIME_TYPE);
        while (reader.hasNext()) {
          GenericContainer datum = reader.next();
          if (!extract(datum, template)) {
            return false;
          }
View Full Code Here

TOP

Related Classes of com.cloudera.cdk.morphline.api.Record

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact coftware#gmail.com.