Package com.cloudera.cdk.morphline.api

Examples of com.cloudera.cdk.morphline.api.Record


        if (includeMetaData) {
          sequenceFileMetaData = reader.getMetadata();
        }
        Class keyClass = reader.getKeyClass();
        Class valueClass = reader.getValueClass();
        Record template = inputRecord.copy();
        removeAttachments(template);
       
        while (true) {
          Writable key = (Writable)ReflectionUtils.newInstance(keyClass, conf);
          Writable val = (Writable)ReflectionUtils.newInstance(valueClass, conf);
          try {
            if (!reader.next(key, val)) {
              break;
            }
          } catch (EOFException ex) {
            // SequenceFile.Reader will throw an EOFException after reading
            // all the data, if it doesn't know the length.  Since we are
            // passing in an InputStream, we hit this case;
            LOG.trace("Received expected EOFException", ex);
            break;
          }
          incrementNumRecords();
          Record outputRecord = template.copy();
          outputRecord.put(keyField, key);
          outputRecord.put(valueField, val);
          outputRecord.put(Fields.ATTACHMENT_MIME_TYPE, OUTPUT_MEDIA_TYPE);
          if (includeMetaData && sequenceFileMetaData != null) {
            outputRecord.put(SEQUENCE_FILE_META_DATA, sequenceFileMetaData);
          }
         
          // pass record to next command in chain:
          if (!getChild().process(outputRecord)) {
            return false;
View Full Code Here


              break;
            }
            rootNode = iter.next();
          }
       
          Record doc = new Record();
          JsonNode user = rootNode.get("user");
          JsonNode idNode = rootNode.get("id_str");
          if (idNode == null || idNode.textValue() == null) {
            continue; // skip
          }
     
          doc.put("id", idPrefix + idNode.textValue());
          tryAddDate(doc, "created_at", rootNode.get("created_at"));         
          tryAddString(doc, "source", rootNode.get("source"));
          tryAddString(doc, "text", rootNode.get("text"));
          tryAddInt(doc, "retweet_count", rootNode.get("retweet_count"));
          tryAddBool(doc, "retweeted", rootNode.get("retweeted"));
View Full Code Here

  }

  /**
   * Runs the "test-morphlines/readJsonTweets" morphline over the fixture
   * "non-length-delimited-20130430-234145-tweets.json.gz" and inspects what the collector captured.
   *
   * NOTE(review): the rest of this method is cut off in this excerpt; only the visible
   * statements are described here.
   */
  @Test
  public void testReadJsonTweets() throws Exception {
    morphline = createMorphline("test-morphlines/readJsonTweets");   
    Record record = new Record();
    // The fixture stream is attached as the body, and its file name as the attachment name.
    record.put(Fields.ATTACHMENT_BODY, getInputStream("non-length-delimited-20130430-234145-tweets.json.gz"));
    record.put(Fields.ATTACHMENT_NAME, "non-length-delimited-20130430-234145-tweets.json.gz");
    startSession();
    // The morphline must report success for the input record.
    assertTrue(morphline.process(record));
    // Exactly one start event is expected on the collector.
    assertEquals(1, collector.getNumStartEvents());
    Iterator<Record> iter = collector.getRecords().iterator();
    // The first collected record must carry this single created_at value.
    assertEquals(Arrays.asList("1985-09-04T18:01:01Z"), iter.next().get("created_at"));
View Full Code Here

  }
   
  /**
   * Runs the "test-morphlines/readJsonTweetsLengthDelimited" morphline over the fixture
   * "sample-statuses-20120906-141433" and inspects what the collector captured.
   *
   * NOTE(review): the rest of this method is cut off in this excerpt; only the visible
   * statements are described here.
   */
  @Test
  public void testReadJsonTweetsLengthDelimited() throws Exception {
    morphline = createMorphline("test-morphlines/readJsonTweetsLengthDelimited");   
    Record record = new Record();   
    // Only the body is attached here; no attachment name is set on the record.
    record.put(Fields.ATTACHMENT_BODY, getInputStream("sample-statuses-20120906-141433"));
    startSession();
    // The morphline must report success for the input record.
    assertTrue(morphline.process(record));
    // Exactly one start event is expected on the collector.
    assertEquals(1, collector.getNumStartEvents());
    Iterator<Record> iter = collector.getRecords().iterator();
    // The first collected record must carry this single created_at value.
    assertEquals(Arrays.asList("1985-09-04T18:01:01Z"), iter.next().get("created_at"));
View Full Code Here

  }
 
  private void testDetectMimeTypesInternal(String configFile) throws Exception {
    // verify that Avro is classified as Avro 
    morphline = createMorphline(configFile);   
    Record record = new Record();   
    record.put(Fields.ATTACHMENT_BODY, Files.toByteArray(AVRO_FILE));
    startSession();
    morphline.process(record);
    assertEquals(AVRO_MIME_TYPE, collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_MIME_TYPE));

    // verify that JPG isnt' classified as JPG because this morphline uses includeDefaultMimeTypes : false
    collector.reset();
    record = new Record();   
    record.put(Fields.ATTACHMENT_BODY, Files.toByteArray(JPG_FILE));
    startSession();
    morphline.process(record);
    assertEquals("application/octet-stream", collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_MIME_TYPE));
  }
View Full Code Here

  }
 
  @Test
  public void testDetectMimeTypesWithDefaultMimeTypes() throws Exception {
    morphline = createMorphline("test-morphlines/detectMimeTypesWithDefaultMimeTypes");   
    Record record = new Record();   
    record.put(Fields.ATTACHMENT_BODY, Files.toByteArray(JPG_FILE));
    startSession();
    morphline.process(record);
    assertEquals("image/jpeg", collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_MIME_TYPE));
  }
View Full Code Here

  }

  @Test
  public void testMimeTypeAlreadySpecifiedOnInputRemainsUnchanged() throws Exception {
    morphline = createMorphline("test-morphlines/detectMimeTypesWithDefaultMimeTypes");   
    Record record = new Record();   
    record.put(Fields.ATTACHMENT_BODY, Files.toByteArray(JPG_FILE));
    record.put(Fields.ATTACHMENT_MIME_TYPE, "foo/bar");
    startSession();
    morphline.process(record);
    assertEquals("foo/bar", collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_MIME_TYPE));
  }
View Full Code Here

    assertEquals("foo/bar", collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_MIME_TYPE));
  }

  @Test
  public void testPlainText() throws Exception {
    Record event = createEvent("foo".getBytes("UTF-8"));
    assertEquals("text/plain", detect(event, false));
  }
View Full Code Here

    assertEquals("text/plain", detect(event, false));
  }

  @Test
  public void testUnknownType() throws Exception {   
    Record event = createEvent(new byte[] {3, 4, 5, 6});
    assertEquals("application/octet-stream", detect(event, false));
  }
View Full Code Here

    assertEquals("application/octet-stream", detect(event, false));
  }

  @Test
  public void testUnknownEmptyType() throws Exception {   
    Record event = createEvent(new byte[0]);
    assertEquals("application/octet-stream", detect(event, false));
  }
View Full Code Here

TOP

Related Classes of com.cloudera.cdk.morphline.api.Record

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.