Package com.cloudera.cdk.morphline.api

Examples of com.cloudera.cdk.morphline.api.Record


      validateArguments();
    }
 
    @Override
    protected boolean doProcess(Record inputRecord, InputStream stream) throws IOException {
      Record template = inputRecord.copy();
      removeAttachments(template);
      Charset detectedCharset = detectCharset(inputRecord, charset)
      BufferedReader reader = new BufferedReader(
          new InputStreamReader(stream, detectedCharset), getBufferSize(stream));
      if (ignoreFirstLine) {
        reader.readLine();
      }     

      while (true) {
        Record outputRecord = template.copy();
        if (!readNext(reader, outputRecord)) {
          break;
        }
        incrementNumRecords();
       
View Full Code Here


    Assert.assertFalse(dst.exists());
    new File(cwd, fileName).mkdirs(); // will be auto deleted!
    Files.write("wrong msg", new File(new File(cwd, fileName), fileName), Charsets.UTF_8); // will be auto deleted!

    Command morphline = createMorphline("test-morphlines/testDownloadHdfsFile", inputFile, cwd);        
    Assert.assertTrue(morphline.process(new Record()));   
    Assert.assertEquals(msg, Files.toString(dst, Charsets.UTF_8));
    if (isDir) {
      FileUtil.fullyDelete(dst.getParentFile());
    } else {
      FileUtil.fullyDelete(dst);
    }
    Assert.assertTrue(fileSystem.exists(inputFile));
    Assert.assertTrue(FileUtil.fullyDelete(cwd));
   
    // verify that subsequent calls with same inputFile won't copy the file again (to prevent races)
    morphline = createMorphline("test-morphlines/downloadHdfsFile", inputFile, cwd);      
    Assert.assertTrue(morphline.process(new Record()));   
    Assert.assertFalse(dst.exists());
    Assert.assertTrue(morphline.process(new Record()));
    Assert.assertFalse(dst.exists());
    Assert.assertFalse(cwd.exists());
   
    Assert.assertTrue(fileSystem.delete(inputFile, true));
   
View Full Code Here

      validateArguments();
    }

    @Override
    protected boolean doProcess(Record inputRecord, InputStream stream) throws IOException {
      Record template = inputRecord.copy();
      removeAttachments(template);
      template.removeAll(Fields.MESSAGE);
      Charset detectedCharset = detectCharset(inputRecord, charset)
      Reader reader = new InputStreamReader(stream, detectedCharset);
      BufferedReader lineReader = new BufferedReader(reader, getBufferSize(stream));
      StringBuilder lines = null;
      String line;
     
      while ((line = lineReader.readLine()) != null) {
        if (lines == null) {
          lines = new StringBuilder(line);
        } else {
          boolean isMatch = regex.reset(line).matches();
          if (negate) {
            isMatch = !isMatch;
          }
          /*
          not match && previous --> do next
          not match && next     --> do previous
          match && previous     --> do previous
          match && next         --> do next            
          */
          boolean doPrevious = (what == What.previous);
          if (!isMatch) {
            doPrevious = !doPrevious;
          }
         
          if (doPrevious) { // do previous
            lines.append('\n');
            lines.append(line);
          } else {          // do next
            if (lines.length() > 0 && !flushRecord(template.copy(), lines.toString())) {
              return false;
            }
            lines.setLength(0);
            lines.append(line);             
          }
        }         
      }
      if (lines != null && lines.length() > 0) {
        return flushRecord(template.copy(), lines.toString());
      }
      return true;
    }
View Full Code Here

    waitForRecoveriesToFinish(false);
   
    createAlias("aliascollection", "collection1");
   
    morphline = parse("test-morphlines/loadSolrBasic", "aliascollection");
    Record record = new Record();
    record.put(Fields.ID, "id0-innsbruck");
    record.put("text", "mytext");
    record.put("user_screen_name", "foo");
    record.put("first_name", "Nadja"); // will be sanitized
    startSession();
    assertEquals(1, collector.getNumStartEvents());
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record));
   
    record = new Record();
    record.put(Fields.ID, "id1-innsbruck");
    record.put("text", "mytext1");
    record.put("user_screen_name", "foo1");
    record.put("first_name", "Nadja1"); // will be sanitized
    assertTrue(morphline.process(record));
   
    Record expected = new Record();
    expected.put(Fields.ID, "id0-innsbruck");
    expected.put("text", "mytext");
    expected.put("user_screen_name", "foo");
    Iterator<Record> citer = collector.getRecords().iterator();
    assertEquals(expected, citer.next());
   
    Record expected2 = new Record();
    expected2.put(Fields.ID, "id1-innsbruck");
    expected2.put("text", "mytext1");
    expected2.put("user_screen_name", "foo1");
    assertEquals(expected2, citer.next());
   
    assertFalse(citer.hasNext());
   
    commit();
   
    QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc));
    //System.out.println(rsp);
    Iterator<SolrDocument> iter = rsp.getResults().iterator();
    assertEquals(expected.getFields(), next(iter));
    assertEquals(expected2.getFields(), next(iter));
    assertFalse(iter.hasNext());
   
    Notifications.notifyRollbackTransaction(morphline);
    Notifications.notifyShutdown(morphline);
   
View Full Code Here

  @Test
  public void testConvertHTML() throws Exception {
    morphline = createMorphline("test-morphlines/convertHTML");   
    InputStream in = new FileInputStream(new File(RESOURCES_DIR + "/test-documents/helloworld.html"));
    Record record = new Record();
    record.put("id", "123");
    record.put(Fields.ATTACHMENT_BODY, in);
    String expected = Files.toString(new File(RESOURCES_DIR + "/test-documents/convertHTML-expected-output.xml"), Charsets.UTF_8);
    processAndVerifySuccess(record,
        ImmutableMultimap.of("id", "123", Fields.MESSAGE, expected)
        );   
    in.close();
View Full Code Here

  @Test
  public void testConvertHTMLBlog() throws Exception {
    morphline = createMorphline("test-morphlines/convertHTML");   
    InputStream in = new FileInputStream(new File(RESOURCES_DIR + "/test-documents/blog.html"));
    Record record = new Record();
    record.put("id", "123");
    record.put(Fields.ATTACHMENT_BODY, in);
    String expected = Files.toString(new File(RESOURCES_DIR + "/test-documents/convertHTMLBlog-expected-output.xml"), Charsets.UTF_8);
    processAndVerifySuccess(record,
        ImmutableMultimap.of("id", "123", Fields.MESSAGE, expected)
        );   
    in.close();
View Full Code Here

  @Test
  public void testConvertHTMLBlogThenRunXSQLT() throws Exception {
    morphline = createMorphline("test-morphlines/convertHTMLBlogThenRunXSLT");   
    byte[] bytes = Files.toByteArray(new File(RESOURCES_DIR + "/test-documents/blog.html"));
    Record record = new Record();
    record.put("id", "123");
    record.put(Fields.ATTACHMENT_BODY, bytes);
    for (int i = 0; i < 3; i++) {
      assertTrue(morphline.process(record.copy())); // TODO check details
    }
  } 
View Full Code Here

  @Test
  public void testConvertHTMLAndExtractLinks() throws Exception {
    morphline = createMorphline("test-morphlines/convertHTMLandExtractLinks");   
    InputStream in = new FileInputStream(new File(RESOURCES_DIR + "/test-documents/helloworld.html"));
    Record record = new Record();
    record.put("id", "123");
    record.put(Fields.ATTACHMENT_BODY, in);
    processAndVerifySuccess(record,
        ImmutableMultimap.of("id", "123", "a", "Visit Foo!", "myhref", "http://www.foo.com/", "mytarget", "_foo"),
        ImmutableMultimap.of("id", "123", "a", "Visit Bar!", "myhref", "http://www.bar.com/")
        );   
    in.close();
View Full Code Here

              Collector myCollector = new Collector();
              Command myMorphline = new PipeBuilder().build(config, null, myCollector, ctx);
             
              long start = System.currentTimeMillis();
              while (System.currentTimeMillis() < start + durationMillis) {
                Record record = new Record();
                record.put("id", "123");
                record.put(Fields.ATTACHMENT_BODY, bytes);
                for (int i = 0; i < 3; i++) {
                  processAndVerifySuccess(myMorphline, myCollector, record.copy(),
                      ImmutableMultimap.of("id", "123", Fields.MESSAGE, expected)
                      );   
                }
                iters++;
                //break;
View Full Code Here

      Path attachmentPath = getAttachmentPath(record);
      SingleStreamFileSystem fs = new SingleStreamFileSystem(in, attachmentPath);
      RCFile.Reader reader = null;
      try {
        reader = new RCFile.Reader(fs, attachmentPath, conf);
        Record template = record.copy();
        removeAttachments(template);
        template.put(Fields.ATTACHMENT_MIME_TYPE, OUTPUT_MEDIA_TYPE);
        if (includeMetaData) {
          SequenceFile.Metadata metadata = reader.getMetadata();
          if (metadata != null) {
            template.put(RC_FILE_META_DATA, metadata);
          }
        }
       
        switch (readMode) {
        case row:
View Full Code Here

TOP

Related Classes of com.cloudera.cdk.morphline.api.Record

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.