Package org.kitesdk.morphline.api

Examples of org.kitesdk.morphline.api.Record


    }
 
    @Override
    protected boolean doProcess(Record record) {
      for (Object body : record.get(Fields.ATTACHMENT_BODY)) {
        Record outputRecord = record.copy();
        AbstractParser.removeAttachments(outputRecord);
        String pathString = body.toString();
        Path path = new Path(pathString);
        InputStream in = null;
        try {
          try {
            FileSystem fs = path.getFileSystem(conf);
            in = fs.open(path);
            if (pathString.endsWith(".gz")) {
              in = new GZIPInputStream(in, 64 * 1024);
            }
            in = new BufferedInputStream(in);
            outputRecord.put(Fields.ATTACHMENT_BODY, in);
          } catch (IOException e) {
            throw new MorphlineRuntimeException(e);
          }
 
          // pass record to next command in chain:
View Full Code Here


    out.flush();
    out.close();
    Assert.assertTrue(fileSystem.exists(inputFile));

    Command morphline = createMorphline("test-morphlines/openHdfsFile");        
    Record record = new Record();
    record.put(Fields.ATTACHMENT_BODY, inputFile.toString());
    Assert.assertTrue(morphline.process(record));   
    Record expected = new Record();
    expected.put(Fields.MESSAGE, msg);
    Assert.assertEquals(expected, collector.getFirstRecord());
  }
View Full Code Here

 
    @Override
    protected boolean doProcess2(Record inputRecord, InputStream stream) throws SaxonApiException, XMLStreamException {
      incrementNumRecords();     
      for (Fragment fragment : fragments) {
        Record template = inputRecord.copy();
        removeAttachments(template);
        XdmNode document = parseXmlDocument(stream);
        LOG.trace("XQuery input document: {}", document);
        XQueryEvaluator evaluator = fragment.xQueryEvaluator;
        evaluator.setContextItem(document);
       
        int i = 0;
        for (XdmItem item : evaluator) {
          i++;
          if (LOG.isTraceEnabled()) {
            LOG.trace("XQuery result sequence item #{} is of class: {} with value: {}", new Object[] { i,
                item.getUnderlyingValue().getClass().getName(), item });
          }
          if (item.isAtomicValue()) {
            LOG.debug("Ignoring atomic value in result sequence: {}", item);
            continue;
          }
          XdmNode node = (XdmNode) item;
          Record outputRecord = template.copy();
          boolean isNonEmpty = addRecordValues(node, Axis.SELF, XdmNodeKind.ATTRIBUTE, outputRecord);
          isNonEmpty = addRecordValues(node, Axis.ATTRIBUTE, XdmNodeKind.ATTRIBUTE, outputRecord) || isNonEmpty;
          isNonEmpty = addRecordValues(node, Axis.CHILD, XdmNodeKind.ELEMENT, outputRecord) || isNonEmpty;
          if (isNonEmpty) { // pass record to next command in chain  
            if (!getChild().process(outputRecord)) {
View Full Code Here

    Assert.assertFalse(dst.exists());
    new File(cwd, fileName).mkdirs(); // will be auto deleted!
    Files.write("wrong msg", new File(new File(cwd, fileName), fileName), Charsets.UTF_8); // will be auto deleted!

    Command morphline = createMorphline("test-morphlines/testDownloadHdfsFile", inputFile, cwd);        
    Assert.assertTrue(morphline.process(new Record()));   
    Assert.assertEquals(msg, Files.toString(dst, Charsets.UTF_8));
    if (isDir) {
      FileUtil.fullyDelete(dst.getParentFile());
    } else {
      FileUtil.fullyDelete(dst);
    }
    Assert.assertTrue(fileSystem.exists(inputFile));
    Assert.assertTrue(FileUtil.fullyDelete(cwd));
   
    // verify that subsequent calls with same inputFile won't copy the file again (to prevent races)
    morphline = createMorphline("test-morphlines/downloadHdfsFile", inputFile, cwd);      
    Assert.assertTrue(morphline.process(new Record()));   
    Assert.assertFalse(dst.exists());
    Assert.assertTrue(morphline.process(new Record()));
    Assert.assertFalse(dst.exists());
    Assert.assertFalse(cwd.exists());
   
    Assert.assertTrue(fileSystem.delete(inputFile, true));
   
View Full Code Here

 
    @Override
    protected boolean doProcess2(Record inputRecord, InputStream stream) throws SaxonApiException, XMLStreamException {
      incrementNumRecords();     
      for (Fragment fragment : fragments) {
        Record outputRecord = inputRecord.copy();
        removeAttachments(outputRecord);  
        XdmNode document = parseXmlDocument(stream);
        LOG.trace("XSLT input document: {}", document);
        XsltTransformer evaluator = fragment.transformer;
        evaluator.setInitialContextNode(document);
View Full Code Here

        "readAvroParquetFileWithReaderSchema1",
        "readAvroParquetFileWithReaderSchemaExternal"
        )) {
      morphline = createMorphline("test-morphlines/" + configFile);
     
      Record morphlineRecord = new Record();
      morphlineRecord.put(ReadAvroParquetFileBuilder.FILE_UPLOAD_URL, file.toString());
      collector.reset();
     
      assertTrue(morphline.process(morphlineRecord));

      assertEquals(1, collector.getRecords().size());
View Full Code Here

    writer.write(record);
    writer.close();

    morphline = createMorphline("test-morphlines/readAvroParquetFileWithProjectionSubSchema");
   
    Record morphlineRecord = new Record();
    morphlineRecord.put(ReadAvroParquetFileBuilder.FILE_UPLOAD_URL, file.toString());
    collector.reset();
   
    assertTrue(morphline.process(morphlineRecord));

    assertEquals(1, collector.getRecords().size());
View Full Code Here

  public void doTest() throws Exception {
   
    waitForRecoveriesToFinish(false);
   
    morphline = parse("test-morphlines" + File.separator + "loadSolrBasic");   
    Record record = new Record();
    record.put(Fields.ID, "id0-innsbruck");
    record.put("text", "mytext");
    record.put("user_screen_name", "foo");
    record.put("first_name", "Nadja"); // will be sanitized
    startSession();
    assertEquals(1, collector.getNumStartEvents());
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record));
   
    record = new Record();
    record.put(Fields.ID, "id1-innsbruck");
    record.put("text", "mytext1");
    record.put("user_screen_name", "foo1");
    record.put("first_name", "Nadja1"); // will be sanitized
    assertTrue(morphline.process(record));
   
    Record expected = new Record();
    expected.put(Fields.ID, "id0-innsbruck");
    expected.put("text", "mytext");
    expected.put("user_screen_name", "foo");
    Iterator<Record> citer = collector.getRecords().iterator();
    assertEquals(expected, citer.next());
   
    Record expected2 = new Record();
    expected2.put(Fields.ID, "id1-innsbruck");
    expected2.put("text", "mytext1");
    expected2.put("user_screen_name", "foo1");
    assertEquals(expected2, citer.next());
   
    assertFalse(citer.hasNext());
   
    commit();
   
    QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc));
    //System.out.println(rsp);
    Iterator<SolrDocument> iter = rsp.getResults().iterator();
    assertEquals(expected.getFields(), next(iter));
    assertEquals(expected2.getFields(), next(iter));
    assertFalse(iter.hasNext());
   
    Notifications.notifyRollbackTransaction(morphline);
    Notifications.notifyShutdown(morphline);
    cloudClient.shutdown();
View Full Code Here

    morphline = createMorphline("test-morphlines/rcFileMorphlineRow");
    String rcFileName = "testRCFileRowWise.rc";
    List<Record> expected = setupRCFile(rcFileName, NUM_RECORDS, NUM_COLUMNS,
        true);
    Path inputFile = dfs.makeQualified(new Path(testDirectory, rcFileName));
    Record input = new Record();
    input.put(Fields.ATTACHMENT_NAME, inputFile.toString());
    input.put(Fields.ATTACHMENT_BODY, readPath(inputFile));
    startSession();
    assertEquals(1, collector.getNumStartEvents());
    assertTrue(morphline.process(input));
    assertTrue(areFieldsEqual(expected, collector.getRecords(), NUM_COLUMNS,
        true));
View Full Code Here

    morphline = createMorphline("test-morphlines/rcFileMorphlineColumn");
    String rcFileName = "testRCFileColumnWise.rc";
    List<Record> expected = setupRCFile(rcFileName, NUM_RECORDS, NUM_COLUMNS,
        false);
    Path inputFile = dfs.makeQualified(new Path(testDirectory, rcFileName));
    Record input = new Record();
    input.put(Fields.ATTACHMENT_NAME, inputFile.toString());
    input.put(Fields.ATTACHMENT_BODY, readPath(inputFile));
    startSession();
    assertEquals(1, collector.getNumStartEvents());
    assertTrue(morphline.process(input));
    assertTrue(areFieldsEqual(expected, collector.getRecords(), NUM_COLUMNS,
        false));
View Full Code Here

TOP

Related Classes of org.kitesdk.morphline.api.Record

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.