Package com.cloudera.cdk.morphline.api

Examples of com.cloudera.cdk.morphline.api.Record


  @Test
  public void testLoadSolrBasic() throws Exception {
    //System.setProperty("ENV_SOLR_HOME", testSolrHome + "/collection1");
    morphline = createMorphline("test-morphlines/loadSolrBasic");   
    //System.clearProperty("ENV_SOLR_HOME");
    Record record = new Record();
    record.put(Fields.ID, "id0");
    record.put("first_name", "Nadja"); // will be sanitized
    startSession();
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record));
    assertEquals(1, collector.getNumStartEvents());
    Notifications.notifyCommitTransaction(morphline);
    Record expected = new Record();
    expected.put(Fields.ID, "id0");
    assertEquals(Arrays.asList(expected), collector.getRecords());
    assertEquals(1, queryResultSetSize("*:*"));
    Notifications.notifyRollbackTransaction(morphline);
    Notifications.notifyShutdown(morphline);
  }
View Full Code Here


   
  @Test
  public void testTokenizeText() throws Exception {
    morphline = createMorphline("test-morphlines/tokenizeText");
    for (int i = 0; i < 3; i++) {
      Record record = new Record();
      record.put(Fields.MESSAGE, "Hello World!");
      record.put(Fields.MESSAGE, "\nFoo@Bar.com #%()123");
      Record expected = record.copy();
      expected.getFields().putAll("tokens", Arrays.asList("hello", "world", "foo", "bar.com", "123"));
      collector.reset();
      startSession();
      Notifications.notifyBeginTransaction(morphline);
      assertTrue(morphline.process(record));
      assertEquals(1, collector.getNumStartEvents());
View Full Code Here

      validateArguments();
    }
 
    @Override
    protected boolean doProcess(Record inputRecord, InputStream stream) throws IOException {
      Record template = inputRecord.copy();
      removeAttachments(template);
      template.removeAll(Fields.MESSAGE);
      Charset detectedCharset = detectCharset(inputRecord, charset)
      Reader reader = new InputStreamReader(stream, detectedCharset);
      BufferedReader lineReader = new BufferedReader(reader, getBufferSize(stream));
      boolean isFirst = true;
      String line;

      while ((line = lineReader.readLine()) != null) {
        if (isFirst && ignoreFirstLine) {
          isFirst = false;
          continue; // ignore first line
        }
        if (line.length() == 0) {
          continue; // ignore empty lines
        }
        if (commentPrefix != null && line.startsWith(commentPrefix)) {
          continue; // ignore comments
        }
        Record outputRecord = template.copy();
        outputRecord.put(Fields.MESSAGE, line);
        incrementNumRecords();
       
        // pass record to next command in chain:
        if (!getChild().process(outputRecord)) {
          return false;
View Full Code Here

  public static void notifyStartSession(Command command) {
    notify(command, LifecycleEvent.START_SESSION);
  }
 
  private static void notify(Command command, LifecycleEvent event) {
    Record notification = new Record();
    notification.put(LIFE_CYLCLE, event);
    command.notify(notification);
  }
View Full Code Here

   
    waitForRecoveriesToFinish(false);
   
    // load avro records via morphline and zk into solr
    morphline = parse("test-morphlines/tutorialReadAvroContainer");   
    Record record = new Record();
    byte[] body = Files.toByteArray(file);   
    record.put(Fields.ATTACHMENT_BODY, body);
    startSession();
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record));
    assertEquals(1, collector.getNumStartEvents());
   
    commit();
   
    // fetch sorted result set from solr
    QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort("id", SolrQuery.ORDER.asc));  
    assertEquals(2104, collector.getRecords().size());
    assertEquals(collector.getRecords().size(), rsp.getResults().size());
   
    Collections.sort(collector.getRecords(), new Comparator<Record>() {
      @Override
      public int compare(Record r1, Record r2) {
        return r1.get("id").toString().compareTo(r2.get("id").toString());
      }     
    });  

    // fetch test input data and sort like solr result set
    List<GenericData.Record> records = new ArrayList();
    FileReader<GenericData.Record> reader = new DataFileReader(file, new GenericDatumReader());
    while (reader.hasNext()) {
      GenericData.Record expected = reader.next();
      records.add(expected);
    }
    assertEquals(collector.getRecords().size(), records.size());   
    Collections.sort(records, new Comparator<GenericData.Record>() {
      @Override
      public int compare(GenericData.Record r1, GenericData.Record r2) {
        return r1.get("id").toString().compareTo(r2.get("id").toString());
      }     
    });  
   
    Object lastId = null;
    for (int i = 0; i < records.size(); i++) { 
      //System.out.println("myrec" + i + ":" + records.get(i));     
      Object id = records.get(i);
      if (id != null && id.equals(lastId)) {
        throw new IllegalStateException("Detected duplicate id. Test input data must not contain duplicate ids!");       
      }
      lastId = id;
    }
   
    for (int i = 0; i < records.size(); i++) { 
      //System.out.println("myrsp" + i + ":" + rsp.getResults().get(i));     
    }   

    Iterator<SolrDocument> rspIter = rsp.getResults().iterator();
    for (int i = 0; i < records.size(); i++) { 
      // verify morphline spat out expected data
      Record actual = collector.getRecords().get(i);
      GenericData.Record expected = records.get(i);
      Preconditions.checkNotNull(expected);
      assertTweetEquals(expected, actual, i);
     
      // verify Solr result set contains expected data
      actual = new Record();
      actual.getFields().putAll(next(rspIter));
      assertTweetEquals(expected, actual, i);
    }
   
    Notifications.notifyRollbackTransaction(morphline);
    Notifications.notifyShutdown(morphline);
View Full Code Here

      clob.setLength(0);
      int len;
      while ((len = reader.read(buffer)) >= 0) {
        clob.append(buffer, 0, len);
      }
      Record outputRecord = inputRecord.copy();
      removeAttachments(outputRecord);
      outputRecord.replaceValues(Fields.MESSAGE, clob.toString());
       
      // pass record to next command in chain:
      return getChild().process(outputRecord);
    }
View Full Code Here

          actual.getFirstValue(fieldName).toString());
    }
  }

  private String toString(GenericData.Record avroRecord) {
    Record record = new Record();
    for (Field field : avroRecord.getSchema().getFields()) {
      record.put(field.name(), avroRecord.get(field.pos()));
    }
    return record.toString(); // prints sorted by key for human readability
  }
View Full Code Here

    }
 
    @Override
    protected boolean doProcess(Record record) {
      for (Command childRule : childRules) {
        Record copy = copyRecords ? record.copy() : record;
        if (!catchExceptions) {
          if (childRule.process(copy)) {
            return true; // rule was executed successfully; no need to try the other remaining rules
          }         
        } else {
View Full Code Here

    Notifications.notifyStartSession(morphline);
  }

  protected ListMultimap<String, Object> next(Iterator<SolrDocument> iter) {
    SolrDocument doc = iter.next();
    Record record = toRecord(doc);
    record.removeAll("_version_"); // the values of this field are unknown and internal to solr
    return record.getFields();   
  }
View Full Code Here

    record.removeAll("_version_"); // the values of this field are unknown and internal to solr
    return record.getFields();   
  }
 
  private Record toRecord(SolrDocument doc) {
    Record record = new Record();
    for (String key : doc.keySet()) {
      record.getFields().replaceValues(key, doc.getFieldValues(key));       
    }
    return record;
  }
View Full Code Here

TOP

Related Classes of com.cloudera.cdk.morphline.api.Record

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.