Package com.cloudera.cdk.data

Examples of com.cloudera.cdk.data.DatasetRepository.load()


    final Dataset<GenericRecord> persistent = repo.load("logs");
    final DatasetWriter<GenericRecord> writer = persistent.newWriter();
    writer.open();

    // the source dataset: yesterday's partition in the staging area
    final Dataset<GenericRecord> staging = repo.load("logs-staging");
    final PartitionKey yesterday = getPartitionKey(staging, yesterdayTimestamp);
    final DatasetReader<GenericRecord> reader = staging
        .getPartition(yesterday, false).newReader();

    try {
View Full Code Here


    // open the repository
    final DatasetRepository repo = DatasetRepositories.open("repo:file:/tmp/data");

    // data is written to the staging dataset
    final Dataset<GenericRecord> staging = repo.load("logs-staging");
    final DatasetWriter<GenericRecord> writer = staging.newWriter();

    // this is going to build our simple log records
    final GenericRecordBuilder builder = new GenericRecordBuilder(
        staging.getDescriptor().getSchema());
View Full Code Here

    // Construct an HDFS-backed dataset repository rooted at /tmp/data
    DatasetRepository repo = DatasetRepositories.open("repo:hdfs:/tmp/data");

    // Load the events dataset
    Dataset<GenericRecord> events = repo.load("events");

    // Get a reader for the dataset and read all the events
    DatasetReader<GenericRecord> reader = events.newReader();
    try {
      reader.open();
View Full Code Here

    // Get a log4j logger
    Logger logger = Logger.getLogger(App.class);

    // Find the schema from the repository
    DatasetRepository repo = DatasetRepositories.open("repo:hdfs:/tmp/data");
    Schema schema = repo.load("events").getDescriptor().getSchema();

    // Build some events using the generic Avro API and log them using log4j
    GenericRecordBuilder builder = new GenericRecordBuilder(schema);
    for (long i = 0; i < 10; i++) {
      GenericRecord event = builder.set("id", i)
View Full Code Here

  @Override
  public int run(String[] args) throws Exception {
    DatasetRepository repo = DatasetRepositories.open("repo:hdfs://localhost:8020/user/cloudera");

    Dataset movies = repo.load("movies");
    DatasetReader reader = movies.newReader();
    try {
      reader.open();
      for (Object rec : reader) {
        System.err.println("Movie: " + rec);
View Full Code Here

          emitter.emit(pairSession.second());
        }
      }, Avros.specifics(Session.class));

    // Write the sessions to the "sessions" Dataset
    getPipeline().write(sessions, CrunchDatasets.asTarget(hcatRepo.load("sessions")),
        Target.WriteMode.APPEND);

    return run().succeeded() ? 0 : 1;
  }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.