// Generated records are written to the "logs-staging" dataset.
final Dataset<GenericRecord> staging = repo.load("logs-staging");
final DatasetWriter<GenericRecord> writer = staging.newWriter();
// Builds the simple log records against the staging dataset's schema.
final GenericRecordBuilder builder = new GenericRecordBuilder(
    staging.getDescriptor().getSchema());
// Timestamps start DAY_IN_MILLIS before the current time and advance
// 5 seconds per record.
final Calendar now = Calendar.getInstance();
final long yesterday = now.getTimeInMillis() - DAY_IN_MILLIS;
try {
  writer.open();
  // Generate one message per index in [0, 15000] (inclusive, i.e. 15,001
  // records), each 5 seconds apart. That spans 75,000 seconds, which is a
  // little less than 24 hours worth of messages.
  for (int index = 0; index <= 15000; index++) {
    LOG.info("Generating log message " + index);
    // 5000L keeps the offset arithmetic in long rather than int.
    builder.set("timestamp", yesterday + index * 5000L);
    builder.set("component", "GenerateSimpleLogs");
    // The same random index selects both the level and its paired message.
    final int level = rand.nextInt(LOG_LEVELS.length);
    builder.set("level", LOG_LEVELS[level]);
    builder.set("message", LOG_MESSAGES[level]);
    writer.write(builder.build());
  }
} finally {
  // Always close the writer, even if flush() fails; otherwise an exception
  // thrown by flush() would skip close() and leak the writer's resources.
  try {
    writer.flush();
  } finally {
    writer.close();
  }
}