boolean normalExit = true;
// Set up firehose
final Period intermediatePersistPeriod = spec.getTuningConfig().getIntermediatePersistPeriod();
final Firehose firehose = spec.getIOConfig().getFirehoseFactory().connect(spec.getDataSchema().getParser());
// It would be nice to get the PlumberSchool in the constructor, although that would require Jackson injectables
// for things like the ServerView, which seems a bit odd. Perhaps revisit this once Guice has been introduced.
final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(this, toolbox);
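// TaskActionSegmentPublisher publishes segments through the task action client, so segment metadata is
// recorded via the overlord rather than written to the metadata store directly.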
// NOTE: We talk to the coordinator in various places in the plumber, and we could be more robust to
// coordinator issues. Right now we block or throw in whichever thread triggered the coordinator call,
// which will typically be either the main data-processing loop or the persist thread.
// Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
final DataSegmentAnnouncer lockingSegmentAnnouncer = new DataSegmentAnnouncer()
{
@Override
public void announceSegment(final DataSegment segment) throws IOException
{
// Side effect: Calling announceSegment causes a lock to be acquired
toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
toolbox.getSegmentAnnouncer().announceSegment(segment);
}
@Override
public void unannounceSegment(final DataSegment segment) throws IOException
{
try {
toolbox.getSegmentAnnouncer().unannounceSegment(segment);
}
finally {
toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
}
}
@Override
public void announceSegments(Iterable<DataSegment> segments) throws IOException
{
// Side effect: Calling announceSegments causes locks to be acquired
for (DataSegment segment : segments) {
toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
}
toolbox.getSegmentAnnouncer().announceSegments(segments);
}
@Override
public void unannounceSegments(Iterable<DataSegment> segments) throws IOException
{
try {
toolbox.getSegmentAnnouncer().unannounceSegments(segments);
}
finally {
for (DataSegment segment : segments) {
toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
}
}
}
};
// NOTE: getVersion will block if there is lock contention, which in turn blocks plumber.getSink
// NOTE: (and thus the firehose).
// This shouldn't usually happen, since we don't expect people to submit tasks that intersect with the
// realtime window; but if they do, it can be problematic. If we decide to care, we can use more threads in
// the plumber so that waiting for the coordinator doesn't block data processing.
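// This policy derives each segment's version from the version of the task lock acquired for its interval.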
final VersioningPolicy versioningPolicy = new VersioningPolicy()
{
@Override
public String getVersion(final Interval interval)
{
try {
// Side effect: Calling getVersion causes a lock to be acquired
final TaskLock myLock = toolbox.getTaskActionClient()
.submit(new LockAcquireAction(interval));
return myLock.getVersion();
}
catch (IOException e) {
throw Throwables.propagate(e);
}
}
};
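// Assemble the spec pieces, pointing intermediate persists at the task work directory and plugging in the
// lock-aware versioning policy defined above.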
DataSchema dataSchema = spec.getDataSchema();
RealtimeIOConfig realtimeIOConfig = spec.getIOConfig();
RealtimeTuningConfig tuningConfig = spec.getTuningConfig()
.withBasePersistDirectory(new File(toolbox.getTaskWorkDir(), "persist"))
.withVersioningPolicy(versioningPolicy);
final FireDepartment fireDepartment = new FireDepartment(
dataSchema,
realtimeIOConfig,
tuningConfig,
null,
null,
null,
null
);
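// The metrics monitor periodically emits the FireDepartment's ingestion metrics (rows processed, thrown away,
// unparseable, persists) once registered with the monitor scheduler below.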
final RealtimeMetricsMonitor metricsMonitor = new RealtimeMetricsMonitor(ImmutableList.of(fireDepartment));
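// Hold on to the conglomerate so the task can serve queries against its in-memory data while it runs.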
this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();
// NOTE: This pusher selects its path based purely on global configuration and the DataSegment, which means
// NOTE: that redundant realtime tasks will upload to the same location. This can cause index.zip and
// NOTE: descriptor.json to mismatch, or it can cause historical nodes to load different instances of the
// NOTE: "same" segment.
final RealtimePlumberSchool plumberSchool = new RealtimePlumberSchool(
toolbox.getEmitter(),
toolbox.getQueryRunnerFactoryConglomerate(),
toolbox.getSegmentPusher(),
lockingSegmentAnnouncer,
segmentPublisher,
toolbox.getNewSegmentServerView(),
toolbox.getQueryExecutorService(),
null,
null,
null,
null,
null,
null,
0
);
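// The plumber manages the in-memory sinks, intermediate persists to disk, and handoff of completed segments.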
this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, fireDepartment.getMetrics());
try {
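// startJob sets up the plumber's sinks; for the realtime plumber this includes restoring any data
// previously persisted under the base persist directory.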
plumber.startJob();
// Set up metrics emission
toolbox.getMonitorScheduler().addMonitor(metricsMonitor);
// Time to read data!
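// nextFlush is the wall-clock deadline for the next intermediate persist.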
long nextFlush = new DateTime().plus(intermediatePersistPeriod).getMillis();
while (firehose.hasMore()) {
final InputRow inputRow;
try {
inputRow = firehose.nextRow();
if (inputRow == null) {
continue;
}
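// plumber.add returns the number of rows in the corresponding sink's in-memory index, or -1 if no sink
// will accept the row (e.g. its timestamp is outside the realtime window); such rows are thrown away below.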
int currCount = plumber.add(inputRow);
if (currCount == -1) {
fireDepartment.getMetrics().incrementThrownAway();
log.debug("Throwing away event[%s]", inputRow);
if (System.currentTimeMillis() > nextFlush) {
plumber.persist(firehose.commit());
nextFlush = new DateTime().plus(intermediatePersistPeriod).getMillis();
}
continue;
}
fireDepartment.getMetrics().incrementProcessed();
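// Persist (committing the firehose) once the in-memory row count reaches the configured maximum or the
// intermediate persist period has elapsed.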
if (currCount >= tuningConfig.getMaxRowsInMemory() || System.currentTimeMillis() > nextFlush) {
plumber.persist(firehose.commit());
nextFlush = new DateTime().plus(intermediatePersistPeriod).getMillis();
}
}
catch (ParseException e) {
log.warn(e, "unparseable line");
fireDepartment.getMetrics().incrementUnparseable();
}
}
}
catch (Throwable e) {
normalExit = false;
log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource())
.emit();
throw e;
}
finally {
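// Only persist remaining data and finish the job (handing off segments) if processing exited normally.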
if (normalExit) {
try {
plumber.persist(firehose.commit());
plumber.finishJob();
}
catch (Exception e) {
log.makeAlert(e, "Failed to finish realtime task").emit();
}