LOG.info(String.format("Starting deduplication of a snapshot with %d tasks.", numInputTasks));
Snapshot partialSnapshot = snapshot.deepCopy();
partialSnapshot.unsetTasks();
DeduplicatedSnapshot deduplicatedSnapshot = new DeduplicatedSnapshot()
.setPartialSnapshot(partialSnapshot);
// Nothing to do if we don't have any input tasks.
if (!snapshot.isSetTasks()) {
LOG.warning("Got snapshot with unset tasks field.");
return deduplicatedSnapshot;
}
// Match each unique TaskConfig to its hopefully-multiple ScheduledTask owners.
ListMultimap<TaskConfig, ScheduledTask> index = Multimaps.index(
snapshot.getTasks(),
SCHEDULED_TO_CONFIG);
for (Entry<TaskConfig, List<ScheduledTask>> entry : Multimaps.asMap(index).entrySet()) {
deduplicatedSnapshot.addToTaskConfigs(entry.getKey());
for (ScheduledTask scheduledTask : entry.getValue()) {
deduplicatedSnapshot.addToPartialTasks(new DeduplicatedScheduledTask()
.setPartialScheduledTask(deepCopyWithoutTaskConfig(scheduledTask))
.setTaskConfigId(deduplicatedSnapshot.getTaskConfigsSize() - 1));
}
}
int numOutputTasks = deduplicatedSnapshot.getTaskConfigsSize();
LOG.info(String.format(
"Finished deduplicating snapshot. Deduplication ratio: %d/%d = %.2f%%.",
numInputTasks,
numOutputTasks,