@Override
public TaskAttemptStateInternal transition(TaskAttemptImpl attempt,
TaskAttemptEvent event) {
TaskAttemptEventOutputFailed outputFailedEvent =
(TaskAttemptEventOutputFailed) event;
TezEvent tezEvent = outputFailedEvent.getInputFailedEvent();
TezTaskAttemptID failedDestTaId = tezEvent.getSourceInfo().getTaskAttemptID();
InputReadErrorEvent readErrorEvent = (InputReadErrorEvent)tezEvent.getEvent();
int failedInputIndexOnDestTa = readErrorEvent.getIndex();
if (readErrorEvent.getVersion() != attempt.getID().getId()) {
throw new TezUncheckedException(attempt.getID()
+ " incorrectly blamed for read error from " + failedDestTaId
+ " at inputIndex " + failedInputIndexOnDestTa + " version"
+ readErrorEvent.getVersion());
}
LOG.info(attempt.getID()
+ " blamed for read error from " + failedDestTaId
+ " at inputIndex " + failedInputIndexOnDestTa);
attempt.uniquefailedOutputReports.add(failedDestTaId);
float failureFraction = ((float) attempt.uniquefailedOutputReports.size())
/ outputFailedEvent.getConsumerTaskNumber();
// If needed we can also use the absolute number of reported output errors
// If needed we can launch a background task without failing this task
// to generate a copy of the output just in case.
// If needed we can consider only running consumer tasks
if (failureFraction <= MAX_ALLOWED_OUTPUT_FAILURES_FRACTION) {
return attempt.getInternalState();
}
String message = attempt.getID() + " being failed for too many output errors";
LOG.info(message);
attempt.addDiagnosticInfo(message);
// send input failed event
Vertex vertex = attempt.getVertex();
Map<Vertex, Edge> edges = vertex.getOutputVertices();
if (edges != null && !edges.isEmpty()) {
List<TezEvent> tezIfEvents = Lists.newArrayListWithCapacity(edges.size());
for (Vertex edgeVertex : edges.keySet()) {
tezIfEvents.add(new TezEvent(new InputFailedEvent(),
new EventMetaData(EventProducerConsumerType.SYSTEM,
vertex.getName(),
edgeVertex.getName(),
attempt.getID())));
}