// Fragment of a larger method (its header is not shown here): parses the incoming
// `values` into Report objects, orders them with CausalGraph.topoSort, and writes the
// ordered reports as a single TextArrayWritable under the key `taskID`.
// `reports`, `reportCounter`, `edgeCounter`, `badEdgeCounter`, `taskID`, `taskIDString`,
// `log` and MAX_IN_MEMORY_REPORTS are declared outside this fragment.
// NOTE(review): confirm `reports` is emptied between invocations; otherwise entries left
// over from an earlier call would be counted as duplicates below.

// Counter in group "app" named "duplicate report"; incremented by `dups` further down.
Counter dupCounter = context.getCounter("app", "duplicate report");
// NOTE(review): `edgeCount` is not read or written anywhere in the visible lines.
int edgeCount = 0, dups = 0, numReports = 0;
// Parse every incoming value and buffer it in `reports`, keyed by its op-id string.
for(Text rep_text: values) {
Report r = Report.createFromString(rep_text.toString());
numReports++;
// Only buffer while the running count is strictly below the limit, so the report that
// brings the count up to MAX_IN_MEMORY_REPORTS is parsed but never stored.
if(numReports < MAX_IN_MEMORY_REPORTS) {
// A key that is already present counts as a duplicate. The put() below is still executed;
// presumably `reports` is a Map, in which case the new report replaces the old one -
// TODO confirm against the declaration of `reports`.
if(reports.containsKey(r.getMetadata().getOpIdString()))
dups++;
reports.put(r.getMetadata().getOpIdString(), r);
} else if(numReports == MAX_IN_MEMORY_REPORTS) {
// Limit reached: bail out. The intent was to switch to an external sort, but none is
// implemented in the visible code; this return skips the counter updates and the
// context.write below, so nothing is emitted on this path.
return;
} else
;
// Placeholder (empty statement above) for the external sort. For any positive limit this
// branch cannot be reached, because the `return` above fires as soon as the count equals
// the limit.
}
// Record how many entries ended up in `reports` and how many duplicate keys were seen.
reportCounter.increment(reports.size());
dupCounter.increment(dups);
// Build the graph over the buffered reports and sort it topologically. `reverser` is handed
// to topoSort and its edgeCount/badCount fields are read afterwards.
CausalGraph g = new CausalGraph(reports);
PtrReverse reverser = new PtrReverse();
List<Report> sortedReports = g.topoSort(reverser);
int sortedLen = sortedReports.size();
// A size mismatch means topoSort did not return every buffered report. Warn, but keep
// going: whatever was sorted is still written out below.
if(sortedLen!= reports.size()) {
if(sortedLen > 0)
log.warn(taskIDString+": I only sorted " + sortedLen + " items, but expected "
+ reports.size()+", is your list cyclic?");
else
log.warn(taskIDString+": every event in graph has a predecessor; perhaps "
+ "the start event isn't in the input set?");
}
// Publish the edge statistics collected in `reverser`.
log.debug(taskIDString+": " + reverser.edgeCount + " total edges");
edgeCounter.increment(reverser.edgeCount);
badEdgeCounter.increment(reverser.badCount);
// Serialize the sorted reports, in topoSort order, into a Text array.
Text[] finalOutput = new Text[sortedReports.size()];
int i=0;
for(Report r:sortedReports)
finalOutput[i++] = new Text(r.toString());
// Wrap the array and emit it as a single record keyed by `taskID`.
TextArrayWritable out = new TextArrayWritable();
out.set(finalOutput);
context.write(taskID, out);
// Open question from the original author: should the values be sorted topologically and
// output as a list (as done above), or handled some other way?