public void run() {
if (this.location == null || this.location.length() <= 0) {
return;
}
ArchiveReader arc = getArchiveReader();
if (arc == null) {
return;
}
try {
ARCMapRunner.this.mapper.onARCOpen();
this.reporter.incrCounter(Counter.ARCS_COUNT, 1);
// Iterate over each ARCRecord.
for (final Iterator i = arc.iterator();
i.hasNext() && !currentThread().isInterrupted();) {
final ARCRecord rec = (ARCRecord)i.next();
this.reporter.incrCounter(Counter.ARCRECORDS_COUNT, 1);
try {
ARCMapRunner.this.mapper.map(
new Text(rec.getMetaData().getUrl()),
new ObjectWritable(rec), this.output,
this.reporter);
final long b = rec.getMetaData().getContentBegin();
final long l = rec.getMetaData().getLength();
final long recordLength = (l > b)? (l - b): l;
if (recordLength >
ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE) {
// Now, if the content length is larger than a
// standard ARC, then it is most likely the last
// record in the ARC because ARC is closed after we
// exceed 100MB (DEFAULT_MAX_ARC...). Calling
// hasNext above will make us read through the
// whole record, even if it's a 1.7G video. On a
// loaded machine, this might cause us to time out with
// the tasktracker -- so, just skip out here.
this.reporter.setStatus("skipping " +
this.location + " -- very long record " +
rec.getMetaData());
this.reporter.
incrCounter(Counter.LONG_ARCRECORDS_COUNT, 1);
break;
}
} catch (final Throwable e) {
// Failed parse of record. Keep going.
LOG.warn("Error processing " + rec.getMetaData(), e);
}
}
if (currentThread().isInterrupted()) {
LOG.info(currentThread().getName() + " interrupted");
}
this.reporter.setStatus("closing " + this.location, true);
} catch (final Throwable e) {
// Problem parsing arc file.
this.reporter.incrCounter(Counter.BAD_ARC_PARSE_COUNT, 1);
final String msg = "Error parsing " + this.location;
//try {
this.reporter.setStatus(msg, true);
/* TODO MC - to be compatible with hadoop 0.14
} catch (final IOException ioe) {
ioe.printStackTrace();
}
*/
LOG.warn("ARCMapRunner - Throwable:"+ msg, e);
}
finally {
try {
arc.close();
ARCMapRunner.this.mapper.onARCClose();
} catch (final IOException e) {
e.printStackTrace();
}
}