/**
 * Job counters reported while turning raw log lines into parsed entries.
 */
public enum LogCounters {
    /**
     * Incremented by one for every input line that could not be decoded,
     * i.e. the reader returned {@code null} or threw an {@code IOException}.
     */
    LOG_LINE_ERRORS
}
/**
 * Decodes every raw log line into a {@link CommonLogEntry} using an
 * {@link ApacheCommonLogReader}.
 *
 * <p>Lines that cannot be decoded (the reader returns {@code null} or throws an
 * {@link IOException}) are not emitted; instead {@link LogCounters#LOG_LINE_ERRORS}
 * is incremented and the offending line is logged at error level.
 *
 * @param lines the raw log lines, one entry per element
 * @return a collection holding one {@link CommonLogEntry} per successfully decoded line,
 *         typed via the record type of {@code lines}' own type family
 */
public static PCollection<CommonLogEntry> logs(PCollection<String> lines) {
    PTypeFamily typeFamily = lines.getTypeFamily();

    DoFn<String, CommonLogEntry> decodeFn = new DoFn<String, CommonLogEntry>() {
        // Both helpers are transient and built in initialize() rather than at construction.
        // NOTE(review): presumably because the DoFn is serialized and shipped to workers - confirm.
        transient ApacheCommonLogReader reader;
        transient Logger logger;

        @Override
        public void initialize() {
            reader = new ApacheCommonLogReader();
            logger = LoggerFactory.getLogger(CrunchUtils.class);
        }

        @Override
        public void process(String input, Emitter<CommonLogEntry> emitter) {
            final CommonLogEntry entry;
            try {
                entry = reader.decodeLine(input);
            } catch (IOException e) {
                reportBadLine(input, e);
                return;
            }

            // A null result means the reader gave up on the line without throwing.
            if (entry == null) {
                reportBadLine(input, null);
                return;
            }
            emitter.emit(entry);
        }

        /** Bumps the error counter and logs the raw line; {@code cause} is null when nothing was thrown. */
        void reportBadLine(String rawLine, @Nullable Throwable cause) {
            getCounter(LogCounters.LOG_LINE_ERRORS).increment(1);
            logger.error("Hit exception parsing line '" + rawLine + "'", cause);
        }
    };

    return lines.parallelDo(decodeFn, typeFamily.records(CommonLogEntry.class));
}