public void map(LongWritable key, WritableWarcRecord record,
OutputCollector<Text, BehemothDocument> output, Reporter reporter)
throws IOException {
WarcRecord wr = record.getRecord();
if (wr.getHeaderRecordType().equals("response") == false)
return;
byte[] binarycontent = wr.getContent();
String uri = wr.getHeaderMetadataItem("WARC-Target-URI");
// skip non http documents
if (uri.startsWith("http") == false)
return;
String ip = wr.getHeaderMetadataItem("WARC-IP-Address");
HttpResponse response;
try {
response = new HttpResponse(binarycontent);
} catch (ProtocolException e) {