Logger l = Logger.getLogger(writer.getClass().getName());
Level oldLevel = l.getLevel();
try {
l.setLevel(Level.WARNING);
for (final Iterator<ArchiveRecord> i = reader.iterator(); i.hasNext();) {
WARCRecord r = (WARCRecord)i.next();
if (!isARCType(r.getHeader().getMimetype())) {
continue;
}
if (r.getHeader().getContentBegin() <= 0) {
// Skip records without a positive content begin: because length
// includes the Header-Line and Named Fields, these would end up
// in the ARC unless there is a non-zero content begin.
continue;
}
String ip = (String)r.getHeader().
getHeaderValue((WARCConstants.HEADER_KEY_IP));
long length = r.getHeader().getLength();
int offset = r.getHeader().getContentBegin();
// This mimetype is not exactly what you'd expect to find in
// an ARC, though technically it's 'correct'. To get the right one,
// we'd need to parse the HTTP headers. That's messy, so we're not
// doing it for now.
String mimetype = r.getHeader().getMimetype();
// Clean out ISO time string '-', 'T', ':', and 'Z' characters.
String t = r.getHeader().getDate().replaceAll("[-T:Z]", "");
long time = ArchiveUtils.getSecondsSinceEpoch(t).getTime();
writer.write(r.getHeader().getUrl(), mimetype, ip, time,
(int)(length - offset), r);
}
} finally {
if (reader != null) {
reader.close();