transform(reader, warc);
}
protected void transform(final ARCReader reader, final File warc)
throws IOException {
WARCWriter writer = null;
// No point digesting. Digest is available after reading of ARC which
// is too late for inclusion in WARC.
reader.setDigest(false);
try {
BufferedOutputStream bos =
new BufferedOutputStream(new FileOutputStream(warc));
// Get the body of the first ARC record as a String so we can dump it
// into the first record of the WARC.
final Iterator<ArchiveRecord> i = reader.iterator();
ARCRecord firstRecord = (ARCRecord)i.next();
ByteArrayOutputStream baos =
new ByteArrayOutputStream((int)firstRecord.getHeader().
getLength());
firstRecord.dump(baos);
// Add ARC first record content as an ANVLRecord.
ANVLRecord ar = new ANVLRecord();
ar.addLabelValue("Filedesc", baos.toString());
List<String> metadata = new ArrayList<String>(1);
metadata.add(ar.toString());
// Now create the writer. If the reader was compressed, let's write
// a compressed WARC.
writer = new WARCWriter(
new AtomicInteger(),
bos,
warc,
new WARCWriterPoolSettingsData(
"", "", -1, reader.isCompressed(), null, metadata, generator));
// Write a warcinfo record with description about how this WARC
// was made.
writer.writeWarcinfoRecord(warc.getName(),
"Made from " + reader.getReaderIdentifier() + " by " +
this.getClass().getName() + "/" + getRevision());
for (; i.hasNext();) {
write(writer, (ARCRecord)i.next());
}
} finally {
if (reader != null) {
reader.close();
}
if (writer != null) {
// I don't want the close to be logged -- at least, not without a log
// of the opening (and that'd be a little silly for a simple script
// like this). Currently, the close is logged at level INFO so that
// closing of files gets written to log files. Raise the log level
// just for the close.
Logger l = Logger.getLogger(writer.getClass().getName());
Level oldLevel = l.getLevel();
l.setLevel(Level.WARNING);
try {
writer.close();
} finally {
l.setLevel(oldLevel);
}
}
}