// WARC input: the iterator is built from the input `in`, a reusable record
// holder and a record filter.
// NOTE(review): TrueFilter presumably accepts every record - confirm.
GZWarcRecord record = new GZWarcRecord();
Filter<WarcRecord> filter = Filters.adaptFilterBURL2WarcRecord (new TrueFilter());
WarcFilteredIterator it = new WarcFilteredIterator(in, record, filter);
// Single response object; the loop below re-fills it from each record.
WarcHttpResponse response = new WarcHttpResponse();

// Graph that receives the per-record metadata statements added in the loop below.
Graph mdGraph = new org.openrdf.model.impl.GraphImpl();
// Identifier for the metadata graph; its use lies outside the lines shown here.
String mdGraphURI = "http://challenge.semanticweb.org/2008/metadata";
ValueFactory vf = mdGraph.getValueFactory();
// Dublin Core elements namespace; the "source" and "date" predicates are created in it.
String dcNS = "http://purl.org/dc/elements/1.1/";

// Factory used below to turn each record's creation date into an
// XMLGregorianCalendar. Fail fast if it cannot be created: continuing with a
// null factory would only surface later as a NullPointerException at the
// first dtf.newXMLGregorianCalendar(...) call, far from the real cause.
DatatypeFactory dtf;
try {
    dtf = DatatypeFactory.newInstance();
} catch (DatatypeConfigurationException e1) {
    throw new IllegalStateException(
            "Unable to create a javax.xml.datatype.DatatypeFactory", e1);
}
// Scratch calendar, reset to each record's creation date inside the loop.
GregorianCalendar c = new GregorianCalendar ();
try {
int cnt = 0;
// while (cnt < 10 && it.hasNext()) {
while (it.hasNext()) {
WarcRecord nextRecord = it.next();
//Get the HttpResponse
try {
response.fromWarcRecord (nextRecord);
if (debugMode) {
System.out.println("RECORD : " + String.format("%05d", cnt));
System.out.println(" subjectUri: " + nextRecord.header.subjectUri);
System.out.println("contentType: " + nextRecord.header.contentType);
System.out.println(" dataLength: " + nextRecord.header.dataLength);
System.out.println("actual data: " + nextRecord.block.length());
System.out.println(" missing: " +
(nextRecord.header.dataLength - nextRecord.block.length()) + "b");
}
l.output(nextRecord.header.subjectUri.toString());
URI s, p, o;
Literal lit;
if (cnt == max ){ return ; }
if (cnt >= min && cnt < max) {
s = vf.createURI(nextRecord.header.subjectUri.toString());
p = vf.createURI(dcNS, "source");
lit = vf.createLiteral(inFile);
mdGraph.add(s,p,lit);
c.setTime(nextRecord.header.creationDate);
XMLGregorianCalendar xc = dtf.newXMLGregorianCalendar(c);
p = vf.createURI(dcNS, "date");
lit = vf.createLiteral(xc);
mdGraph.add(s,p,lit);
curFile = w.write(response.contentAsStream(), cnt, start);
try {
ldAddStmt.setString(1, curFile);
ldAddStmt.setString(2, nextRecord.header.subjectUri.toString());
ResultSet res = ldAddStmt.executeQuery();