log.logSevere("RuntimeException:", e);
}
}
log.logInfo("URLs vorher: " + urlIndexFile.size() + " Entries loaded during Iteratorloop: " + iteratorCount + " kaputte URLs: " + damagedURLS.size());
final HTTPClient client = new HTTPClient();
final Iterator<String> eiter2 = damagedURLS.iterator();
byte[] urlHashBytes;
while (eiter2.hasNext()) {
urlHashBytes = ASCII.getBytes(eiter2.next());
// trying to fix the invalid URL
String oldUrlStr = null;
try {
// look up the stored row entry for this URL hash
final Row.Entry entry = urlIndexFile.get(urlHashBytes);
// read the damaged URL string stored in column 1 of the entry
oldUrlStr = entry.getColString(1).trim();
int pos = -1;
if ((pos = oldUrlStr.indexOf("://")) != -1) {
// try to repair the URL: replace whatever precedes "://" with the "http" scheme
final String newUrlStr = "http://" + oldUrlStr.substring(pos + 3);
final DigestURI newUrl = new DigestURI(newUrlStr);
if (client.HEADResponse(newUrl.toString()) != null
&& client.getHttpResponse().getStatusLine().getStatusCode() == 200) {
entry.setCol(1, UTF8.getBytes(newUrl.toString()));
urlIndexFile.put(entry);
if (log.isInfo()) log.logInfo("UrlDB-Entry with urlHash '" + ASCII.String(urlHashBytes) + "' corrected\n\tURL: " + oldUrlStr + " -> " + newUrlStr);
} else {
remove(urlHashBytes);
if (log.isInfo()) log.logInfo("UrlDB-Entry with urlHash '" + ASCII.String(urlHashBytes) + "' removed\n\tURL: " + oldUrlStr + "\n\tConnection Status: " + (client.getHttpResponse() == null ? "null" : client.getHttpResponse().getStatusLine()));
}
}
} catch (final Exception e) {
remove(urlHashBytes);
if (log.isInfo()) log.logInfo("UrlDB-Entry with urlHash '" + ASCII.String(urlHashBytes) + "' removed\n\tURL: " + oldUrlStr + "\n\tExecption: " + e.getMessage());