this.run = false;
return;
}
}
}
final URIMetadataRow entry = eiter.next();
if (entry == null) {
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", "entry == null");
} else if (entry.hash() == null) {
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double) blacklistedUrls / totalSearchedUrls) * 100 + "%): " + "hash == null");
} else {
final URIMetadataRow.Components metadata = entry.metadata();
totalSearchedUrls++;
if (metadata == null) {
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", "corrupted entry for hash = " + ASCII.String(entry.hash()));
remove(entry.hash());
continue;
}
if (metadata.url() == null) {
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double) blacklistedUrls / totalSearchedUrls) * 100 + "%): " + ASCII.String(entry.hash()) + "URL == null");
remove(entry.hash());
continue;
}
if (blacklist.isListed(Blacklist.BLACKLIST_CRAWLER, metadata.url()) ||
blacklist.isListed(Blacklist.BLACKLIST_DHT, metadata.url()) ||
(crawlStacker.urlInAcceptedDomain(metadata.url()) != null)) {
lastBlacklistedUrl = metadata.url().toNormalform(true, true);
lastBlacklistedHash = ASCII.String(entry.hash());
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double) blacklistedUrls / totalSearchedUrls) * 100 + "%): " + ASCII.String(entry.hash()) + " " + metadata.url().toNormalform(false, true));
remove(entry.hash());
if (blacklistedUrls % 100 == 0) {
Log.logInfo("URLDBCLEANER", "Deleted " + blacklistedUrls + " URLs until now. Last deleted URL-Hash: " + lastBlacklistedUrl);
}
}
lastUrl = metadata.url().toNormalform(true, true);
lastHash = ASCII.String(entry.hash());
}
}
} catch (final RuntimeException e) {
if (e.getMessage() != null && e.getMessage().indexOf("not found in LURL") != -1) {
Log.logWarning("URLDBCLEANER", "urlHash not found in LURL", e);