this.run = false;
return;
}
}
}
final URIMetadataRow entry = eiter.next();
if (entry == null) {
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", "entry == null");
} else if (entry.hash() == null) {
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++this.blacklistedUrls + " blacklisted (" + ((double) this.blacklistedUrls / this.totalSearchedUrls) * 100 + "%): " + "hash == null");
} else {
final URIMetadataRow.Components metadata = entry.metadata();
this.totalSearchedUrls++;
if (metadata == null) {
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", "corrupted entry for hash = " + ASCII.String(entry.hash()));
remove(entry.hash());
continue;
}
if (metadata.url() == null) {
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++this.blacklistedUrls + " blacklisted (" + ((double) this.blacklistedUrls / this.totalSearchedUrls) * 100 + "%): " + ASCII.String(entry.hash()) + "URL == null");
remove(entry.hash());
continue;
}
if (this.blacklist.isListed(Blacklist.BLACKLIST_CRAWLER, metadata.url()) ||
this.blacklist.isListed(Blacklist.BLACKLIST_DHT, metadata.url()) ||
(this.crawlStacker.urlInAcceptedDomain(metadata.url()) != null)) {
this.lastBlacklistedUrl = metadata.url().toNormalform(true, true);
this.lastBlacklistedHash = ASCII.String(entry.hash());
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++this.blacklistedUrls + " blacklisted (" + ((double) this.blacklistedUrls / this.totalSearchedUrls) * 100 + "%): " + ASCII.String(entry.hash()) + " " + metadata.url().toNormalform(false, true));
remove(entry.hash());
if (this.blacklistedUrls % 100 == 0) {
Log.logInfo("URLDBCLEANER", "Deleted " + this.blacklistedUrls + " URLs until now. Last deleted URL-Hash: " + this.lastBlacklistedUrl);
}
}
this.lastUrl = metadata.url().toNormalform(true, true);
this.lastHash = ASCII.String(entry.hash());
}
}
} catch (final RuntimeException e) {
if (e.getMessage() != null && e.getMessage().indexOf("not found in LURL",0) != -1) {
Log.logWarning("URLDBCLEANER", "urlHash not found in LURL", e);