return null;
}
public boolean executeResolve(final int thread) {
JTweet tweet = null;
try {
tweet = resolverQueue.take();
} catch (Exception ex) {
if (thread == 0)
logger.warn("url resolver " + thread + " died " + ex.getMessage());
return false;
}
String origUrl = tweet.getUrl();
String url = origUrl;
try {
boolean doFetch = true;
String resUrl = fetcher.getResolvedUrl(url, resolveTimeout);
if (!Helper.isEmpty(resUrl) && resUrl.length() > url.length()) {
url = resUrl;
// check if resolved url already exists
if (exists(resUrl)) {
unresolvedCache.remove(resUrl);
doFetch = false;
}
}
if (doFetch) {
JResult res = fetcher.fetchAndExtract(url, resolveTimeout, false);
// set resolved url
if (tweet.getUrlEntries().size() > 0) {
UrlEntry ue = tweet.getUrlEntries().iterator().next();
ue.setResolvedUrl(res.getUrl());
ue.setResolvedTitle(res.getTitle());
ue.setResolvedSnippet(res.getText());
ue.setResolvedDomain(Helper.extractDomain(url));
}
if (urlTitleCleaner.contains(res.getTitle()))
tweet.setQuality(20);
if (res.getTitle().isEmpty())
emptyTitleCounter.addAndGet(1);
counter.addAndGet(1);
if (thread < 3) {
float secs = (System.nanoTime() - start.get()) / 1e+9f;
logger.info(thread + "| " + counter.get() / secs + " entries/sec"//, secs:" + secs
+ ", feeded:" + counter
+ ", resolverQueue.size:" + resolverQueue.size()
+ ", unresolved.size:" + unresolvedCache.size()
+ ", tooOld.size:" + tooOldMap.size()
+ ", empty titles:" + emptyTitleCounter);
}
}
} catch (Exception ex) {
//logger.info("Error while resolveAndFetch url:" + art.getUrl() + " Error:" + Helper.getMsg(ex));
tweet.setQuality(Math.round(tweet.getQuality() * 0.8f));
} finally {
// always queue the tweet for indexing, even if resolving or fetching its url failed
tweetSearch.queueObject(tweet);
// real time get ensures that we have at least the url in aindex (not so for origURL!)
unresolvedCache.remove(tweet.getUrl());
// DISABLED for now as
// if (!checkAgainQueue.offer(art))
// logger.error("checkAgainQueue full. Skipped:" + art.getUrl());
}