throw new RuntimeException(e);
}
}
protected void fetch(CrawlURI curi, String whoisServer, String whoisQuery) {
WhoisClient client = new WhoisClient();
Recorder recorder = curi.getRecorder();
try {
client.setConnectTimeout(getSoTimeoutMs());
client.setDefaultTimeout(getSoTimeoutMs());
if (curi.getUURI().getPort() > 0) {
client.connect(whoisServer, curi.getUURI().getPort());
} else {
client.connect(whoisServer);
}
client.setSoTimeout(getSoTimeoutMs()); // must be after connect()
curi.getData().put(CoreAttributeConstants.A_WHOIS_SERVER_IP,
client.getRemoteAddress().getHostAddress());
recorder.inputWrap(client.getInputStream(whoisQuery));
// look for info about whois server in the response
// XXX run regex on the whole thing, rather than line by line?
BufferedReader reader = new BufferedReader(new InputStreamReader(recorder.getRecordedInput(), "ASCII"));
for (String line = reader.readLine(); line != null; line = reader.readLine()) {
Matcher matcher = TextUtils.getMatcher(WHOIS_SERVER_REGEX, line);
if (matcher.find()) {
// gets rid of "domain " for whois.verisign-grs.com queries
String key = whoisQuery.replaceFirst("(\\S+\\s+)+", "").toLowerCase();
referralServers.put(key, matcher.group(1).toLowerCase());
if (logger.isLoggable(Level.FINE)) {
logger.fine("added referral server " + matcher.group(1) + " to server list for " + key);
}
}
}
curi.setContentType("text/plain");
curi.setFetchStatus(S_WHOIS_SUCCESS);
} catch (IOException e) {
if (logger.isLoggable(Level.FINE)) {
logger.fine("failed to connect to whois server for uri " + curi + ": " + e);
}
curi.getNonFatalFailures().add(e);
curi.setFetchStatus(S_CONNECT_FAILED);
} finally {
recorder.close();
curi.setContentSize(recorder.getRecordedInput().getSize());
logger.fine(curi + ": " + recorder.getRecordedInput().getSize() + " bytes read");
if (client != null && client.isConnected()) try {
client.disconnect();
} catch (IOException e) {
logger.fine("problem closing connection to whois server for uri " + curi + ": " + e);
}
urlProgress.put(curi.toString(), UrlStatus.DONE.ordinal());