if (disposition.equals("SUCCESS"))
isSuccessful = true;
}
// Output a basic page count
output.collect(new Text("Pages Requested\tTotal"), new LongWritable(1));
output.collect(new Text("Pages Requested\t"+disposition), new LongWritable(1));
// Output the HTTP result
String httpResult = "[missing]";
if (jsonObj.has("http_result"))
httpResult = jsonObj.get("http_result").getAsString().trim().toUpperCase();
output.collect(new Text("HTTP Code\t"+httpResult+" ("+disposition+")"), new LongWritable(1));
// If the request was not successful, move to the next record
if (isSuccessful == false)
return;
// Gather the host name
try {
URI uri = new URI(url);
String host = uri.getHost();
if (host == null || host.equals(""))
throw new URISyntaxException(url, "Unable to gather host or no host found");
// Gather the domain object
InternetDomainName domainObj = InternetDomainName.from(host);
// Output the TLD
String publicSuffix = "[none]";
if (domainObj.hasPublicSuffix())
publicSuffix = domainObj.publicSuffix().name().trim().toLowerCase();
output.collect(new Text("TLD\t"+publicSuffix), new LongWritable(1));
// Output the private domain
// WARNING - This dramatically increases the size of the output.
String privateDomain = "[invalid]";
if (domainObj.topPrivateDomain() != null)
privateDomain = domainObj.topPrivateDomain().name().trim().toLowerCase();
//output.collect(new Text("Domain\t"+privateDomain), new LongWritable(1));
}
catch (URISyntaxException ex) {
output.collect(new Text("TLD\t[invalid URL]"), new LongWritable(1));
reporter.incrCounter(this._counterGroup, "Invalid URLs", 1);
}
// Output MIME Type
String mimeType = "[missing]";
if (jsonObj.has("mime_type"))
mimeType = jsonObj.get("mime_type").getAsString().trim().toLowerCase();
output.collect(new Text("Type\t"+mimeType), new LongWritable(1));
// Output Charset
String charset = "[missing]";
if (jsonObj.has("charset_detected"))
charset = jsonObj.get("charset_detected").getAsString().trim().toUpperCase();
output.collect(new Text("Charset\t"+charset), new LongWritable(1));
// Download Size
if (jsonObj.has("download_size") == true)
output.collect(new Text("Content Size\t"), new LongWritable(jsonObj.get("download_size").getAsInt()));
}
catch (IOException ex) {
throw ex;
}
catch (Exception ex) {