* OR null if the url is invalid
*/
public static URLFPV2 getURLFPV2FromCanonicalURL(String canonicalURL) {
  // Start the fingerprint record with the hash of the full canonical URL.
  // This happens before any validation, exactly as callers have always seen.
  URLFPV2 fingerprint = new URLFPV2();
  fingerprint.setUrlHash(URLFingerprint.generate64BitURLFPrint(canonicalURL));

  // No host could be extracted: report the URL as invalid.
  String host = fastGetHostFromURL(canonicalURL);
  if (host == null) {
    return null;
  }

  // No root domain could be derived from the host: also invalid.
  String rootDomain = URLUtils.extractRootDomainName(host);
  if (rootDomain == null) {
    return null;
  }

  // Drop a leading "www." before hashing the host, except when the host is
  // itself the root domain (in which case "www" is part of the registered
  // name and must be kept).
  boolean dropWwwPrefix = host.startsWith("www.") && !rootDomain.equals(host);
  String hostToHash = dropWwwPrefix ? host.substring(4) : host;

  fingerprint.setDomainHash(FPGenerator.std64.fp(hostToHash));
  fingerprint.setRootDomainHash(FPGenerator.std64.fp(rootDomain));
  return fingerprint;
}