Package org.archive.url

Examples of org.archive.url.UsableURI


 
  private static String getKey(String url, boolean prefix)
  throws URIException {

    String key = ArchiveUtils.addImpliedHttpIfNecessary(url);
    UsableURI uuri = UsableURIFactory.getInstance(key);
    key = uuri.getScheme() + "://" + uuri.getAuthority() +
      uuri.getEscapedPathQuery();

    key = SURT.fromURI(key);
   
    int hashPos = key.indexOf('#');
    if(hashPos != -1) {
View Full Code Here


    // hope that it's a server relative request, with a valid referrer:
    String referer = httpRequest.getHeader("Referer");
    if (referer == null) return null;
   
    LOGGER.fine("referer:" + referer);
    UsableURI uri = UsableURIFactory.getInstance(referer);
    String path = uri.getPath();

    String remainder = path.substring(1);
    int thirdSlash = remainder.indexOf('/');
    LOGGER.fine("referer:(" + referer + ") remain(" + remainder +
      ") 3rd(" + thirdSlash + ")");
    if (thirdSlash == -1) return null;

    String datespec = remainder.substring(0, thirdSlash);
    String url = remainder.substring(thirdSlash + 1);
    url = UrlOperations.fixupScheme(url);
    url = ArchiveUtils.addImpliedHttpIfNecessary(url);
    String thisPath = httpRequest.getRequestURI();
    String queryString = httpRequest.getQueryString();
    if (queryString != null) {
      thisPath += "?" + queryString;
    }

    String resolved = UrlOperations.resolveUrl(url, thisPath);
    String contextPath = httpRequest.getContextPath();
    String finalUrl = uri.getScheme() + "://" + uri.getAuthority() +
        contextPath + "/" + datespec + "/" + resolved;
    // cross your fingers!!!
    LOGGER.info("Server-Relative-Redirect:\t" + referer + "\t" +
        thisPath + "\t" + finalUrl);
    return finalUrl;
View Full Code Here

    // hope that it's a server relative request, with a valid referrer:
    String referer = httpRequest.getHeader("Referer");
    if (referer == null) return null;
   
    final UsableURI refuri = UsableURIFactory.getInstance(referer);

    // Check that the Referer is our current wayback path
    // before attempting to use referer as base archival url

    if ((matchHost != null && !matchHost.equals(refuri.getHost())) ||
        (matchPort != -1 && refuri.getPort() != -1 && matchPort != refuri
        .getPort())) {
      LOGGER.info("Server-Relative-Redirect: Skipping, Referer " +
          refuri.getHost() + ":" + refuri.getPort() +
          " not from matching wayback host:port\t");
      return null;
    }

    String path = refuri.getPath();
    int secondSlash = path.indexOf('/', 1);
    if (secondSlash == -1) return null;
   
    String collection = path.substring(0, secondSlash);
    collection = modifyCollection(collection);

    String remainder = path.substring(secondSlash + 1);
    int thirdSlash = remainder.indexOf('/');
    if (thirdSlash == -1) return null;

    String datespec = remainder.substring(0, thirdSlash);
    if (!datespec.isEmpty() &&
        !Character.isDigit(datespec.charAt(0))) {
      datespec = null;
    }

    String url = remainder.substring(thirdSlash + 1);
    url = UrlOperations.fixupScheme(url);
    url = ArchiveUtils.addImpliedHttpIfNecessary(url);

    String thisPath = httpRequest.getRequestURI();
    String queryString = httpRequest.getQueryString();
    if (queryString != null) {
      thisPath += "?" + queryString;
    }
    String resolved = UrlOperations.resolveUrl(url, thisPath);

    String contextPath = httpRequest.getContextPath();
    StringBuilder sb = new StringBuilder(refuri.getScheme());
    sb.append("://");
    sb.append(refuri.getAuthority());
    sb.append(contextPath);
    sb.append(collection);
    sb.append("/");
    if (datespec != null) {
      sb.append(datespec);
View Full Code Here

          // in...
          return defaultValue;
        }
      }
    }
    UsableURI absBaseURI;
    UsableURI resolvedURI = null;
    try {
      absBaseURI = UsableURIFactory.getInstance(baseUrl);
      resolvedURI = UsableURIFactory.getInstance(absBaseURI, url);
    } catch (URIException e) {
      LOGGER.warning(e.getLocalizedMessage() + ": " + url);
      return defaultValue;
    }
   
    return resolvedURI.getEscapedURI();
  }
View Full Code Here

   * invalid, or if the url is the root of the authority.
   */
  public static String getUrlParentDir(String url) {
   
    try {
      UsableURI uri = UsableURIFactory.getInstance(url);
      String path = uri.getPath();
      if(path.length() > 1) {
        int startIdx = path.length()-1;
        if(path.charAt(path.length()-1) == '/') {
          startIdx--;
        }
        int idx = path.lastIndexOf('/',startIdx);
        if(idx >= 0) {
          uri.setPath(path.substring(0,idx+1));
          uri.setQuery(null);
          return uri.toString();
        }
      }
    } catch (URIException e) {
      LOGGER.warning(e.getLocalizedMessage() + ": " + url);
    }
View Full Code Here

    // was the only easy way I could find to get the correct unescaping
    // out of UsableURIs, possibly a bug. Definitely needs some TLC in any case,
    // as building UsableURIs is *not* a cheap operation.
   
    // unescape anything that can be:
    UsableURI tmpURI = null;
    try {
      tmpURI = UsableURIFactory.getInstance(searchUrl);
    } catch (StringIndexOutOfBoundsException e) {
      LOGGER.warning(e.getMessage() + ": " + searchUrl);
      return searchUrl;
//    } catch(URIException e) {
//      LOGGER.warning(e.getMessage() + ": " + searchUrl);
//      return searchUrl;     
    }
    tmpURI.setPath(tmpURI.getPath());
   
    // convert to UsableURI to perform required URI fixup:
    UsableURI searchURI = UsableURIFactory.getInstance(tmpURI.getURI());
   
    // replace ' ' with '+' (this is only to match Alexa's canonicalization)
    String newPath = searchURI.getEscapedPath().replace("%20","+");
   
    // replace multiple consecutive '/'s in the path.
    while(newPath.contains("//")) {
      newPath = newPath.replace("//","/");
    }
   
    // this would remove a trailing '/' character, unless the path is empty,
    // but we're not going to do this just yet..
//    if((newPath.length() > 1) && newPath.endsWith("/")) {
//      newPath = newPath.substring(0,newPath.length()-1);
//    }
   
    StringBuilder sb = new StringBuilder(searchUrl.length());
    sb.append(searchURI.getHostBasename());

    // omit port if scheme default:
    int defaultSchemePort = UrlOperations.schemeToDefaultPort(scheme);
    if(searchURI.getPort() != defaultSchemePort
        && searchURI.getPort() != -1) {

      sb.append(":").append(searchURI.getPort());
    }

    sb.append(newPath);
    if(searchURI.getEscapedQuery() != null) {
      sb.append("?").append(searchURI.getEscapedQuery());
    }

    return sb.toString();
  }
View Full Code Here

TOP

Related Classes of org.archive.url.UsableURI

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.