Package org.archive.wayback.webapp

Source Code of org.archive.wayback.webapp.ServerRelativeArchivalRedirect

/*
*  This file is part of the Wayback archival access software
*   (http://archive-access.sourceforge.net/projects/wayback/).
*
*  Licensed to the Internet Archive (IA) by one or more individual
*  contributors.
*
*  The IA licenses this file to You under the Apache License, Version 2.0
*  (the "License"); you may not use this file except in compliance with
*  the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/
package org.archive.wayback.webapp;

import java.io.IOException;
import java.util.logging.Logger;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.archive.url.UsableURI;
import org.archive.url.UsableURIFactory;
import org.archive.util.ArchiveUtils;
import org.archive.wayback.util.Timestamp;
import org.archive.wayback.util.url.UrlOperations;
import org.archive.wayback.util.webapp.AbstractRequestHandler;
import org.archive.wayback.util.webapp.RequestHandler;

/**
* {@code ServerRelativeArchivalRedirect} is a {@link RequestHandler}
* that redirects <i>leaked</i> server-relative URL back to replay request
* URL.
* <p>For example, assuming {@code Referer} is
* {@code http://web.archive.org/web/20010203040506/http://example.com/index.html},
* it redirects request {@code http://web.archive.org/js/foo.js}
* to {@code http://web.archive.org/web/20010203040506/http://example.com/js/foo.js}.</p>
* <p>It is typically set up as catch-all {@code RequestHandler}</p>
* @author brad
*/
public class ServerRelativeArchivalRedirect extends AbstractRequestHandler {
  private static final Logger LOGGER = Logger
    .getLogger(ServerRelativeArchivalRedirect.class.getName());

  boolean useCollection = false;
  private String matchHost = null;
  private int matchPort = -1;
  private String replayPrefix;

  private String handleRequestWithCollection(HttpServletRequest httpRequest,
      HttpServletResponse httpResponse) throws ServletException,
      IOException {
    // TODO: check if this works with non-empty context path.
    // I believe requestURI starts with context path. So non-empty
    // context path will break collection and timestamp extraction
    // code below. It'd be desirable for this class to get a pattern
    // of replay URL from somewhere - replayPreifx?

    // hope that it's a server relative request, with a valid referrer:
    String referer = httpRequest.getHeader("Referer");
    if (referer == null) return null;
   
    final UsableURI refuri = UsableURIFactory.getInstance(referer);

    // Check that the Referer is our current wayback path
    // before attempting to use referer as base archival url

    if ((matchHost != null && !matchHost.equals(refuri.getHost())) ||
        (matchPort != -1 && refuri.getPort() != -1 && matchPort != refuri
        .getPort())) {
      LOGGER.info("Server-Relative-Redirect: Skipping, Referer " +
          refuri.getHost() + ":" + refuri.getPort() +
          " not from matching wayback host:port\t");
      return null;
    }

    String path = refuri.getPath();
    int secondSlash = path.indexOf('/', 1);
    if (secondSlash == -1) return null;
   
    String collection = path.substring(0, secondSlash);
    collection = modifyCollection(collection);

    String remainder = path.substring(secondSlash + 1);
    int thirdSlash = remainder.indexOf('/');
    if (thirdSlash == -1) return null;

    String datespec = remainder.substring(0, thirdSlash);
    if (!datespec.isEmpty() &&
        !Character.isDigit(datespec.charAt(0))) {
      datespec = null;
    }

    String url = remainder.substring(thirdSlash + 1);
    url = UrlOperations.fixupScheme(url);
    url = ArchiveUtils.addImpliedHttpIfNecessary(url);

    String thisPath = httpRequest.getRequestURI();
    String queryString = httpRequest.getQueryString();
    if (queryString != null) {
      thisPath += "?" + queryString;
    }
    String resolved = UrlOperations.resolveUrl(url, thisPath);

    String contextPath = httpRequest.getContextPath();
    StringBuilder sb = new StringBuilder(refuri.getScheme());
    sb.append("://");
    sb.append(refuri.getAuthority());
    sb.append(contextPath);
    sb.append(collection);
    sb.append("/");
    if (datespec != null) {
      sb.append(datespec);
      sb.append("/");
    }
    sb.append(resolved);
   
    return sb.toString();
  }

  /**
   * modify collection if necessary.
   * <p>default implementation simply return {@code collection}.</p>
   * @param collection (the first path component of Referer URL.)
   * note value has leading slash, which must be retained.
   * @return possibly modified collection.
   */
  protected String modifyCollection(String collection) {
    return collection;
  }

  private String handleRequestWithoutCollection(
      HttpServletRequest httpRequest, HttpServletResponse httpResponse)
      throws ServletException, IOException {

    // hope that it's a server relative request, with a valid referrer:
    String referer = httpRequest.getHeader("Referer");
    if (referer == null) return null;
   
    LOGGER.fine("referer:" + referer);
    UsableURI uri = UsableURIFactory.getInstance(referer);
    String path = uri.getPath();

    String remainder = path.substring(1);
    int thirdSlash = remainder.indexOf('/');
    LOGGER.fine("referer:(" + referer + ") remain(" + remainder +
      ") 3rd(" + thirdSlash + ")");
    if (thirdSlash == -1) return null;

    String datespec = remainder.substring(0, thirdSlash);
    String url = remainder.substring(thirdSlash + 1);
    url = UrlOperations.fixupScheme(url);
    url = ArchiveUtils.addImpliedHttpIfNecessary(url);
    String thisPath = httpRequest.getRequestURI();
    String queryString = httpRequest.getQueryString();
    if (queryString != null) {
      thisPath += "?" + queryString;
    }

    String resolved = UrlOperations.resolveUrl(url, thisPath);
    String contextPath = httpRequest.getContextPath();
    String finalUrl = uri.getScheme() + "://" + uri.getAuthority() +
        contextPath + "/" + datespec + "/" + resolved;
    // cross your fingers!!!
    LOGGER.info("Server-Relative-Redirect:\t" + referer + "\t" +
        thisPath + "\t" + finalUrl);
    return finalUrl;
  }

  @Override
  public boolean handleRequest(HttpServletRequest httpRequest,
      HttpServletResponse httpResponse) throws ServletException,
      IOException {
    if (matchHost != null) {
      if (!matchHost.equals(httpRequest.getServerName())) {
        LOGGER.fine("Wrong host for ServerRelativeRed(" +
            httpRequest.getServerName() + ")");
        return false;
      }
    }
    if (matchPort != -1) {
      if (matchPort != httpRequest.getLocalPort()) {
        LOGGER.fine("Wrong port for ServerRealtiveRed(" +
            httpRequest.getServerName() + ")(" +
            httpRequest.getLocalPort() + ") :" +
            httpRequest.getRequestURI());
        return false;
      }
    }
    String replayUrl = (useCollection ?
        handleRequestWithCollection(httpRequest, httpResponse) :
          handleRequestWithoutCollection(httpRequest, httpResponse));
    if (replayUrl == null && replayPrefix != null) {
      String thisPath = httpRequest.getRequestURI();
      String queryString = httpRequest.getQueryString();
      if (queryString != null) {
        thisPath += "?" + queryString;
      }
      // TODO: rethink this fallback, for now adding https support as
      // well
      if (thisPath.startsWith("/http://") ||
          thisPath.startsWith("/https://")) {
        // assume a replay request:
        StringBuilder sb = new StringBuilder(thisPath.length() +
          replayPrefix.length() + 16);
        sb.append(replayPrefix);
        sb.append(Timestamp.currentTimestamp().getDateStr());
        sb.append(thisPath);
        replayUrl = sb.toString();
      }
    }
    if (replayUrl != null) {
      // Gotta make sure this is properly cached, or
      // weird things happen:
      httpResponse.addHeader("Vary", "Referer");
      httpResponse.sendRedirect(replayUrl);
      return true;
    }
    return false;
  }

  /**
   * @return the useCollection
   */
  public boolean isUseCollection() {
    return useCollection;
  }

  /**
   * whether replay URL has <i>collection</i> part.
   * <p>set this to {@code true} if replay URL has <i>collection</i>
   * part, path component between context path and timestamp (although
   * it's called <i>collection</i> based on the common usage of this part,
   * there's no need to have particular semantics.)</p>
   * <p>collection part will be passed to {@link #modifyCollection(String)}
   * before constructing final replay URL to redirect to.<p>
   * @param useCollection the useCollection to set
   */
  public void setUseCollection(boolean useCollection) {
    this.useCollection = useCollection;
  }

  /**
   * @return the matchHost
   */
  public String getMatchHost() {
    return matchHost;
  }

  /**
   * optional host name {@code Referer} URL should match.
   * @param matchHost the matchHost to set
   */
  public void setMatchHost(String matchHost) {
    this.matchHost = matchHost;
  }

  /**
   * @return the matchPort
   */
  public int getMatchPort() {
    return matchPort;
  }

  /**
   * optional port number {@code Referer} URL should match.
   * @param matchPort the matchPort to set
   */
  public void setMatchPort(int matchPort) {
    this.matchPort = matchPort;
  }

  /**
   * @return the replayPrefix
   */
  public String getReplayPrefix() {
    return replayPrefix;
  }

  /**
   * optional replay URL prefix used by fallback method.
   * @param replayPrefix the replayPrefix to set
   */
  public void setReplayPrefix(String replayPrefix) {
    this.replayPrefix = replayPrefix;
  }
}
TOP

Related Classes of org.archive.wayback.webapp.ServerRelativeArchivalRedirect

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.