Package org.archive.wayback.archivalurl

Source Code of org.archive.wayback.archivalurl.ArchivalUrlReplayRenderer

/* ArchivalUrlReplayRenderer
*
* $Id$
*
* Created on 6:11:00 PM Aug 8, 2007.
*
* Copyright (C) 2007 Internet Archive.
*
* This file is part of wayback-core.
*
* wayback-core is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* any later version.
*
* wayback-core is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU Lesser Public License for more details.
*
* You should have received a copy of the GNU Lesser Public License
* along with wayback-core; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
package org.archive.wayback.archivalurl;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.logging.Logger;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.archive.wayback.ReplayRenderer;
import org.archive.wayback.ResultURIConverter;
import org.archive.wayback.core.Resource;
import org.archive.wayback.core.SearchResult;
import org.archive.wayback.core.SearchResults;
import org.archive.wayback.core.WaybackRequest;
import org.archive.wayback.exception.BadContentException;
import org.archive.wayback.replay.HTMLPage;
import org.archive.wayback.replay.HttpHeaderProcessor;
import org.archive.wayback.replay.HttpHeaderOperation;
import org.archive.wayback.util.url.UrlOperations;

/**
* ReplayRenderer responsible for marking up HTML pages so they replay in
* ArchivalUrl context:
*   resolve in page URLs
*   add HTML comment and javascript to modify URLs client-side to point back
*       to this context
*
* @author brad
* @version $Date$, $Revision$
*/
public class ArchivalUrlReplayRenderer implements ReplayRenderer, HttpHeaderProcessor {

  private List<String> jsInserts = null;
  private List<String> jspInserts = null;
  private boolean serverSideRendering = false;

  private static final Logger LOGGER = Logger.getLogger( ArchivalUrlReplayRenderer.class.getName() );
  private final SimpleDateFormat logDateFormatter = new SimpleDateFormat("dd/MMM/yyyy:hh:mm:ss Z", Locale.ENGLISH);

  /* (non-Javadoc)
   * @see org.archive.wayback.ReplayRenderer#renderResource(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResults)
   */
  public void renderResource(HttpServletRequest httpRequest,
      HttpServletResponse httpResponse, WaybackRequest wbRequest,
      SearchResult result, Resource resource,
      ResultURIConverter uriConverter, SearchResults results)
      throws ServletException, IOException, BadContentException {

    StringBuilder toInsert = new StringBuilder(300);

    HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse);

    Map<String,String> headers = HttpHeaderOperation.processHeaders(resource, result, uriConverter, this);

    // Load content into an HTML page, and resolve load-time URLs:
    HTMLPage page = new HTMLPage(resource,result,uriConverter);
    page.readFully();
   
    if(serverSideRendering) {
      page.resolveAllPageUrls();
    } else {
      page.resolvePageUrls();
    }
    if(jsInserts != null) {
      Iterator<String> itr = jsInserts.iterator();
      while(itr.hasNext()) {
        toInsert.append(page.getJSIncludeString(itr.next()));
      }
    }
    if(jspInserts != null) {
      Iterator<String> itr = jspInserts.iterator();
      while(itr.hasNext()) {
        toInsert.append(page.includeJspString(itr.next(), httpRequest,
            httpResponse, wbRequest, results, result));
      }
    }

    // insert the new content:
    if(serverSideRendering) {
      page.insertAtStartOfBody(toInsert.toString());
    } else {
      page.insertAtEndOfBody(toInsert.toString());
    }
   
    // set the corrected length:
    int bytes = page.getBytes().length;
    headers.put(HTTP_LENGTH_HEADER, String.valueOf(bytes));
    // Tomcat will always send a charset... It's trying to be smarter than
    // we are. If the original page didn't include a "charset" as part of
    // the "Content-Type" HTTP header, then Tomcat will use the default..
    // who knows what that is, or what that will do to the page..
    // let's try explicitly setting it to what we used:
    httpResponse.setCharacterEncoding(page.getCharSet());

    // send back the headers:
    HttpHeaderOperation.sendHeaders(headers, httpResponse);

    page.writeToOutputStream(httpResponse.getOutputStream());
   
    // log this replay request
    logReplayRequest(httpRequest, httpRequest.getParameter("sid"));
  }

  /* (non-Javadoc)
   * @see org.archive.wayback.replay.HeaderFilter#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult)
   */
  public void filter(Map<String, String> output, String key, String value,
      ResultURIConverter uriConverter, SearchResult result) {

    String keyUp = key.toUpperCase();

    // omit Content-Length header
    if (keyUp.equals(HTTP_LENGTH_HEADER_UP)) {
      return;
    }

    // omit Transfer-Encoding header - /* BUG wayback 735 */   
    if (keyUp.equals(HTTP_TRANSFER_ENC_HEADER_UP)) {
      return;
    }

    // rewrite Location header URLs
    if (keyUp.startsWith(HTTP_LOCATION_HEADER_UP) ||
        keyUp.startsWith(HTTP_CONTENT_BASE_HEADER_UP)) {

      String baseUrl = result.getAbsoluteUrl();
      String cd = result.getCaptureDate();
      // by the spec, these should be absolute already, but just in case:
      String u = UrlOperations.resolveUrl(baseUrl, value);

      output.put(key, uriConverter.makeReplayURI(cd,u));
    } else if(keyUp.startsWith(HTTP_CONTENT_TYPE_HEADER_UP)) {
      output.put("X-Wayback-Orig-" + key,value);
      output.put(key,value);
    } else {
      // others go out as-is:

      output.put(key, value);
    }
  }

  /**
   * @return the jsInserts
   */
  public List<String> getJsInserts() {
    return jsInserts;
  }

  /**
   * @param jsInserts the jsInserts to set
   */
  public void setJsInserts(List<String> jsInserts) {
    this.jsInserts = jsInserts;
  }

  /**
   * @return the jspInserts
   */
  public List<String> getJspInserts() {
    return jspInserts;
  }

  /**
   * @param jspInserts the jspInserts to set
   */
  public void setJspInserts(List<String> jspInserts) {
    this.jspInserts = jspInserts;
  }

  /**
   * @return the isServerSideRendering
   */
  public boolean isServerSideRendering() {
    return serverSideRendering;
  }

  /**
   * @param isServerSideRendering the isServerSideRendering to set
   */
  public void setServerSideRendering(boolean serverSideRendering) {
    this.serverSideRendering = serverSideRendering;
  }

  /**
   * @param request the Request of the page to replay
   * @param sid session id from nutchwax
   */
  private void logReplayRequest(HttpServletRequest request, String sid) {
                String referer = request.getHeader("referer") != null ? request.getHeader("referer"):"-";
    String sessionId = sid != null ? sid : "-";

                StringBuilder logEntry = new StringBuilder();
                logEntry.append( "#session#" );
                logEntry.append( " " );
                logEntry.append( request.getRemoteHost() );
                logEntry.append( " - " );
    logEntry.append( sessionId );
                logEntry.append( " [" );
                logEntry.append( logDateFormatter.format(new Date(request.getSession().getLastAccessedTime())) );
                logEntry.append( "] \"");
                logEntry.append( request.getMethod() );
                logEntry.append( " " );
                logEntry.append( request.getServletPath() );
    if ( request.getQueryString() != null) {
      logEntry.append( "?" );
      logEntry.append( request.getQueryString() );
    }
                logEntry.append( " " );
                logEntry.append( request.getProtocol() );
                logEntry.append( "\" 200 -1 \"" );
                logEntry.append( referer );
                logEntry.append( "\" \"" );
                logEntry.append( request.getHeader("User-Agent") );

                LOGGER.info( logEntry.toString() );
        }

}
TOP

Related Classes of org.archive.wayback.archivalurl.ArchivalUrlReplayRenderer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.