Package org.archive.wayback.liveweb

Source Code of org.archive.wayback.liveweb.RemoteLiveWebCache

/*
*  This file is part of the Wayback archival access software
*   (http://archive-access.sourceforge.net/projects/wayback/).
*
*  Licensed to the Internet Archive (IA) by one or more individual
*  contributors.
*
*  The IA licenses this file to You under the Apache License, Version 2.0
*  (the "License"); you may not use this file except in compliance with
*  the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/
package org.archive.wayback.liveweb;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.net.ConnectException;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;

import org.apache.commons.httpclient.ConnectTimeoutException;
import org.apache.commons.httpclient.HostConfiguration;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.NoHttpResponseException;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpClientParams;
import org.archive.io.arc.ARCRecord;
import org.archive.wayback.core.Resource;
import org.archive.wayback.exception.LiveDocumentNotAvailableException;
import org.archive.wayback.exception.LiveWebCacheUnavailableException;
import org.archive.wayback.exception.LiveWebTimeoutException;
import org.archive.wayback.exception.ResourceNotAvailableException;
import org.archive.wayback.resourcestore.resourcefile.ArcResource;
import org.archive.wayback.resourcestore.resourcefile.ResourceFactory;

/**
* @author brad
*
*/
public class RemoteLiveWebCache implements LiveWebCache {
  private static final Logger LOGGER = Logger.getLogger(
      RemoteLiveWebCache.class.getName());

    protected MultiThreadedHttpConnectionManager connectionManager = null;
    protected HostConfiguration hostConfiguration = null;
    protected HttpClient http = null;
    protected String requestPrefix = null;

    /**
     *
     */
    public RemoteLiveWebCache() {
      connectionManager = new MultiThreadedHttpConnectionManager();
      hostConfiguration = new HostConfiguration();
    HttpClientParams params = new HttpClientParams();
        params.setParameter(HttpClientParams.RETRY_HANDLER, new NoRetryHandler());
      http = new HttpClient(params,connectionManager);
      http.setHostConfiguration(hostConfiguration);
    }

    /* (non-Javadoc)
   * @see org.archive.wayback.liveweb.LiveWebCache#getCachedResource(java.net.URL, long, boolean)
   */
  public Resource getCachedResource(URL url, long maxCacheMS,
      boolean bUseOlder) throws LiveDocumentNotAvailableException,
      LiveWebCacheUnavailableException, LiveWebTimeoutException, IOException {
    String urlString = url.toExternalForm();
   
    if (requestPrefix != null) {
      urlString = requestPrefix + urlString;
    }
   
    HttpMethod method = null;
    try {
      method = new GetMethod(urlString);
    } catch(IllegalArgumentException e) {
      LOGGER.warning("Bad URL for live web fetch:" + urlString);
      throw new LiveDocumentNotAvailableException("Url:" + urlString +
          "does not look like an URL?");
    }
    boolean success = false;
      try {
        int status = http.executeMethod(method);
        if(status == 200) {

          ByteArrayInputStream bais = new ByteArrayInputStream(method.getResponseBody());
          ARCRecord r = new ARCRecord(
              new GZIPInputStream(bais),
              "id",0L,false,false,true);
          ArcResource ar = (ArcResource)
            ResourceFactory.ARCArchiveRecordToResource(r, null);
          if(ar.getStatusCode() == 502) {
            throw new LiveDocumentNotAvailableException(urlString);
          } else if(ar.getStatusCode() == 504) {
            throw new LiveWebTimeoutException("Timeout:" + urlString);
          }
          success = true;
          return ar;
         
        } else {
          throw new LiveWebCacheUnavailableException(urlString);
        }

      } catch (ResourceNotAvailableException e) {
        throw new LiveDocumentNotAvailableException(urlString);

      } catch (NoHttpResponseException e) {

        throw new LiveWebCacheUnavailableException("No Http Response for "
            + urlString);

      } catch (ConnectException e) {
        throw new LiveWebCacheUnavailableException(e.getLocalizedMessage()
            + " : " + urlString);
      } catch (SocketException e) {
        throw new LiveWebCacheUnavailableException(e.getLocalizedMessage()
            + " : " + urlString);
      } catch (SocketTimeoutException e) {
        throw new LiveWebTimeoutException(e.getLocalizedMessage()
            + " : " + urlString);
      } catch(ConnectTimeoutException e) {
        throw new LiveWebTimeoutException(e.getLocalizedMessage()
            + " : " + urlString);       
    } finally {
      if (!success) {
        method.abort();
      }
        method.releaseConnection();
      }
  }

  /* (non-Javadoc)
   * @see org.archive.wayback.liveweb.LiveWebCache#shutdown()
   */
  public void shutdown() {
    // TODO Auto-generated method stub
  }
   

    /**
     * @param hostPort to proxy requests through - ex. "localhost:3128"
     */
    public void setProxyHostPort(String hostPort) {
      int colonIdx = hostPort.indexOf(':');
      if(colonIdx > 0) {
        String host = hostPort.substring(0,colonIdx);
        int port = Integer.valueOf(hostPort.substring(colonIdx+1));
       
//            http.getHostConfiguration().setProxy(host, port);
        hostConfiguration.setProxy(host, port);
      }
    }
    /**
     * @param maxTotalConnections the HttpConnectionManagerParams config
     */
    public void setMaxTotalConnections(int maxTotalConnections) {
      connectionManager.getParams().
        setMaxTotalConnections(maxTotalConnections);
    }
    /**
     * @return the HttpConnectionManagerParams maxTotalConnections config
     */
    public int getMaxTotalConnections() {
      return connectionManager.getParams().getMaxTotalConnections();
    }
    /**
     * @param maxHostConnections the HttpConnectionManagerParams config
     */
    public void setMaxHostConnections(int maxHostConnections) {
      connectionManager.getParams().
        setMaxConnectionsPerHost(hostConfiguration, maxHostConnections);
    }

    /**
     * @return the HttpConnectionManagerParams maxHostConnections config
     */
    public int getMaxHostConnections() {
      return connectionManager.getParams().
        getMaxConnectionsPerHost(hostConfiguration);
    }

    /**
   * @return the connectionTimeoutMS
   */
  public int getConnectionTimeoutMS() {
    return connectionManager.getParams().getConnectionTimeout();
  }

  /**
   * @param connectionTimeoutMS the connectionTimeoutMS to set
   */
  public void setConnectionTimeoutMS(int connectionTimeoutMS) {
      connectionManager.getParams().setConnectionTimeout(connectionTimeoutMS);
  }

  /**
   * @return the socketTimeoutMS
   */
  public int getSocketTimeoutMS() {
    return connectionManager.getParams().getSoTimeout();
  }

  /**
   * @param socketTimeoutMS the socketTimeoutMS to set
   */
  public void setSocketTimeoutMS(int socketTimeoutMS) {
      connectionManager.getParams().setSoTimeout(socketTimeoutMS);
  }

  public String getRequestPrefix() {
    return requestPrefix;
  }

  public void setRequestPrefix(String requestPrefix) {
    this.requestPrefix = requestPrefix;
  }
 
  public HttpClient getHttpClient()
  {
    return http;
  }
}
TOP

Related Classes of org.archive.wayback.liveweb.RemoteLiveWebCache

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.