Package org.archive.wayback.accesscontrol.oracleclient

Source Code of org.archive.wayback.accesscontrol.oracleclient.OracleExclusionFilter

/*  This file is part of the Wayback archival access software
*   (http://archive-access.sourceforge.net/projects/wayback/).
*
*  Licensed to the Internet Archive (IA) by one or more individual
*  contributors.
*
*  The IA licenses this file to You under the Apache License, Version 2.0
*  (the "License"); you may not use this file except in compliance with
*  the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/
package org.archive.wayback.accesscontrol.oracleclient;

import java.util.Date;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.archive.accesscontrol.AccessControlClient;
import org.archive.accesscontrol.RobotsUnavailableException;
import org.archive.accesscontrol.RuleOracleUnavailableException;
import org.archive.util.ArchiveUtils;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.resourceindex.filters.ExclusionFilter;

/**
* @author brad
*
*/
public class OracleExclusionFilter extends ExclusionFilter {
  protected AccessControlClient client = null;
  protected String accessGroup = null;
 
  private final static String POLICY_ALLOW = "allow";
  private final static String POLICY_BLOCK = "block";
  private final static String POLICY_ROBOT = "robots";
  private boolean notifiedRobotSeen = false;
//  private boolean notifiedRobotPassed = false;
  private boolean notifiedAdminSeen = false;
  private boolean notifiedAdminPassed = false;
 
  /**
   * @param oracleUrl String URL prefix for the Oracle HTTP server
   * @param accessGroup String group to use with requests to the Oracle
   */
  public OracleExclusionFilter(String oracleUrl, String accessGroup) {
    this(oracleUrl,accessGroup,null);
  }
  /**
   * @param oracleUrl String URL prefix for the Oracle HTTP server
   * @param accessGroup String group to use with requests to the Oracle
   * @param proxyHostPort String proxyHost:proxyPort to use for robots.txt
   */
  public OracleExclusionFilter(String oracleUrl, String accessGroup,
      String proxyHostPort) {
    client = new AccessControlClient(oracleUrl);
    if(proxyHostPort != null) {
         int colonIdx = proxyHostPort.indexOf(':');
        if(colonIdx > 0) {
          String host = proxyHostPort.substring(0,colonIdx);
          int port = Integer.valueOf(proxyHostPort.substring(colonIdx+1));
          client.setRobotProxy(host, port);
        }
    }
    this.accessGroup = accessGroup;
  }
 
  protected int handleAllow()
  {
    if(!notifiedAdminSeen) {
      notifiedAdminSeen = true;
      if(filterGroup != null) {
        filterGroup.setSawAdministrative();
      }
    }
    if(!notifiedAdminPassed) {
      notifiedAdminPassed = true;
      if(filterGroup != null) {
        filterGroup.setPassedAdministrative();
      }
    }
    return FILTER_INCLUDE;   
  }
 
  protected int handleBlock()
  {
    if(!notifiedAdminSeen) {
      notifiedAdminSeen = true;
      if(filterGroup != null) {
        filterGroup.setSawAdministrative();
      }
    }
    if(!notifiedAdminPassed) {
      notifiedAdminPassed = true;
      if(filterGroup != null) {
        filterGroup.setPassedAdministrative(false);
      }
    }
    return FILTER_EXCLUDE;
  }
 
  protected int handleRobots()
  {
    if(!notifiedRobotSeen) {
      notifiedRobotSeen = true;
      if(filterGroup != null) {
        filterGroup.setSawRobots();
      }
    }
    return FILTER_INCLUDE;
//    if(robotFilter != null) {
//      if(!notifiedRobotPassed) {
//        notifiedRobotPassed = true;
//        if(filterGroup != null) {
//          filterGroup.setPassedRobot();
//        }
//      }
//      return robotFilter.filterObject(o);
//    } else {
//      return FILTER_EXCLUDE;
//    }   
  }
 
 
  public int filterObject(CaptureSearchResult o) {
    String url = o.getOriginalUrl();
    Date captureDate = o.getCaptureDate();
    Date retrievalDate = new Date();
   
    String policy;
    try {
      policy = client.getPolicy(ArchiveUtils.addImpliedHttpIfNecessary(url), captureDate, retrievalDate,
          accessGroup);
      if(policy != null) {
        if(policy.equals(POLICY_ALLOW)) {
          return handleAllow();
        } else if(policy.equals(POLICY_BLOCK)) {
          return handleBlock();
        } else if(policy.equals(POLICY_ROBOT)) {
          return handleRobots();
        }
      }
    } catch (RobotsUnavailableException e) {
      e.printStackTrace();
    } catch (RuleOracleUnavailableException e) {
      e.printStackTrace();
    }
    return FILTER_EXCLUDE;     
  }
}
TOP

Related Classes of org.archive.wayback.accesscontrol.oracleclient.OracleExclusionFilter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.