Package org.archive.wayback.resourceindex.cdxserver

Source Code of org.archive.wayback.resourceindex.cdxserver.AccessCheckFilter

package org.archive.wayback.resourceindex.cdxserver;

import org.archive.cdxserver.auth.AuthToken;
import org.archive.cdxserver.filter.CDXAccessFilter;
import org.archive.cdxserver.filter.CDXFilter;
import org.archive.format.cdx.CDXLine;
import org.archive.util.io.RuntimeIOException;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.core.FastCaptureSearchResult;
import org.archive.wayback.exception.AdministrativeAccessControlException;
import org.archive.wayback.exception.RobotAccessControlException;
import org.archive.wayback.resourceindex.filters.ExclusionFilter;
import org.archive.wayback.util.url.UrlOperations;

public class AccessCheckFilter implements CDXAccessFilter {
 
  protected ExclusionFilter adminFilter;
  protected ExclusionFilter robotsFilter;
  protected CDXFilter prefixFilter1;
  protected CDXFilter prefixFilter2;
 
  protected CaptureSearchResult resultTester;
 
  protected AuthToken authToken;
 
  protected String lastKey;
  protected boolean cachedValue = false;

  public AccessCheckFilter(
      AuthToken token,
      ExclusionFilter adminFilter,
      ExclusionFilter robotsFilter,
      CDXFilter prefixFilter1,
      CDXFilter prefixFilter2) {
     
    this.authToken = token;
   
      this.adminFilter = adminFilter;
      this.robotsFilter = robotsFilter;
     
      this.prefixFilter1 = prefixFilter1;
      this.prefixFilter2 = prefixFilter2;
     
      this.resultTester = new FastCaptureSearchResult();
    }
 
  public boolean include(String urlKey, String originalUrl, boolean throwOnFail) {
   
    if ((lastKey != null) && lastKey.equals(urlKey)) {
      return cachedValue;
    }
   
    cachedValue = false;
   
    if (UrlOperations.urlToScheme(originalUrl) == null) {
      originalUrl = UrlOperations.HTTP_SCHEME + originalUrl;
    }

    resultTester.setUrlKey(urlKey);
    resultTester.setOriginalUrl(originalUrl);
   
    return include(resultTester, throwOnFail);
  }
   
  public boolean include(CaptureSearchResult resultTester, boolean throwOnFail)
  {     
    int status = ExclusionFilter.FILTER_INCLUDE;
     
    // Admin Excludes
    if (adminFilter != null) {
      status = adminFilter.filterObject(resultTester);
    }
   
    if (status != ExclusionFilter.FILTER_INCLUDE) {
      if (throwOnFail) {
        throw new RuntimeIOException(403, new AdministrativeAccessControlException(resultTester.getOriginalUrl() + " is not available in the Wayback Machine."));
      } else {
        lastKey = resultTester.getUrlKey();
        return cachedValue;
      }
    }
   
    // Robot Excludes
    if ((robotsFilter != null) && !authToken.isIgnoreRobots()) {
      status = robotsFilter.filterObject(resultTester);
    }
   
    if (status != ExclusionFilter.FILTER_INCLUDE) {
      if (throwOnFail) {
        throw new RuntimeIOException(403, new RobotAccessControlException(resultTester.getOriginalUrl() + " is blocked by the sites robots.txt file"));
      } else {
        lastKey = resultTester.getUrlKey();
        return cachedValue;
      }
    }
   
    lastKey = resultTester.getUrlKey();
    cachedValue = true;
   
    return cachedValue;
    }
 
  @Override
  public boolean includeUrl(String urlKey, String originalUrl)
  {
    return include(urlKey, originalUrl, true);
  }

  @Override
    public boolean includeCapture(CDXLine line) {
   
      if (!include(line.getUrlKey(), line.getOriginalUrl(), false)) {
        return false;
      }
     
    // Custom Prefix Filters
    if (prefixFilter1 != null) {
      if (!prefixFilter1.include(line)) {
        return false;
      }
    }
   
    if (prefixFilter2 != null) {
      if (!prefixFilter2.include(line)) {
        return false;
      }
    }
   
    return true;
    }
}
TOP

Related Classes of org.archive.wayback.resourceindex.cdxserver.AccessCheckFilter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.