Package net.matuschek.spider

Source Code of net.matuschek.spider.RobotTask

package net.matuschek.spider;

/*********************************************
    Copyright (c) 2001 by Daniel Matuschek
*********************************************/

import java.net.MalformedURLException;
import java.net.URL;

import net.matuschek.http.HttpConstants;

/**
* The RobotTask implements a simple object that represents a task
* for the web robot.
*
* @author Daniel Matuschek
* @version $Id: RobotTask.java,v 1.7 2003/02/25 13:34:48 oliver_schmidt Exp $
*/
public class RobotTask
  implements Comparable
{

  /**
   * Creates a new RobotTask with the given parameters
   *
   * @param url - the URL of the RobotTask
   * @param maxDepth - maximal search depth starting from this task
   * @param referer - content of the HTTP Referer header, use "-" if
   * you don't want to use a Referer
   */
  public RobotTask(URL url, int maxDepth, String referer) {
  setUrl(url);
    this.maxDepth=maxDepth;
    this.referer=referer;
  }
 
  /**
   * Creates a new RobotTask with the given parameters
   *
   * @param urlString - the URL (as String) of the RobotTask
   * @param maxDepth - maximal search depth starting from this task
   * @param referer - content of the HTTP Referer header, use "-" if
   * you don't want to use a Referer
   */
  public RobotTask(String urlString, int maxDepth, String referer) {
  this.urlString = urlString;
  this.maxDepth=maxDepth;
  this.referer=referer;
 
 
  public URL getUrl() {
  try {
    return new URL(urlString);
  } catch (MalformedURLException e) {
    e.printStackTrace();
    return null;
  }
  }
 
  public void setUrl(URL url) {
  urlString = url.toString();
  hashCode = 0;
  }
 
  public int getMaxDepth() {
    return maxDepth;
  }
 
  public void setMaxDepth(int maxDepth) {
    this.maxDepth = maxDepth;
  }
 
  public String getReferer() {
    return referer;
  }
 
  public void setReferer(String referer) {
    this.referer = referer;
  }

  public int getMethod() {
    return method;
  }

  public void setMethod(int method) {
    this.method = method;
  hashCode = 0;
  }

  public String getParamString() {
    return paramString;
  }

  public void setParamString(String paramString) {
    this.paramString = paramString;
  hashCode = 0;
  }




  /**
   * two RobotTasks are equal, if they represent the
   * same URL
   */
  public boolean equals(Object o) {
    try {
      return (compareTo(o) == 0);
    } catch (ClassCastException e) {
      return false;
    }
  }

 
  /**
   * Implements a natural order for RobotTasks. This is based
   * on
   * @param o another RobotTask object to compare to
   * @return 0 if o is equal to this object, 1 if it is smaller,
   * -1 otherwise
   * @exception ClassCastException if o is no RobotTask object
   */
  public int compareTo(Object o)
    throws ClassCastException
  {
  RobotTask r = (RobotTask)o;

  if (r == null) {
    throw new ClassCastException("object to compare to is null");
  }

  int diff = hashCode() - r.hashCode();
  if (diff == 0) {
    String me = this.getInternalStringRepresentation();
    String it = r.getInternalStringRepresentation();
    diff = me.compareTo(it);
  }

  return diff;
  }


  /**
   * Gets a String representation for this RobotTask object. Format
   * may change without notice. Should be used for debugging and logging
   * only.
   * @return a String represantation for this task
   */
  public String toString() {
    return urlString + " " + paramString + " Method " + method;
  }


  /**
   * Gets a hashcode for this object. It is based on the String hash code
   * implementation used with the internal string representation of this
   * object
   */
  public int hashCode() {
  if (hashCode != 0) {
    return hashCode;
  }
  hashCode = getInternalStringRepresentation().hashCode();
  return hashCode;
  }

  /**
   * Gets an internal String representation for comparisons
   * and hash code generation.
   *
   * Currently this contains the url, the parameters and the method.
   * Because this is only used as an internal key and the URL and
   * parameters can be very long, I have decided to use the MD5 hash of
   * the longer representation.
   *
   * @return a String that should be unique for every object
   */
  public String getInternalStringRepresentation() {
    return (paramString == null && method == HttpConstants.GET)
    ? urlString
    : urlString + paramString + method;
 
 
//  protected URL url; -> urlString holds all the information to save memory
  private int maxDepth;
  private String referer;
  protected int method=HttpConstants.GET;
  protected String paramString=null;
 
  protected int hashCode = 0; // cached hashCode for quick retrieval and comparison
  protected String urlString; // URL as String because the class URL uses lots of space
  protected int retries = 0; // number of retries
   
/*
* Increases retries and returns increased value.
*/
public int retry() { return ++retries;  }

}
TOP

Related Classes of net.matuschek.spider.RobotTask

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.