package net.matuschek.spider;
/*********************************************
Copyright (c) 2001 by Daniel Matuschek
*********************************************/
import java.net.MalformedURLException;
import java.net.URL;
import net.matuschek.http.HttpConstants;
/**
* The RobotTask implements a simple object that represents a task
* for the web robot.
*
* @author Daniel Matuschek
* @version $Id: RobotTask.java,v 1.7 2003/02/25 13:34:48 oliver_schmidt Exp $
*/
public class RobotTask
  implements Comparable
{

  // The URL is kept only as a String (no URL field) to save memory.

  /** maximal search depth starting from this task */
  private int maxDepth;

  /** content of the HTTP Referer header ("-" means: send no Referer) */
  private String referer;

  /** request method code, defaults to HttpConstants.GET */
  protected int method = HttpConstants.GET;

  /** request parameters as a String, null if there are none */
  protected String paramString = null;

  /**
   * cached hashCode for quick retrieval and comparison;
   * 0 means "not computed yet" and is restored by every setter that
   * changes a value used in the internal string representation
   */
  protected int hashCode = 0;

  /** URL as String because the class URL uses lots of space */
  protected String urlString;

  /** number of retries */
  protected int retries = 0;


  /**
   * Creates a new RobotTask with the given parameters
   *
   * @param url - the URL of the RobotTask
   * @param maxDepth - maximal search depth starting from this task
   * @param referer - content of the HTTP Referer header, use "-" if
   * you don't want to use a Referer
   */
  public RobotTask(URL url, int maxDepth, String referer) {
    setUrl(url);
    this.maxDepth = maxDepth;
    this.referer = referer;
  }


  /**
   * Creates a new RobotTask with the given parameters.
   * The URL string is stored as given and is not validated here;
   * it is parsed only when {@link #getUrl()} is called.
   *
   * @param urlString - the URL (as String) of the RobotTask
   * @param maxDepth - maximal search depth starting from this task
   * @param referer - content of the HTTP Referer header, use "-" if
   * you don't want to use a Referer
   */
  public RobotTask(String urlString, int maxDepth, String referer) {
    this.urlString = urlString;
    this.maxDepth = maxDepth;
    this.referer = referer;
  }


  /**
   * Gets the URL of this task. A new URL object is built from the
   * stored String on every call.
   *
   * @return the URL, or <code>null</code> if the stored String is
   * not a valid URL (the stack trace of the parse error is printed
   * in that case)
   */
  public URL getUrl() {
    try {
      return new URL(urlString);
    } catch (MalformedURLException e) {
      e.printStackTrace();
      return null;
    }
  }


  /**
   * Sets the URL of this task and invalidates the cached hash code.
   *
   * @param url the new URL, stored in its String form
   */
  public void setUrl(URL url) {
    urlString = url.toString();
    hashCode = 0;
  }


  /**
   * @return maximal search depth starting from this task
   */
  public int getMaxDepth() {
    return maxDepth;
  }


  /**
   * @param maxDepth maximal search depth starting from this task
   */
  public void setMaxDepth(int maxDepth) {
    this.maxDepth = maxDepth;
  }


  /**
   * @return content of the HTTP Referer header
   */
  public String getReferer() {
    return referer;
  }


  /**
   * @param referer content of the HTTP Referer header
   */
  public void setReferer(String referer) {
    this.referer = referer;
  }


  /**
   * @return the request method code of this task
   */
  public int getMethod() {
    return method;
  }


  /**
   * Sets the request method and invalidates the cached hash code.
   *
   * @param method the request method code
   */
  public void setMethod(int method) {
    this.method = method;
    hashCode = 0;
  }


  /**
   * @return the request parameters, may be <code>null</code>
   */
  public String getParamString() {
    return paramString;
  }


  /**
   * Sets the request parameters and invalidates the cached hash code.
   *
   * @param paramString the request parameters, may be <code>null</code>
   */
  public void setParamString(String paramString) {
    this.paramString = paramString;
    hashCode = 0;
  }


  /**
   * Two RobotTasks are equal, if they have the same internal string
   * representation, that means the same URL, parameters and method.
   * Search depth, referer and retry count are not considered.
   *
   * @param o the object to compare to
   * @return true if o is a RobotTask that compares as equal to this
   * one, false otherwise (also for <code>null</code> and objects of
   * other types)
   */
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    // explicit type check instead of catching a ClassCastException;
    // instanceof is also false for null
    if (!(o instanceof RobotTask)) {
      return false;
    }
    return (compareTo(o) == 0);
  }


  /**
   * Implements a natural order for RobotTasks. This is based
   * on the hash code first (cheap, because it is cached) and, only
   * if both hash codes are the same, on the lexicographic order of
   * the internal string representations.
   *
   * @param o another RobotTask object to compare to
   * @return 0 if o is equal to this object, a negative value if this
   * object is ordered before o, a positive value otherwise
   * @exception ClassCastException if o is no RobotTask object
   * or if o is null
   */
  public int compareTo(Object o)
    throws ClassCastException
  {
    RobotTask r = (RobotTask)o;
    if (r == null) {
      throw new ClassCastException("object to compare to is null");
    }

    int myHash = hashCode();
    int otherHash = r.hashCode();
    if (myHash != otherHash) {
      // Do not subtract the hash codes: the difference of two ints
      // can overflow and then has the wrong sign, which would make
      // the order inconsistent.
      return (myHash < otherHash) ? -1 : 1;
    }

    // same hash code: decide by the full representation
    String me = this.getInternalStringRepresentation();
    String it = r.getInternalStringRepresentation();
    return me.compareTo(it);
  }


  /**
   * Gets a String representation for this RobotTask object. Format
   * may change without notice. Should be used for debugging and logging
   * only.
   * @return a String representation for this task
   */
  public String toString() {
    return urlString + " " + paramString + " Method " + method;
  }


  /**
   * Gets a hashcode for this object. It is based on the String hash code
   * implementation used with the internal string representation of this
   * object. The value is cached; a cached value of 0 is treated as
   * "not computed", so a representation whose hash really is 0 is
   * hashed again on every call.
   *
   * @return the hash code of the internal string representation
   */
  public int hashCode() {
    if (hashCode == 0) {
      hashCode = getInternalStringRepresentation().hashCode();
    }
    return hashCode;
  }


  /**
   * Gets an internal String representation for comparisons
   * and hash code generation.
   *
   * For a task without parameters that uses the default method
   * (HttpConstants.GET) this is simply the URL string. Otherwise it
   * is the plain concatenation of URL string, parameter string and
   * method code, without separators (a <code>null</code> parameter
   * string is appended as "null"). No digest of the value is
   * computed.
   *
   * @return a String used as key for equals, compareTo and hashCode
   */
  public String getInternalStringRepresentation() {
    return (paramString == null && method == HttpConstants.GET)
      ? urlString
      : urlString + paramString + method;
  }


  /**
   * Increases retries and returns increased value.
   *
   * @return the number of retries after the increment
   */
  public int retry() { return ++retries; }

}