package net.matuschek.spider;
/*********************************************
Copyright (c) 2001 by Daniel Matuschek
*********************************************/
import java.util.HashSet;
import java.util.LinkedList;
/**
* Memory based implementation of the TaskList interface.
* Uses a LinkedList for sequential access
* and a HashSet for fast retrieval
* To be thread safe, all methods are synchronized.
* <p>
* @author Daniel Matuschek, Jo Christen, Oliver Schmidt
* @version $Id: HashedMemoryTaskList.java,v 1.7 2004/08/09 13:07:22 matuschd Exp $*
*/
public class HashedMemoryTaskList implements TaskList {
public static int DEFAULT_CAPACITY = 50000;
/**
* Task store as linked list for sequential access.
*
* @link aggregation
* @associates <{RobotTask}>
*/
//private Vector list = new Vector();
private LinkedList list = new LinkedList();
/**
* Task store as hash set for fast searching
* In this list the internal string represantation of
* the task will be stored.
*
* @link aggregation
* @associates <{RobotTask}>
*/
private HashSet hashset = null;
/**
* Defines, if sequential access if allowed.
*
* Under some circumstances it may be useful to NOT have
* sequential access but only to find, if an object is in
* the list. E.g. this makes sense for the list of URLs
* already visited.
*/
private boolean sequentialAccess;
/**
* Standard constructor.
* (sequentialAccess=true, initialCapacity=DEFAULT_CAPACITY)
*/
public HashedMemoryTaskList() {
this(true, DEFAULT_CAPACITY);
}
/**
* Constructor with sequentialAccess-flag
*
* @param sequentialAccess can be set to false if the removeFirst
* and elementAt methods are not used. In this case, storage
* of a task will need less memory. This can be interesting if you
* have to store lots of objects in this list.
*/
public HashedMemoryTaskList(boolean sequentialAccess) {
this(sequentialAccess, DEFAULT_CAPACITY);
}
/**
* Constructor with sequentialAccess-flag and initialCapacity.
*
* @param sequentialAccess can be set to false if the removeFirst
* @param initial capacity (expected document count)
* and elementAt methods are not used. In this case, storage
* of a task will need less memory. This can be interesting if you
* have to store lots of objects in this list.
*/
public HashedMemoryTaskList(boolean sequentialAccess, int initialCapacity) {
this.sequentialAccess = sequentialAccess;
this.hashset = new HashSet(initialCapacity);
}
/**
* Add a task to the end of the list
*
* @param task a RobotTask object to store in the queue
*/
public synchronized void add(RobotTask task) {
if (this.sequentialAccess) {
list.add(task);
}
hashset.add(task);
}
/**
* Add a task at the beginning of list
* @param task a RobotTask object to store in the queue
*/
public synchronized void addAtStart(RobotTask task) {
if (this.sequentialAccess) {
list.add(0,task);
}
// we need to put a dummy value object to the HashTable, adding
// null as value do not work
hashset.add(task);
}
/**
* Clean up the list, remove all objects
*/
public synchronized void clear() {
list.clear();
hashset.clear();
}
/**
* Is this robot task stored in the list ?
*/
public synchronized boolean contains(RobotTask task) {
return hashset.contains(task);
}
/**
* Remove this object from the list
*/
public synchronized boolean remove(RobotTask task) {
hashset.remove(task);
if (this.sequentialAccess) {
return list.remove(task);
} else {
return true;
}
}
/**
* Get and remove the first element.
*
* @return the first task in the list. This object will also be removed
* from the list.
* @exception ArrayIndexOutOfBoundsException if the list is empty or
* sequential access is not allowed
*/
public synchronized RobotTask removeFirst()
throws ArrayIndexOutOfBoundsException
{
if (! this.sequentialAccess) {
throw
new ArrayIndexOutOfBoundsException("sequential access not allowed");
}
//RobotTask task = (RobotTask)list.elementAt(0);
//list.removeElementAt(0);
RobotTask task = (RobotTask) list.removeFirst();
hashset.remove(task);
return task;
}
/**
* Returns the number of elements in this list
*/
public synchronized int size() {
return hashset.size();
}
/**
* Get the n-th element in the list. Elements are numbered form 0 to
* size-1.
* @param position
* @exception ArrayIndexOutOfBoundsException if the given position doesn't
* exist
*/
public synchronized RobotTask elementAt(int position)
throws ArrayIndexOutOfBoundsException
{
if (! this.sequentialAccess) {
throw
new ArrayIndexOutOfBoundsException("sequential access not allowed");
}
//return (RobotTask)(list.elementAt(position));
return (RobotTask)(list.get(position));
}
}