Package net.matuschek.spider

Source Code of net.matuschek.spider.HashedMemoryTaskList

package net.matuschek.spider;

/*********************************************
     Copyright (c) 2001 by Daniel Matuschek
*********************************************/

import java.util.HashSet;
import java.util.LinkedList;

/**
* Memory based implementation of the TaskList interface.
* Uses a LinkedList for sequential access
* and a HashSet for fast retrieval
* To be thread safe, all methods are synchronized.
* <p>
* @author Daniel Matuschek, Jo Christen, Oliver Schmidt
* @version $Id: HashedMemoryTaskList.java,v 1.7 2004/08/09 13:07:22 matuschd Exp $*
*/
public class HashedMemoryTaskList implements TaskList {

   public static int DEFAULT_CAPACITY = 50000;
  
  /**
   * Task store as linked list for sequential access.
   *
   * @link aggregation
   * @associates <{RobotTask}>
   */
  //private Vector list = new Vector();
  private LinkedList list = new LinkedList();

  /**
   * Task store as hash set for fast searching
   * In this list the internal string represantation of
   * the task will be stored.
   *
   * @link aggregation
   * @associates <{RobotTask}>
   */
  private HashSet hashset = null;


  /**
   * Defines, if sequential access if allowed.
   *
   * Under some circumstances it may be useful to NOT have
   * sequential access but only to find, if an object is in
   * the list. E.g. this makes sense for the list of URLs
   * already visited.
   */
  private boolean sequentialAccess;


  /**
   * Standard constructor.
   * (sequentialAccess=true, initialCapacity=DEFAULT_CAPACITY)
   */
  public HashedMemoryTaskList() {
    this(true, DEFAULT_CAPACITY);
  }


  /**
   * Constructor with sequentialAccess-flag
   *
   * @param sequentialAccess can be set to false if the removeFirst
   * and elementAt methods are not used. In this case, storage
   * of a task will need less memory. This can be interesting if you
   * have to store lots of objects in this list.
   */
  public HashedMemoryTaskList(boolean sequentialAccess) {
    this(sequentialAccess, DEFAULT_CAPACITY);
  }

  /**
   * Constructor with sequentialAccess-flag and initialCapacity.
   *
   * @param sequentialAccess can be set to false if the removeFirst
   * @param initial capacity (expected document count)
   * and elementAt methods are not used. In this case, storage
   * of a task will need less memory. This can be interesting if you
   * have to store lots of objects in this list.
   */
  public HashedMemoryTaskList(boolean sequentialAccess, int initialCapacity) {
    this.sequentialAccess = sequentialAccess;
    this.hashset = new HashSet(initialCapacity);
  }



  /**
   * Add a task to the end of the list
   *
   * @param task a RobotTask object to store in the queue
   */
  public synchronized void add(RobotTask task) {
    if (this.sequentialAccess) {
      list.add(task);
    }
    hashset.add(task);
  }


  /**
   * Add a task at the beginning of list
   * @param task a RobotTask object to store in the queue
   */
  public synchronized void addAtStart(RobotTask task) {
    if (this.sequentialAccess) {
      list.add(0,task);
    }
    // we need to put a dummy value object to the HashTable, adding
    // null as value do not work
    hashset.add(task);
  }


  /**
   * Clean up the list, remove all objects
   */
  public synchronized void clear() {
    list.clear();
    hashset.clear();
  }


  /**
   * Is this robot task stored in the list ?
   */
  public synchronized boolean contains(RobotTask task) {
    return hashset.contains(task);
  }


  /**
   * Remove this object from the list
   */
  public synchronized boolean remove(RobotTask task) {
    hashset.remove(task);
    if (this.sequentialAccess) {
      return list.remove(task);
    } else {
      return true;
    }
  }

 
  /**
   * Get and remove the first element.
   *
   * @return the first task in the list. This object will also be removed
   * from the list.
   * @exception ArrayIndexOutOfBoundsException if the list is empty or
   * sequential access is not allowed
   */
  public synchronized RobotTask removeFirst()
    throws ArrayIndexOutOfBoundsException
  {
    if (! this.sequentialAccess) {
      throw
  new ArrayIndexOutOfBoundsException("sequential access not allowed");
    }
    //RobotTask task = (RobotTask)list.elementAt(0);
    //list.removeElementAt(0);
    RobotTask task = (RobotTask) list.removeFirst();
    hashset.remove(task);
    return task;
  }


  /**
   * Returns the number of elements in this list
   */
  public synchronized int size() {
    return hashset.size();
  }


  /**
   * Get the n-th element in the list. Elements are numbered form 0 to
   * size-1.
   * @param position
   * @exception ArrayIndexOutOfBoundsException if the given position doesn't
   * exist
   */
  public synchronized RobotTask elementAt(int position)
    throws ArrayIndexOutOfBoundsException
  {
    if (! this.sequentialAccess) {
      throw
  new ArrayIndexOutOfBoundsException("sequential access not allowed");
    }
    //return (RobotTask)(list.elementAt(position));
    return (RobotTask)(list.get(position));
  }

}
TOP

Related Classes of net.matuschek.spider.HashedMemoryTaskList

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.