Package rcm.util

Examples of rcm.util.Timer


           
        state = CrawlEvent.STARTED;
        sendCrawlEvent (state);
       
        synchronized (crawlQueue) {           
            Timer timer = new CrawlTimer (this);
            int timeout = dp.getCrawlTimeout();
            if (timeout > 0)
                timer.set (timeout*1000, false);

            int nWorms = Math.max (dp.getMaxThreads (), 1);
            worms = new Worm[nWorms];
            for (int i=0; i<nWorms; ++i) {
                worms[i] = new Worm (this, i);
                worms[i].start ();
            }

            try {
                while (state == CrawlEvent.STARTED) {
                    if (numPagesLeft == 0) {
                        // ran out of links to crawl
                        state = CrawlEvent.STOPPED;
                        sendCrawlEvent (state);
                    }
                    else if (synchronous) {
                        // Synchronous mode.
                        // Main thread calls process() on each link
                        // in crawlQueue, in priority order.
                        Link link = (Link)crawlQueue.getMin ();
                        if (link.getStatus () == LinkEvent.DOWNLOADED)
                            process (link);
                        else
                            crawlQueue.wait ();
                    }
                    else
                        // Asynchronous crawling.
                        // Main thread does nothing but wait, while
                        // background threads call process().
                        crawlQueue.wait ();
                }
            } catch (InterruptedException e) {}

            timer.cancel ();
               
            for (int i=0; i<worms.length; ++i)
                worms[i].die ();
            if (state == CrawlEvent.PAUSED) {
                // put partly-processed links back in fetchQueue
View Full Code Here


     * Fetch loop
     *
     */

    void fetch (Worm w) {
        Timer timer = new WormTimer (w);

        while (!w.dead) {
            //System.err.println (w + ": fetching a link");

            // pull the highest-priority link from the fetch queue
            synchronized (fetchQueue) {
                while (!w.dead
                       && (w.link = (Link)fetchQueue.deleteMin ()) == null) {
                    try {
                        fetchQueue.wait ();
                    } catch (InterruptedException e) {}
                }
            }

            if (w.dead)
                return;
               
            //System.err.println (w + ": processing " + w.link.toDescription());
           
            try {
                // download the link to get a page
                DownloadParameters dp;
                Page page;

                dp = w.link.getDownloadParameters();
                if (dp == null)
                    dp = this.dp;
                int timeout = dp.getDownloadTimeout();

                sendLinkEvent (w.link, LinkEvent.RETRIEVING);
                try {
                   
                    if (timeout > 0)
                        timer.set (timeout*1000, false);

                    if (dp.getObeyRobotExclusion()
                        && robotExclusion.disallowed (w.link.getURL()))
                        throw new IOException ("disallowed by Robot Exclusion Standard (robots.txt)");

                    page = new Page (w.link, dp);
                   
                } finally {
                    timer.cancel ();
                }
                   
                if (w.dead)
                    return;
                   
View Full Code Here

TOP

Related Classes of rcm.util.Timer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and owned by ORACLE Inc. Contact coftware#gmail.com.