Package de.excrawler.server

Source Code of de.excrawler.server.crawllistWorker

/*
*  Copyright (C) 2010 Yves Hoppe.  All Rights Reserved.
*  http://ex-crawler.sourceforge.net
*
*  This is free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License as published by
*  the Free Software Foundation; either version 2 of the License, or
*  (at your option) any later version.
*
*  This software is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU General Public License for more details.
*
*  You should have received a copy of the GNU General Public License
*  along with this software; if not, write to the Free Software
*  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
*  USA.
*/

package de.excrawler.server;

import java.io.*;
import java.net.*;
import java.sql.*;
import java.util.logging.*;
import java.util.regex.Pattern;

/**
 * Controls the crawling of websites.
 * Each worker thread is started with the letters to crawl (like a|b|c|d|e)
 * and the MySQL row limit per query.
 * @author Yves Hoppe
 */
public class crawllistWorker extends Thread {

    public String letter;
    public int limit;

    public static String status = "crawllistWorker is doing nothing";
    public static int WORKING = 1;

    Logger logger = Logger.getLogger(Main.class.getName());
    Connection connection = null;

  
    public crawllistWorker(String chr, int lim) {
        super("crawl " + chr);
        this.letter = chr;
        this.limit = lim;
    }

    @Override
    public void run() {
        try {
            String currentLetter = null;
            int current = 0;

            // System.out.println("letter: " + letter);

            // Split the pipe-separated letter list, e.g. "a|b|c" -> {"a", "b", "c"}
            String pattern = "[|]";
            Pattern splitter = Pattern.compile(pattern);
            String[] result = splitter.split(letter);

            while (true) {
                String address = null;
                String filename = null;
                currentLetter = result[current];

                Class.forName("com.mysql.jdbc.Driver");
                connection = DriverManager.getConnection(CrawlerConfig.MYSQLURL.toString(), CrawlerConfig.MYSQLUSER, CrawlerConfig.MYSQLPASSWORD);

                // One crawl list table per letter: crawllist_a, crawllist_b, ...
                String sql = "SELECT * FROM crawllist_" + currentLetter + " WHERE status = 1 OR status = 6 ORDER BY priority ASC LIMIT 0, ?";

                PreparedStatement statement = connection.prepareStatement(sql, ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_UPDATABLE);
                statement.setInt(1, limit);

                ResultSet entry = statement.executeQuery();

                while (entry.next()) {
                    address = entry.getString("url");
                    filename = CrawlerConfig.TMPDIR + File.separatorChar + "tmp_" + currentLetter + "_" + String.valueOf(entry.getInt("id"));
                    status = "crawllistWorker crawls: " + address;
                    /* This needs updating !!! */
                    // InitCrawler crawl = new InitCrawler(address, filename, currentLetter);
                    // crawl.run();
                    new InitCrawler(address, filename, currentLetter).start();

                    Thread.sleep(7000); // prevention of too many threads - optimize with isAlive()! value should depend on thread count

                    entry.deleteRow();
                }

                entry.close();
                statement.close();
                connection.close();

                if (WORKING == 0)
                    break;

                // System.out.println("Current: " + current + " Letter: " + currentLetter);

                // Move on to the next letter, wrapping around at the end of the list
                if (current < result.length - 1)
                    current++;
                else
                    current = 0;
            }
        } catch (Exception e) {
            logger.log(Level.WARNING, "Exception at crawllistWorker - restarting", e);
        }
    }

    public String getStatus(){
        return status;
    }

}
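
For illustration, a minimal usage sketch (not part of the original source) of how a caller might start this worker. It assumes the per-letter tables crawllist_a, crawllist_b and crawllist_c exist and that CrawlerConfig has already been loaded by the surrounding server code; the class name CrawllistWorkerExample and the limit of 50 are made up for the example.

/* Hypothetical usage sketch - not part of the original source. */
package de.excrawler.server;

public class CrawllistWorkerExample {
    public static void main(String[] args) throws InterruptedException {
        // One worker cycles over the letters a, b and c, fetching at most
        // 50 queued URLs per table and per pass (status 1 or 6, by priority).
        crawllistWorker worker = new crawllistWorker("a|b|c", 50);
        worker.start();

        // Later, setting the shared WORKING flag to 0 makes the worker
        // break out of its loop after finishing the current pass.
        crawllistWorker.WORKING = 0;
        worker.join();
    }
}

Because WORKING and status are static fields, they are shared by all crawllistWorker instances, so clearing the flag stops every worker after its current pass, not just this one.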