/*
* Copyright (C) 2010 Yves Hoppe. All Rights Reserved.
* http://ex-crawler.sourceforge.net
*
* This is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this software; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
* USA.
*/
package de.excrawler.server;
import java.io.*;
import java.net.*;
import java.sql.*;
import java.util.logging.*;
import java.util.regex.Pattern;
/**
 * Controls the crawl of websites.
 *
 * Worker thread that cycles round-robin over a pipe-separated list of
 * letters (e.g. "a|b|c|d|e"), fetching up to {@code limit} pending URLs
 * from each per-letter crawllist table and starting an {@code InitCrawler}
 * for every entry found.
 *
 * @author Yves Hoppe
 */
public class crawllistWorker extends Thread {

    /** Pipe-separated list of crawllist table suffixes, e.g. "a|b|c". */
    public String letter;

    /** Maximum number of rows fetched per table per pass (bound into the SQL LIMIT). */
    public int limit;

    /** Human-readable status of this worker, exposed via {@link #getStatus()}. */
    public static String status = "Download Worker is doing nothing";

    /** Loop control flag: the run loop exits after a pass when set to 0. */
    public static int WORKING = 1;

    /** Delimiter splitting the letter list. Compiled once instead of per run(). */
    private static final Pattern SPLITTER = Pattern.compile("[|]");

    Logger logger = Logger.getLogger(Main.class.getName());

    // Kept for backward compatibility; crawlTable() now manages its own
    // connection via try-with-resources so it cannot leak.
    Connection connection = null;

    /**
     * @param chr pipe-separated letters identifying the crawllist tables
     * @param lim maximum number of rows to fetch per query
     */
    public crawllistWorker(String chr, int lim) {
        super("crawl " + chr);
        this.letter = chr;
        this.limit = lim;
    }

    @Override
    public void run() {
        try {
            String[] letters = SPLITTER.split(letter);
            int current = 0;

            // Load the JDBC driver once, not on every loop iteration
            // as the previous version did.
            Class.forName("com.mysql.jdbc.Driver");

            while (true) {
                crawlTable(letters[current]);

                if (WORKING == 0) {
                    break;
                }
                // Advance round-robin over the configured letters.
                current = (current + 1) % letters.length;
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so supervisors can observe it;
            // the old code silently swallowed the interrupt.
            Thread.currentThread().interrupt();
            logger.log(Level.INFO, "crawllistWorker interrupted - stopping", e);
        } catch (Exception e) {
            // NOTE(review): the message says "restarting" but the thread simply
            // dies here; an external supervisor must restart it if desired.
            logger.log(Level.WARNING, "Exception at crawllistWorker - restarting", e);
        }
    }

    /**
     * Fetches up to {@code limit} pending entries (status 1 or 6, highest
     * priority first) from the crawllist table for the given letter, starts
     * an InitCrawler for each entry and deletes the processed row.
     *
     * @param currentLetter suffix of the crawllist table to process
     * @throws Exception on SQL errors or when the throttle sleep is interrupted
     */
    private void crawlTable(String currentLetter) throws Exception {
        // The table suffix comes from the configured letter list and cannot be
        // bound as a parameter, so it is concatenated into the table name;
        // the LIMIT value, however, is now a bound parameter.
        String sql = "Select * FROM crawllist_" + currentLetter
                + " WHERE status = 1 or status = 6 ORDER by priority ASC LIMIT 0, ?";

        // try-with-resources guarantees connection/statement/result set are
        // closed even when a crawl or the sleep throws (the original leaked
        // all three on any exception).
        try (Connection conn = DriverManager.getConnection(
                     CrawlerConfig.MYSQLURL.toString(),
                     CrawlerConfig.MYSQLUSER,
                     CrawlerConfig.MYSQLPASSWORD);
             PreparedStatement statement = conn.prepareStatement(
                     sql, ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_UPDATABLE)) {

            statement.setInt(1, limit);

            try (ResultSet entry = statement.executeQuery()) {
                while (entry.next()) {
                    String address = entry.getString("url");
                    String filename = CrawlerConfig.TMPDIR + File.separatorChar
                            + "tmp_" + currentLetter + "_" + entry.getInt("id");
                    status = "crawllistWorker crawls: " + address;

                    new InitCrawler(address, filename, currentLetter).start();

                    // Throttle thread creation; value should ideally depend on
                    // the current thread count (static call, not this.sleep()).
                    Thread.sleep(7000);

                    // Remove the entry via the updatable result set once the
                    // crawler for it has been started.
                    entry.deleteRow();
                }
            }
        }
    }

    /**
     * @return the current human-readable worker status
     */
    public String getStatus() {
        return status;
    }
}