// Package: de.excrawler.server
//
// Source code of de.excrawler.server.DownWebWorker

/*
*  Copyright (C) 2010 Ex-Crawler Project.  All Rights Reserved.
*  http://ex-crawler.sourceforge.net
*
*  This is free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License as published by
*  the Free Software Foundation; either version 2 of the License, or
*  (at your option) any later version.
*
*  This software is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU General Public License for more details.
*
*  You should have received a copy of the GNU General Public License
*  along with this software; if not, write to the Free Software
*  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
*  USA.
*/

package de.excrawler.server;

import java.util.logging.*;
import java.sql.*;
import java.io.*;
import java.net.*;
import java.util.regex.Pattern;
import java.util.Date;

/**
* Downloads websites and saves them to temporary files
* @author Yves Hoppe
*/

public class DownWebWorker extends Thread {

    public String letter;
    public int limit;

    public static String status = "Download web worker is doing nothing";
    public static int WORKING = 1;
  
    Logger logger = Logger.getLogger(Main.class.getName());
    Connection connection = null;


    DownWebWorker(String chr, int lim) {
        super("downweb-" + chr);
        this.letter = chr;
        this.limit = lim;
    }

    @Override
    public void run() {
        BufferedReader reader = null;
        BufferedWriter writer = null;
    try {
        String currentLetter = null;
        int current = 0;

//      System.out.println("letter: " + letter);
       
        String pattern = "[|]";
        Pattern splitter = Pattern.compile(pattern);
        String[] result = splitter.split(letter);      

        while(true) {
        String address = null;
        currentLetter = result[current];
       
        connection = DatabaseTools.getDBConnection();

        // Ok hardcoding it - bad hack, but no other solution in the hand
        String sql = "Select * FROM crawllist_" + currentLetter + " WHERE status = 0 or status = 5 ORDER by priority ASC LIMIT 0, ?";
        PreparedStatement statement = connection.prepareStatement(sql, ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_UPDATABLE);
        //statement.setString(1, currentLetter);
        statement.setInt(1, limit);

        ResultSet entry = statement.executeQuery();

        int success = 0;

        while(entry.next())
        {
            address = entry.getString("url");
            status = "DownloadWebWorker downloads: " + address;

            InputStream is = null;
            try {
                URL aURL = new URL(address);

                File newfile = new File(CrawlerConfig.TMPDIR + File.separatorChar + "tmp_"+ currentLetter + "_"+ entry.getInt("id"));
                // e.g sites/a_crawlistid Todo: Move to another tmp directory;
                newfile.createNewFile();

                is = aURL.openStream();

                reader = new BufferedReader(new InputStreamReader(is));
                writer = new BufferedWriter(new FileWriter(newfile), 1000000);
              
                String line;

                while( (line = reader.readLine()) != null) {
                    if(!line.isEmpty())
                    {
                    writer.write(line);
                    writer.newLine();
                    }
                }

                writer.flush();
                writer.close();
                reader.close();
                is.close();

                success = 1;
//                System.out.println("Downloaded of site successfull: " + address);

            } catch (Exception e) {
               logger.log(Level.INFO, "Error at Download Worker " + DownWebWorker.currentThread().getName(), e); // tooo many errors out there so just info
               success = 0;
               if (reader != null)
               reader.close();
               if (writer != null)
               writer.close();
            } finally {
                if (reader != null)
                reader.close();
                if (writer != null)
                writer.close();              
            }

            if (success == 1)
            {
               entry.updateInt("status", 1);
               entry.updateRow();
            } else {
               entry.updateInt("status", 7);
               int prior = entry.getInt("priority");
               prior += 3;
               entry.updateInt("priority", prior);
               entry.updateTimestamp("date", new java.sql.Timestamp(System.currentTimeMillis()));
               entry.updateRow();
            }
        }

        entry.close();
        statement.close();
        connection.close();

        if (WORKING == 0)
            break;
       
        // System.out.println("Current: " + current + " Letter: " + currentLetter);

        if (current < result.length -1)
            current++;
        else
            current = 0;
        }
       
    } catch (Exception e) {
        logger.log(Level.SEVERE, "DownWebWorker " + DownWebWorker.currentThread().getName() + " hangs", e);
        DownWebWorker.currentThread().start();   
    }
    } // End run

}
// --- Trailing text from the code-listing website (not part of the source file) ---
// Related classes of de.excrawler.server.DownWebWorker
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of
// Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.