Package com.flaptor.hounder.crawler.pagedb.distributed

Examples of com.flaptor.hounder.crawler.pagedb.distributed.DPageDB


        if (!distributed) {
            logger.error("Can't redistribute a non-distributed PageDB");
        } else {
            try {
                PageDB oldPageDB = new PageDB(pagedbDir);
                DPageDB newPageDB = new DPageDB(pagedbDir+".new");
                oldPageDB.open(PageDB.READ);
                newPageDB.open(DPageDB.WRITE + DPageDB.UNSORTED);
                long total = oldPageDB.getSize();
                long done = 0;
                for (Page page : oldPageDB) {
                    newPageDB.addPage(page);
                    if (++done % 10000 == 0) {
                        logger.info("Redistributed "+done+" of "+total+" pages.");
                    }
                }
                oldPageDB.close();
                newPageDB.close();
                String oldName = oldPageDB.getDir();
                PageDB tmpPageDB = new PageDB(pagedbDir+".tmp");
                tmpPageDB.deleteDir(false);
                if (attempt(oldPageDB.rename(tmpPageDB.getDir()), "renaming pagedb -> pagedb.tmp")) {
                    if (attempt(newPageDB.rename(oldName), "renaming pagedb.new -> pagedb")) {
                        if (attempt(tmpPageDB.deleteDir(false), "deleting pagedb.tmp")) {
                            logger.info("Done redistributing.");
                        }
                    }
                }
View Full Code Here


     */
    public void cleanup() {
        CrawlerProgress.cleanup();
        PageDB tmpPageDB;
        if (distributed) {
            tmpPageDB = new DPageDB(pagedbDir + ".tmp", pageCatcher);
        } else {
            tmpPageDB = new PageDB(pagedbDir + ".tmp");
        }
        PageDB newPageDB = new PageDB(pagedbDir + ".new");
        tmpPageDB.deleteDir(true);
View Full Code Here

        PageDB oldPageDB = new PageDB(pagedbDir);
        PageDB tmpPageDB;
        PageDB newPageDB;
        if (createNewPageDB) {
            if (distributed) {
                tmpPageDB = new DPageDB(pagedbDir + ".tmp", pageCatcher);
//                if (starting) {
//                    logger.info("Waiting for other nodes to start...");
//                    ((DPageDB)tmpPageDB).synch();
//                    logger.info("All nodes started");
//                    starting = false;
View Full Code Here

TOP

Related Classes of com.flaptor.hounder.crawler.pagedb.distributed.DPageDB

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.