Examples of RobotsTxtConfig


Examples of de.anomic.http.server.RobotsTxtConfig

   
    public static servletProperties respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final Switchboard sb = (Switchboard) env;
        final servletProperties prop = new servletProperties();
       
        final RobotsTxtConfig rbc = ((Switchboard)env).robotstxtConfig;
        prop.put("clientname", sb.peers.mySeed().getPublicAddress());
       
        if (post != null) {
            if (post.containsKey("save")) {
                rbc.setAllDisallowed(post.containsKey(RobotsTxtConfig.ALL));
                rbc.setBlogDisallowed(post.containsKey(RobotsTxtConfig.BLOG));
                rbc.setBookmarksDisallowed(post.containsKey(RobotsTxtConfig.BOOKMARKS));
                rbc.setDirsDisallowed(post.containsKey(RobotsTxtConfig.DIRS));
                rbc.setFileshareDisallowed(post.containsKey(RobotsTxtConfig.FILESHARE));
                rbc.setHomepageDisallowed(post.containsKey(RobotsTxtConfig.HOMEPAGE));
                rbc.setLockedDisallowed(post.containsKey(RobotsTxtConfig.LOCKED));
                rbc.setNetworkDisallowed(post.containsKey(RobotsTxtConfig.NETWORK));
                rbc.setNewsDisallowed(post.containsKey(RobotsTxtConfig.NEWS));
                rbc.setStatusDisallowed(post.containsKey(RobotsTxtConfig.STATUS));
                rbc.setSurftipsDisallowed(post.containsKey(RobotsTxtConfig.SURFTIPS));
                rbc.setWikiDisallowed(post.containsKey(RobotsTxtConfig.WIKI));
                rbc.setProfileDisallowed(post.containsKey(RobotsTxtConfig.PROFILE));
                env.setConfig(SwitchboardConstants.ROBOTS_TXT, rbc.toString());
            }
        }
       
        prop.put(RobotsTxtConfig.ALL + ".checked", (rbc.isAllDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.BLOG + ".checked", (rbc.isBlogDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.BOOKMARKS + ".checked", (rbc.isBookmarksDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.DIRS + ".checked", (rbc.isDirsDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.FILESHARE + ".checked", (rbc.isFileshareDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.HOMEPAGE + ".checked", (rbc.isHomepageDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.LOCKED + ".checked", (rbc.isLockedDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.NETWORK + ".checked", (rbc.isNetworkDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.NEWS + ".checked", (rbc.isNewsDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.STATUS + ".checked", (rbc.isStatusDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.SURFTIPS + ".checked", (rbc.isSurftipsDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.WIKI + ".checked", (rbc.isWikiDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.PROFILE + ".checked", (rbc.isProfileDisallowed()) ? "1" : "0");
        return prop;
    }
View Full Code Here

Examples of de.anomic.http.server.RobotsTxtConfig

public class robots {
   
    public static servletProperties respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final servletProperties prop = new servletProperties();
        final RobotsTxtConfig rbc = ((Switchboard)env).robotstxtConfig;
       
        if (rbc.isAllDisallowed()) {
            prop.put(RobotsTxtConfig.ALL, 1);
        } else {
            if (rbc.isBlogDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.BLOG, "1");
            if (rbc.isBookmarksDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.BOOKMARKS, "1");
            if (rbc.isFileshareDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.FILESHARE, "1");
            if (rbc.isHomepageDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.HOMEPAGE, "1");
            if (rbc.isNetworkDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.NETWORK, "1");
            if (rbc.isNewsDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.NEWS, "1");
            if (rbc.isStatusDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.STATUS, "1");
            if (rbc.isSurftipsDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.SURFTIPS, "1");
            if (rbc.isWikiDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.WIKI, "1");
            if (rbc.isProfileDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.PROFILE, "1");
           
            if (rbc.isLockedDisallowed() || rbc.isDirsDisallowed()) {
                final ArrayList<String>[] p = getFiles(env.getConfig(SwitchboardConstants.HTROOT_PATH, SwitchboardConstants.HTROOT_PATH_DEFAULT));
                if (rbc.isLockedDisallowed()) {
                    prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.LOCKED, p[0].size());
                    for (int i=0; i<p[0].size(); i++)
                        prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.LOCKED + "_" + i + "_page", p[0].get(i));
                }
                if (rbc.isDirsDisallowed()) {
                    prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.DIRS, p[1].size());
                    for (int i=0; i<p[1].size(); i++)
                        prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.DIRS + "_" + i + "_dir", p[1].get(i));
                }
            }
View Full Code Here

Examples of de.anomic.http.server.RobotsTxtConfig

   
    public static servletProperties respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final Switchboard sb = (Switchboard) env;
        final servletProperties prop = new servletProperties();
       
        final RobotsTxtConfig rbc = ((Switchboard)env).robotstxtConfig;
        prop.put("clientname", sb.peers.mySeed().getPublicAddress());
       
        if (post != null) {
            if (post.containsKey("save")) {
                rbc.setAllDisallowed(post.containsKey(RobotsTxtConfig.ALL));
                rbc.setBlogDisallowed(post.containsKey(RobotsTxtConfig.BLOG));
                rbc.setBookmarksDisallowed(post.containsKey(RobotsTxtConfig.BOOKMARKS));
                rbc.setDirsDisallowed(post.containsKey(RobotsTxtConfig.DIRS));
                rbc.setFileshareDisallowed(post.containsKey(RobotsTxtConfig.FILESHARE));
                rbc.setHomepageDisallowed(post.containsKey(RobotsTxtConfig.HOMEPAGE));
                rbc.setLockedDisallowed(post.containsKey(RobotsTxtConfig.LOCKED));
                rbc.setNetworkDisallowed(post.containsKey(RobotsTxtConfig.NETWORK));
                rbc.setNewsDisallowed(post.containsKey(RobotsTxtConfig.NEWS));
                rbc.setStatusDisallowed(post.containsKey(RobotsTxtConfig.STATUS));
                rbc.setSurftipsDisallowed(post.containsKey(RobotsTxtConfig.SURFTIPS));
                rbc.setWikiDisallowed(post.containsKey(RobotsTxtConfig.WIKI));
                rbc.setProfileDisallowed(post.containsKey(RobotsTxtConfig.PROFILE));
                env.setConfig(SwitchboardConstants.ROBOTS_TXT, rbc.toString());
            }
        }
       
        prop.put(RobotsTxtConfig.ALL + ".checked", (rbc.isAllDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.BLOG + ".checked", (rbc.isBlogDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.BOOKMARKS + ".checked", (rbc.isBookmarksDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.DIRS + ".checked", (rbc.isDirsDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.FILESHARE + ".checked", (rbc.isFileshareDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.HOMEPAGE + ".checked", (rbc.isHomepageDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.LOCKED + ".checked", (rbc.isLockedDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.NETWORK + ".checked", (rbc.isNetworkDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.NEWS + ".checked", (rbc.isNewsDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.STATUS + ".checked", (rbc.isStatusDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.SURFTIPS + ".checked", (rbc.isSurftipsDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.WIKI + ".checked", (rbc.isWikiDisallowed()) ? "1" : "0");
        prop.put(RobotsTxtConfig.PROFILE + ".checked", (rbc.isProfileDisallowed()) ? "1" : "0");
        return prop;
    }
View Full Code Here

Examples of de.anomic.http.server.RobotsTxtConfig

public class robots {
   
    public static servletProperties respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final servletProperties prop = new servletProperties();
        final RobotsTxtConfig rbc = ((Switchboard)env).robotstxtConfig;
       
        if (rbc.isAllDisallowed()) {
            prop.put(RobotsTxtConfig.ALL, 1);
        } else {
            if (rbc.isBlogDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.BLOG, "1");
            if (rbc.isBookmarksDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.BOOKMARKS, "1");
            if (rbc.isFileshareDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.FILESHARE, "1");
            if (rbc.isHomepageDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.HOMEPAGE, "1");
            if (rbc.isNetworkDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.NETWORK, "1");
            if (rbc.isNewsDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.NEWS, "1");
            if (rbc.isStatusDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.STATUS, "1");
            if (rbc.isSurftipsDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.SURFTIPS, "1");
            if (rbc.isWikiDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.WIKI, "1");
            if (rbc.isProfileDisallowed()) prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.PROFILE, "1");
           
            if (rbc.isLockedDisallowed() || rbc.isDirsDisallowed()) {
                final ArrayList<String>[] p = getFiles(env.getConfig(SwitchboardConstants.HTROOT_PATH, SwitchboardConstants.HTROOT_PATH_DEFAULT));
                if (rbc.isLockedDisallowed()) {
                    prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.LOCKED, p[0].size());
                    for (int i=0; i<p[0].size(); i++)
                        prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.LOCKED + "_" + i + "_page", p[0].get(i));
                }
                if (rbc.isDirsDisallowed()) {
                    prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.DIRS, p[1].size());
                    for (int i=0; i<p[1].size(); i++)
                        prop.put(RobotsTxtConfig.ALL + "_" + RobotsTxtConfig.DIRS + "_" + i + "_dir", p[1].get(i));
                }
            }
View Full Code Here

Examples of edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig

            crawlConfig.setCrawlStorageFolder( storageFolder.getAbsolutePath() );
            crawlConfig.setUserAgentString("Apache Any23 Web Crawler");
           
            final PageFetcher pageFetcher = new PageFetcher(crawlConfig);

            RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
            final RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);
           
            controller = new CrawlController(crawlConfig, pageFetcher, robotstxtServer);
        } catch (Exception e) {
            throw new IllegalArgumentException("Error while initializing crawler controller.", e);
View Full Code Here

Examples of edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig

    config.setCrawlStorageFolder(rootFolder);
    config.setMaxPagesToFetch(10);
    config.setPolitenessDelay(1000);

    PageFetcher pageFetcher = new PageFetcher(config);
    RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
    RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);
    CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);

    controller.addSeed("http://www.ics.uci.edu/");
    controller.start(LocalDataCollectorCrawler.class, numberOfCrawlers);
View Full Code Here

Examples of edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig

    /*
     * Instantiate the controller for this crawl.
     */
    PageFetcher pageFetcher = new PageFetcher(config);
    RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
    RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);
    CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);

    /*
     * For each crawl, you need to add some seed urls. These are the first
View Full Code Here

Examples of edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig

    PageFetcher pageFetcher2 = new PageFetcher(config2);

    /*
     * We will use the same RobotstxtServer for both of the crawlers.
     */
    RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
    RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher1);

    CrawlController controller1 = new CrawlController(config1, pageFetcher1, robotstxtServer);
    CrawlController controller2 = new CrawlController(config2, pageFetcher2, robotstxtServer);

View Full Code Here

Examples of edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig

    config.setIncludeBinaryContentInCrawling(true);

    String[] crawlDomains = new String[] { "http://uci.edu/" };

    PageFetcher pageFetcher = new PageFetcher(config);
    RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
    RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);
    CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);
    for (String domain : crawlDomains) {
      controller.addSeed(domain);
    }
View Full Code Here

Examples of edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig

    /*
     * Instantiate the controller for this crawl.
     */
    PageFetcher pageFetcher = new PageFetcher(config);
    RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
    RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);
    CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);

    /*
     * For each crawl, you need to add some seed urls. These are the first
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.