Package org.apache.nutch.protocol.http.api.RobotRulesParser

Examples of org.apache.nutch.protocol.http.api.RobotRulesParser.RobotRuleSet


       boolean[] allowed) {
    String agentsString= agents[0];
    for (int i= 1; i < agents.length; i++)
      agentsString= agentsString + "," + agents[i];
    RobotRulesParser p= new RobotRulesParser(agents);
    RobotRuleSet rules= p.parseRules(ROBOTS_STRINGS[robotsString] != null
                                     ? ROBOTS_STRINGS[robotsString].getBytes()
                                     : null);
    for (int i= 0; i < paths.length; i++) {
      assertTrue("testing robots file "+robotsString+", on agents ("
     + agentsString + "), and path " + TEST_PATHS[i] + "; got "
     + rules.isAllowed(TEST_PATHS[i]) + ", rules are: " + LF
           + rules,
     rules.isAllowed(TEST_PATHS[i]) == allowed[i]);
    }
  }
View Full Code Here


       boolean[] allowed) {
    String agentsString= agents[0];
    for (int i= 1; i < agents.length; i++)
      agentsString= agentsString + "," + agents[i];
    RobotRulesParser p= new RobotRulesParser(agents);
    RobotRuleSet rules= p.parseRules(ROBOTS_STRINGS[robotsString] != null
                                     ? ROBOTS_STRINGS[robotsString].getBytes()
                                     : null);
    for (int i= 0; i < paths.length; i++) {
      assertTrue("testing robots file "+robotsString+", on agents ("
     + agentsString + "), and path " + TEST_PATHS[i] + "; got "
     + rules.isAllowed(TEST_PATHS[i]) + ", rules are: " + LF
           + rules,
     rules.isAllowed(TEST_PATHS[i]) == allowed[i]);
    }
  }
View Full Code Here

                        "Disallow:/baz" + CR;
    String delayRule2 = "User-agent: foobot" + CR +
                        "Crawl-delay: 20" + CR +
                        "User-agent: *" + CR +
                        "Disallow:/baz" + CR;
    RobotRuleSet rules = p.parseRules(delayRule1.getBytes());
    long crawlDelay = rules.getCrawlDelay();
    assertTrue("testing crawl delay for agent nutchbot - rule 1", (crawlDelay == 10000));
    rules = p.parseRules(delayRule2.getBytes());
    crawlDelay = rules.getCrawlDelay();
    assertTrue("testing crawl delay for agent nutchbot - rule 2", (crawlDelay == -1));
  }
View Full Code Here

       boolean[] allowed) {
    String agentsString= agents[0];
    for (int i= 1; i < agents.length; i++)
      agentsString= agentsString + "," + agents[i];
    RobotRulesParser p= new RobotRulesParser(agents);
    RobotRuleSet rules= p.parseRules(ROBOTS_STRINGS[robotsString] != null
                                     ? ROBOTS_STRINGS[robotsString].getBytes()
                                     : null);
    for (int i= 0; i < paths.length; i++) {
      assertTrue("testing robots file "+robotsString+", on agents ("
     + agentsString + "), and path " + TEST_PATHS[i] + "; got "
     + rules.isAllowed(TEST_PATHS[i]) + ", rules are: " + LF
           + rules,
     rules.isAllowed(TEST_PATHS[i]) == allowed[i]);
    }
  }
View Full Code Here

TOP

Related Classes of org.apache.nutch.protocol.http.api.RobotRulesParser.RobotRuleSet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.