Package us.codecraft.webmagic

Examples of us.codecraft.webmagic.Request


    @Test
    public void testMultiModel_should_not_skip_when_match() throws Exception {
        Page page = new Page();
        page.setRawText("<div foo='foo'></div>");
        page.setRequest(new Request("http://codecraft.us/foo"));
        page.setUrl(PlainText.create("http://codecraft.us/foo"));
        ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, ModelFoo.class, ModelBar.class);
        modelPageProcessor.process(page);
        assertThat(page.getResultItems().isSkip()).isFalse();
    }
View Full Code Here


    @Test
    public void testExtractLinks() throws Exception {
        ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, MockModel.class);
        Page page = getMockPage();
        modelPageProcessor.process(page);
        assertThat(page.getTargetRequests()).containsExactly(new Request("http://webmagic.io/list/1"), new Request("http://webmagic.io/list/2"), new Request("http://webmagic.io/post/1"), new Request("http://webmagic.io/post/2"));

    }
View Full Code Here

    }

    private Page getMockPage() throws IOException {
        Page page = new Page();
        page.setRawText(IOUtils.toString(getClass().getClassLoader().getResourceAsStream("html/mock-webmagic.html")));
        page.setRequest(new Request("http://webmagic.io/list/0"));
        page.setUrl(new PlainText("http://webmagic.io/list/0"));
        return page;
    }
View Full Code Here

    @Ignore
    @Test
    public void test() {
        ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog"), OschinaBlog.class);
        Page page = new Page();
        page.setRequest(new Request("http://my.oschina.net/flashsword/blog"));
        page.setUrl(new PlainText("http://my.oschina.net/flashsword/blog"));
        page.setHtml(new Html(html));
        long time = System.currentTimeMillis();
        for (int i = 0; i < 1000; i++) {
            modelPageProcessor.process(page);
View Full Code Here

            @Override
            public Site getSite() {
                return null;
            }
        };
        Request request = new Request("http://www.ibm.com/developerworks/cn/java/j-javadev2-22/");
        request.putExtra("1","2");
        redisScheduler.push(request, task);
        Request poll = redisScheduler.poll(task);
        System.out.println(poll);

    }
View Full Code Here

            "\n";
    @Override
    public Page download(Request request, Task task) {
        Page page = new Page();
        page.setHtml(new Html(html));
        page.setRequest(new Request("https://github.com/code4craft/webmagic"));
        page.setUrl(new PlainText("https://github.com/code4craft/webmagic"));
        return page;
    }
View Full Code Here

    @Ignore("infinite")
    @Test
    public void test() {
        DelayQueueScheduler delayQueueScheduler = new DelayQueueScheduler(1, TimeUnit.SECONDS);
        delayQueueScheduler.push(new Request("1"), null);
        while (true){
            Request poll = delayQueueScheduler.poll(null);
            System.out.println(System.currentTimeMillis()+"\t"+poll);
        }
    }
View Full Code Here

TOP

Related Classes of us.codecraft.webmagic.Request

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.