Package us.codecraft.webmagic.selector

Examples of us.codecraft.webmagic.selector.PlainText


        WebElement webElement = webDriver.findElement(By.xpath("/html"));
        String content = webElement.getAttribute("outerHTML");
        Page page = new Page();
        page.setRawText(content);
        page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
        page.setUrl(new PlainText(request.getUrl()));
        page.setRequest(request);
        webDriverPool.returnToPool(webDriver);
        return page;
    }
View Full Code Here


    protected Page handleResponse(Request request, String charset, HttpResponse httpResponse, Task task) throws IOException {
        String content = getContent(charset, httpResponse);
        Page page = new Page();
        page.setRawText(content);
        page.setUrl(new PlainText(request.getUrl()));
        page.setRequest(request);
        page.setStatusCode(httpResponse.getStatusLine().getStatusCode());
        return page;
    }
View Full Code Here

    private Page getMockPage() throws IOException {
        Page page = new Page();
        page.setRawText(IOUtils.toString(getClass().getClassLoader().getResourceAsStream("html/mock-webmagic.html")));
        page.setRequest(new Request("http://webmagic.io/list/0"));
        page.setUrl(new PlainText("http://webmagic.io/list/0"));
        return page;
    }
View Full Code Here

    @Test
    public void test() {
        ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog"), OschinaBlog.class);
        Page page = new Page();
        page.setRequest(new Request("http://my.oschina.net/flashsword/blog"));
        page.setUrl(new PlainText("http://my.oschina.net/flashsword/blog"));
        page.setHtml(new Html(html));
        long time = System.currentTimeMillis();
        for (int i = 0; i < 1000; i++) {
            modelPageProcessor.process(page);
        }
View Full Code Here

    @Override
    public Page download(Request request, Task task) {
        Page page = new Page();
        page.setHtml(new Html(html));
        page.setRequest(new Request("https://github.com/code4craft/webmagic"));
        page.setUrl(new PlainText("https://github.com/code4craft/webmagic"));
        return page;
    }
View Full Code Here

TOP

Related Classes of us.codecraft.webmagic.selector.PlainText

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.