package us.codecraft.webmagic.configurable;
import org.junit.Test;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.downloader.MockGithubDownloader;
import java.util.ArrayList;
import java.util.List;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Checks that a {@link ConfigurablePageProcessor} built from a list of
 * {@link ExtractRule}s publishes each extracted value in the {@link ResultItems}
 * under the rule's field name.
 *
 * @author code4crafter@gmail.com
 * @date 14-4-5
 */
public class ConfigurablePageProcessorTest {

    /**
     * URL handed to the spider. The page is obtained through
     * {@link MockGithubDownloader} rather than the spider's default downloader
     * (presumably a canned copy of the page; confirm against the mock).
     */
    private static final String REPOSITORY_URL = "https://github.com/code4craft/webmagic";

    /**
     * Runs two XPath rules ("title" and "star") against the repository page and
     * asserts the value stored for each field.
     *
     * @throws Exception if the spider or the page processor fails
     */
    @Test
    public void test() throws Exception {
        List<ExtractRule> extractRules = new ArrayList<ExtractRule>();
        extractRules.add(xpathRule("title", "//title"));
        extractRules.add(xpathRule("star",
                "//ul[@class='pagehead-actions']/li[1]//a[@class='social-count js-social-count']/text()"));

        ResultItems resultItems = Spider.create(new ConfigurablePageProcessor(Site.me(), extractRules))
                .setDownloader(new MockGithubDownloader())
                .get(REPOSITORY_URL);

        // The separator in the page title is U+00B7 (middle dot). It is spelled as a
        // Unicode escape so the expectation cannot be corrupted when the source file
        // is read or saved with a non-UTF-8 charset.
        assertThat(resultItems.getAll())
                .containsEntry("title", "<title>code4craft/webmagic \u00B7 GitHub</title>")
                // The "//title" rule yields the whole element, while the "star" rule ends
                // in text() and yields only the text node, surrounding blanks included.
                .containsEntry("star", " 86 ");
    }

    /**
     * Creates an {@link ExtractRule} that evaluates an XPath expression.
     *
     * @param fieldName key under which the extracted value is expected in the result items
     * @param xpath     XPath expression to evaluate against the page
     * @return a rule configured with {@link ExpressionType#XPath}
     */
    private static ExtractRule xpathRule(String fieldName, String xpath) {
        ExtractRule rule = new ExtractRule();
        rule.setExpressionType(ExpressionType.XPath);
        rule.setExpressionValue(xpath);
        rule.setFieldName(fieldName);
        return rule;
    }
}