Package us.codecraft.webmagic

Examples of us.codecraft.webmagic.Request


public class BloomFilterDuplicateRemoverTest {

    @Test
    public void testRemove() throws Exception {
        BloomFilterDuplicateRemover bloomFilterDuplicateRemover = new BloomFilterDuplicateRemover(10);
        boolean isDuplicate = bloomFilterDuplicateRemover.isDuplicate(new Request("a"), null);
        assertThat(isDuplicate).isFalse();
        isDuplicate = bloomFilterDuplicateRemover.isDuplicate(new Request("a"), null);
        assertThat(isDuplicate).isTrue();
        isDuplicate = bloomFilterDuplicateRemover.isDuplicate(new Request("b"), null);
        assertThat(isDuplicate).isFalse();
        isDuplicate = bloomFilterDuplicateRemover.isDuplicate(new Request("b"), null);
        assertThat(isDuplicate).isTrue();

    }
View Full Code Here


        int times = 5000000;
        DuplicateRemover duplicateRemover = new BloomFilterDuplicateRemover(times,0.005);
        long freeMemory = Runtime.getRuntime().freeMemory();
        long time = System.currentTimeMillis();
        for (int i = 0; i < times; i++) {
            duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
        }
        System.out.println("Time used by bloomfilter:" + (System.currentTimeMillis() - time));
        System.out.println("Memory used by bloomfilter:" + (freeMemory - Runtime.getRuntime().freeMemory()));

        duplicateRemover = new HashSetDuplicateRemover();
        System.gc();
        freeMemory = Runtime.getRuntime().freeMemory();
        time = System.currentTimeMillis();
        for (int i = 0; i < times; i++) {
            duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
        }
        System.out.println("Time used by hashset:" + (System.currentTimeMillis() - time));
        System.out.println("Memory used by hashset:" + (freeMemory - Runtime.getRuntime().freeMemory()));
    }
View Full Code Here

        DuplicateRemover duplicateRemover = new BloomFilterDuplicateRemover(times, 0.01);
        int right = 0;
        int wrong = 0;
        int missCheck = 0;
        for (int i = 0; i < times; i++) {
            boolean duplicate = duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
            if (duplicate) {
                wrong++;
            } else {
                right++;
            }
            duplicate = duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
            if (!duplicate) {
                missCheck++;
            }
        }
View Full Code Here

     *
     * @param url
     * @return html
     */
    public Html download(String url, String charset) {
        Page page = download(new Request(url), Site.me().setCharset(charset).toTask());
        return (Html) page.getHtml();
    }
View Full Code Here

    }

    public static List<Request> convertToRequests(Collection<String> urls) {
        List<Request> requestList = new ArrayList<Request>(urls.size());
        for (String url : urls) {
            requestList.add(new Request(url));
        }
        return requestList;
    }
View Full Code Here

        }
    }

    @Override
    public synchronized Request poll(Task task) {
        Request poll = priorityQueuePlus.poll();
        if (poll != null) {
            return poll;
        }
        poll = noPriorityQueue.poll();
        if (poll != null) {
View Full Code Here

  @Test
  public void test() {
    SeleniumDownloader seleniumDownloader = new SeleniumDownloader(chromeDriverPath);
    long time1 = System.currentTimeMillis();
    for (int i = 0; i < 100; i++) {
      Page page = seleniumDownloader.download(new Request("http://huaban.com/"), new Task() {
        @Override
        public String getUUID() {
          return "huaban.com";
        }
View Full Code Here

  @Test
  public void testBaiduWenku() {
    SeleniumDownloader seleniumDownloader = new SeleniumDownloader(chromeDriverPath);
        seleniumDownloader.setSleepTime(10000);
    long time1 = System.currentTimeMillis();
    Page page = seleniumDownloader.download(new Request("http://wenku.baidu.com/view/462933ff04a1b0717fd5ddc2.html"), new Task() {
      @Override
      public String getUUID() {
        return "huaban.com";
      }
View Full Code Here

            }
            String key = ITEM_PREFIX + task.getUUID();
            String field = DigestUtils.shaHex(url);
            byte[] bytes = jedis.hget(key.getBytes(), field.getBytes());
            if (bytes != null) {
                Request o = JSON.parseObject(new String(bytes), Request.class);
                return o;
            }
            Request request = new Request(url);
            return request;
        } finally {
            pool.returnResource(jedis);
        }
    }
View Full Code Here

            int lineReaded = 0;
            while ((line = fileUrlReader.readLine()) != null) {
                urls.add(line.trim());
                lineReaded++;
                if (lineReaded > cursor.get()) {
                    queue.add(new Request(line));
                }
            }
        } finally {
            if (fileUrlReader != null) {
                IOUtils.closeQuietly(fileUrlReader);
View Full Code Here

TOP

Related Classes of us.codecraft.webmagic.Request

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.