Package bixo.config.BaseFetchJobPolicy

Examples of bixo.config.BaseFetchJobPolicy.FetchSetInfo


        PartitioningKey newKey = new PartitioningKey(key, _numReduceTasks);
       
        while (safeHasNext()) {
            ScoredUrlDatum scoredDatum = new ScoredUrlDatum(new TupleEntry(values.next()));
            FetchSetInfo setInfo = _policy.nextFetchSet(scoredDatum);
            if (setInfo != null) {
                FetchSetDatum result = makeFetchSetDatum(setInfo, newKey, safeHasNext());
                collector.add(BixoPlatform.clone(result.getTuple(), process));
            }
        }
       
        // See if we have another partially built datum to add.
        FetchSetInfo setInfo = _policy.endFetchSet();
        if (setInfo != null) {
            FetchSetDatum result = makeFetchSetDatum(setInfo, newKey, false);
            collector.add(BixoPlatform.clone(result.getTuple(), process));
        }
    }
View Full Code Here


        // Should be nothing yet.
        assertNull(policy.endFetchSet());
       
        assertNull(policy.nextFetchSet(new ScoredUrlDatum("url1")));
       
        FetchSetInfo setInfo = policy.nextFetchSet(new ScoredUrlDatum("url2"));
        assertNotNull(setInfo);
        assertEquals(2, setInfo.getUrls().size());
        assertEquals("url1", setInfo.getUrls().get(0).getUrl());
        assertEquals("url2", setInfo.getUrls().get(1).getUrl());
        assertEquals(crawlDelay * 2, setInfo.getFetchDelay());
       
        FetchSetInfo setInfo2 = policy.nextFetchSet(new ScoredUrlDatum("url3"));
        assertNotNull(setInfo2);
        assertEquals(1, setInfo.getUrls().size());
        assertEquals("url3", setInfo.getUrls().get(0).getUrl());
       
        assertNull(policy.endFetchSet());
View Full Code Here

        BaseFetchJobPolicy policy = new DefaultFetchJobPolicy(maxUrlsPerSet, maxUrlsPerServer, BaseFetchJobPolicy.DEFAULT_CRAWL_DELAY);
       
        final int crawlDelay = 10000;
        policy.startFetchSet("groupingKey", crawlDelay);
       
        FetchSetInfo setInfo = policy.nextFetchSet(new ScoredUrlDatum("url1"));
        assertNotNull(setInfo);
        assertEquals(1, setInfo.getUrls().size());
        assertFalse(setInfo.isSkipping());
       
        assertNull(policy.nextFetchSet(new ScoredUrlDatum("url2")));
        assertNull(policy.nextFetchSet(new ScoredUrlDatum("url3")));
        assertNull(policy.nextFetchSet(new ScoredUrlDatum("url4")));
        assertNull(policy.nextFetchSet(new ScoredUrlDatum("url5")));

        setInfo = policy.endFetchSet();
        assertNotNull(setInfo);
        assertEquals(4, setInfo.getUrls().size());
        assertTrue(setInfo.isSkipping());
    }
View Full Code Here

        final int crawlDelay = 10000;
        policy.startFetchSet("groupingKey", crawlDelay);
       
        long curSortKey = 0;
        for (int i = 0; i < maxUrlsPerServer; i++) {
            FetchSetInfo setInfo = policy.nextFetchSet(new ScoredUrlDatum("url" + i));
            assertNotNull(setInfo);
            assertTrue(setInfo.getSortKey() > curSortKey);
            curSortKey = setInfo.getSortKey();
        }
    }
View Full Code Here

TOP

Related Classes of bixo.config.BaseFetchJobPolicy.FetchSetInfo

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.