Package bixo.datum

Examples of bixo.datum.FetchedDatum


    public final void testLargeContent() throws Exception {
        FetcherPolicy policy = new FetcherPolicy();
        Server server = startServer(new RandomResponseHandler(policy.getMaxContentSize() * 2), 8089);
        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);
        String url = "http://localhost:8089/test.html";
        FetchedDatum result = fetcher.get(new ScoredUrlDatum(url));
        server.stop();

        assertTrue("Content size should be truncated", result.getContentLength() <= policy.getMaxContentSize());
    }
View Full Code Here


        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);
        fetcher.setDefaultMaxContentSize(1000);
        fetcher.setMaxContentSize("image/png", 5000);
        ScoredUrlDatum datumToFetch = new ScoredUrlDatum("http://localhost:8089/karlie.html");
       
        FetchedDatum result1 = fetcher.get(datumToFetch);
        FetchedDatum result2 = fetcher.get(datumToFetch);
       
        // Verify that we got the same data from each fetch request.
        assertEquals(1000, result1.getContentLength());
        assertEquals(1000, result2.getContentLength());
        byte[] bytes1 = result1.getContentBytes();
        byte[] bytes2 = result2.getContentBytes();
        for (int i = 0; i < bytes1.length; i++) {
            assertEquals(bytes1[i], bytes2[i]);
        }

        datumToFetch = new ScoredUrlDatum("http://localhost:8089/bixolabs_mining.png");
        FetchedDatum result3 = fetcher.get(datumToFetch);
        assertTrue(result3.getContentLength() > 1000);
       
        fetcher.setMaxContentSize("image/png", 1500);
        try {
            fetcher.get(datumToFetch);
            fail("Aborted fetch exception not thrown");
View Full Code Here

    public final void testLargeHtml() throws Exception {
        FetcherPolicy policy = new FetcherPolicy();
        Server server = startServer(new ResourcesResponseHandler(), 8089);
        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);
        String url = "http://localhost:8089/karlie.html";
        FetchedDatum result = fetcher.get(new ScoredUrlDatum(url));
        server.stop();

        assertTrue("Content size should be truncated", result.getContentLength() <= policy.getMaxContentSize());

    }
View Full Code Here

    public final void testContentTypeHeader() throws Exception {
        FetcherPolicy policy = new FetcherPolicy();
        Server server = startServer(new ResourcesResponseHandler(), 8089);
        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);
        String url = "http://localhost:8089/simple-page.html";
        FetchedDatum result = fetcher.get(new ScoredUrlDatum(url));
        server.stop();
       
        String contentType = result.getHeaders().getFirst(HttpHeaderNames.CONTENT_TYPE);
        assertNotNull(contentType);
        assertEquals("text/html", contentType);
    }
View Full Code Here

    public final void testTempRedirectHandling() throws Exception {
        FetcherPolicy policy = new FetcherPolicy();
        Server server = startServer(new RedirectResponseHandler(), 8089);
        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);
        String url = "http://localhost:8089/base";
        FetchedDatum result = fetcher.get(new ScoredUrlDatum(url));
        server.stop();

        assertEquals("Redirected URL", "http://localhost:8089/redirect", result.getFetchedUrl());
        assertNull(result.getNewBaseUrl());
        assertEquals(1, result.getNumRedirects());
    }
View Full Code Here

        Server server = startServer(new RedirectResponseHandler(true), 8089);
        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);
        String url = "http://localhost:8089/base";
        ScoredUrlDatum scoredUrl = new ScoredUrlDatum(url);
        scoredUrl.setPayloadValue("payload-field-1", 1);
        FetchedDatum result = fetcher.get(scoredUrl);
        server.stop();

        assertEquals("Redirected URL", "http://localhost:8089/redirect", result.getFetchedUrl());
        assertEquals("New base URL", "http://localhost:8089/redirect", result.getNewBaseUrl());
        assertEquals(1, result.getNumRedirects());
        assertEquals(1, result.getPayloadValue("payload-field-1"));
    }
View Full Code Here

       
        FetcherPolicy policy = new FetcherPolicy();
        Server server = startServer(new LanguageResponseHandler(englishContent, foreignContent), 8089);
        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);
        String url = "http://localhost:8089/";
        FetchedDatum result = fetcher.get(new ScoredUrlDatum(url));
        server.stop();
        String contentStr = new String(result.getContentBytes(), 0, result.getContentLength());
        assertTrue( englishContent.equals(contentStr));
    }
View Full Code Here

    public final void testHostAddress() throws Exception {
        FetcherPolicy policy = new FetcherPolicy();
        Server server = startServer(new ResourcesResponseHandler(), 8089);
        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);
        String url = "http://localhost:8089/simple-page.html";
        FetchedDatum result = fetcher.get(new ScoredUrlDatum(url));
        server.stop();
       
        String hostAddress = result.getHostAddress();
        assertNotNull(hostAddress);
        assertEquals("127.0.0.1", hostAddress);
    }
View Full Code Here

        Tap validate = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentPath);
        TupleEntryIterator tupleEntryIterator = validate.openForRead(platform.makeFlowProcess());
        int fetchedPages = 0;
        while (tupleEntryIterator.hasNext()) {
            TupleEntry entry = tupleEntryIterator.next();
            new FetchedDatum(entry);
            fetchedPages += 1;
        }

        Assert.assertEquals(10, fetchedPages);
    }
View Full Code Here

        super(FetchedDatum.FIELDS);
    }

    public void operate(FlowProcess flowProcess, FunctionCall<NullContext> functionCall) {
        TupleEntry arguments = functionCall.getArguments();
        FetchedDatum fetchedDatum = new FetchedDatum(arguments.getTuple());
       
        // Now, if the FetchedDatum mime-type is application/mbox, we want to split it into N FetchedDatum
        // tuples, one per mail message.
        if (fetchedDatum.getContentType().equals(MBOX_MIME_TYPE)) {
          splitIntoEmails(fetchedDatum, functionCall.getOutputCollector());
        } else {
          // Pass through as-is
          functionCall.getOutputCollector().add(arguments);
        }
View Full Code Here

TOP

Related Classes of bixo.datum.FetchedDatum

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.