Package bixo.datum

Examples of bixo.datum.ContentBytes


       
        String url = "http://domain.com/simple-content.html";
        String contentType = "text/html; charset=utf-8";
        HttpHeaders headers = new HttpHeaders();
        headers.add(HttpHeaderNames.CONTENT_TYPE, contentType);
        ContentBytes content = new ContentBytes(html.getBytes("utf-8"));
        FetchedDatum fetchedDatum = new FetchedDatum(url, url, System.currentTimeMillis(), headers, content, contentType, 0);
       
        SimpleParser parser = new SimpleParser(new BaseContentExtractor() {

            @Override
View Full Code Here


       
        String url = "http://domain.com/meta-nofollow.html";
        String contentType = "text/html; charset=utf-8";
        HttpHeaders headers = new HttpHeaders();
        headers.add(HttpHeaderNames.CONTENT_TYPE, contentType);
        ContentBytes content = new ContentBytes(html.getBytes("utf-8"));
        FetchedDatum fetchedDatum = new FetchedDatum(url, url, System.currentTimeMillis(), headers, content, contentType, 0);
       
        ParserPolicy policy = new ParserPolicy(Integer.MAX_VALUE);
        SimpleParser parser = new SimpleParser(policy);
        ParsedDatum parsedDatum = parser.parse(fetchedDatum);
View Full Code Here

    String contentType = "text/html; charset=utf-8";
    HttpHeaders headers = new HttpHeaders();
    headers.add(HttpHeaderNames.CONTENT_TYPE, contentType);
    headers.add(HttpHeaderNames.CONTENT_LANGUAGE, "en");

    ContentBytes content = new ContentBytes(html.getBytes("utf-8"));
    FetchedDatum fetchedDatum = new FetchedDatum(url, url, System.currentTimeMillis(), headers, content, contentType, 0);
   
    // Call parser.parse
    SimpleParser parser = new SimpleParser();
    ParsedDatum parsedDatum = parser.parse(fetchedDatum);
View Full Code Here

    String contentType = "text/html; charset=utf-8";
    HttpHeaders headers = new HttpHeaders();
    headers.add(HttpHeaderNames.CONTENT_TYPE, contentType);
    headers.add(HttpHeaderNames.CONTENT_LANGUAGE, "en");

    ContentBytes content = new ContentBytes(html.getBytes("utf-8"));
    FetchedDatum fetchedDatum = new FetchedDatum(url, url, System.currentTimeMillis(), headers, content, contentType, 0);
   
    // Call parser.parse
    SimpleParser parser = new SimpleParser();
    ParsedDatum parsedDatum = parser.parse(fetchedDatum);
View Full Code Here

    String contentType = "text/html; charset=utf-8";
    HttpHeaders headers = new HttpHeaders();
    headers.add(HttpHeaderNames.CONTENT_TYPE, contentType);
    headers.add(HttpHeaderNames.CONTENT_LANGUAGE, "en");

    ContentBytes content = new ContentBytes(html.getBytes("utf-8"));
    FetchedDatum fetchedDatum = new FetchedDatum(url, url, System.currentTimeMillis(), headers, content, contentType, 0);
   
    // Call parser.parse
    SimpleParser parser = new SimpleParser();
    ParsedDatum parsedDatum = parser.parse(fetchedDatum);
View Full Code Here

        // Create FetchedDatum using data
        String url = "http://domain.com/music.html";
        String contentType = "text/html; charset=utf-8";
        HttpHeaders headers = new HttpHeaders();
        headers.add(HttpHeaderNames.CONTENT_TYPE, contentType);
        ContentBytes content = new ContentBytes(html.getBytes("utf-8"));
        FetchedDatum fetchedDatum = new FetchedDatum(url, url, System.currentTimeMillis(), headers, content, contentType, 0);
       
        // Call parser.parse
        ParserPolicy policy = new ParserPolicy( ParserPolicy.NO_MAX_PARSE_DURATION,
                                                BaseLinkExtractor.ALL_LINK_TAGS,
View Full Code Here

        // Create FetchedDatum using data
        String url = "http://domain.com/page.html";
        String contentType = "text/html; charset=utf-8";
        HttpHeaders headers = new HttpHeaders();
        headers.add(HttpHeaderNames.CONTENT_TYPE, contentType);
        ContentBytes content = new ContentBytes(htmlText.getBytes("utf-8"));
        FetchedDatum fetchedDatum = new FetchedDatum(url, url, System.currentTimeMillis(), headers, content, contentType, 0);
       
        // Call parser.parse
        SimpleParser parser = new SimpleParser(new ParserPolicy(), true);
        ParsedDatum parsedDatum = parser.parse(fetchedDatum);
View Full Code Here

        byte[] bytes = new byte[(int) file.length()];
        DataInputStream in = new DataInputStream(new FileInputStream(file));
        in.readFully(bytes);

        String url = path.toExternalForm().toString();
        FetchedDatum fetchedDatum = new FetchedDatum(url, url, System.currentTimeMillis(), new HttpHeaders(), new ContentBytes(bytes), "text/html", 0);
        return fetchedDatum;
    }
View Full Code Here

 
  @Test
  public void testSplitterWithNonMbox() {
    MboxSplitterFunction splitter = new MboxSplitterFunction();
   
    FetchedDatum datum = new FetchedDatum("baseUrl", "redirectedUrl", 0, new HttpHeaders(), new ContentBytes(), "text/ascii", 0);
    TupleEntry value = new TupleEntry(datum.getTupleEntry());
   
    when(_funcCall.getArguments()).thenReturn(value);
    splitter.operate(_process, _funcCall);
   
View Full Code Here

  public void testSplitterTwoEmails() throws UnsupportedEncodingException {
    MboxSplitterFunction splitter = new MboxSplitterFunction();

    final String mboxString = "From 1\r\rContent 1\r\rFrom 2\r\rContent 2";
    byte[] mboxContent = mboxString.getBytes("us-ascii");
    FetchedDatum datum = new FetchedDatum("baseUrl", "redirectedUrl", 0, new HttpHeaders(), new ContentBytes(mboxContent), "application/mbox", 0);
    TupleEntry value = new TupleEntry(datum.getTupleEntry());
   
    when(_funcCall.getArguments()).thenReturn(value);
    splitter.operate(_process, _funcCall);
View Full Code Here

TOP

Related Classes of bixo.datum.ContentBytes

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.