Package net.nutch.protocol

Examples of net.nutch.protocol.Content


  protected void tearDown() {}

  public void testIt() throws ProtocolException, ParseException {
    String urlString;
    Protocol protocol;
    Content content;
    Parser parser;
    Parse parse;

    for (int i=0; i<sampleFiles.length; i++) {
      urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];

      protocol = ProtocolFactory.getProtocol(urlString);
      content = protocol.getContent(urlString);

      parser = ParserFactory.getParser(content.getContentType(), urlString);
      parse = parser.getParse(content);

      assertTrue(parse.getText().startsWith(expectedText));
    }
  }
View Full Code Here


  public void testIt() throws ProtocolException, ParseException {

    String urlString;
    Protocol protocol;
    Content content;
    Parser parser;
    Parse parse;

    urlString = "file:" + sampleDir + fileSeparator + rtfFile;
    protocol = ProtocolFactory.getProtocol(urlString);
    content = protocol.getContent(urlString);

    parser = ParserFactory.getParser(content.getContentType(), urlString);
    parse = parser.getParse(content);
    String text = parse.getText();
    assertEquals("The quick brown fox jumps over the lazy dog", text.trim());

    String title = parse.getData().getTitle();
View Full Code Here

      content.append("</body></html>");
      Properties meta = new Properties();
      meta.setProperty("Content-Type", "text/html");
      meta.setProperty("Host", "http://localhost");
      meta.setProperty("Connection", "Keep-alive, close");
      Content co = new Content(url, "http://www.example.com", content.toString().getBytes("UTF-8"), "text/html", meta);
      ParseData pd = new ParseData("Hello from Page " + i, new Outlink[0], meta);
      StringBuffer text = new StringBuffer("Hello from Page" + i);
      if (unique) {
        text.append("\nCreated at epoch time: " + System.currentTimeMillis() + ", " + r.nextLong());
      }
View Full Code Here

    try {

      /* loop through the test documents and validate result */
      for (int t = 0; t < docs.length; t++) {

        Content content = getContent(docs[t]);
        Parser parser = ParserFactory.getParser("text/html", URL);
        Parse parse = parser.getParse(content);

        assertEquals(metalanguages[t], (String) parse.getData().get(
            HTMLLanguageParser.META_LANG_NAME));
View Full Code Here

  private Content getContent(String text) {
    Properties p = new Properties();
    p.put("Content-Type", "text/html");

    Content content = new Content(URL, BASE, text.getBytes(), "text/html", p);
    return content;
  }
View Full Code Here

  public Content toContent() {
    String contentType = getHeader("Content-Type");
    if (contentType == null)
      contentType = "";
    return new Content(orig, base, content, contentType, headers);
  }
View Full Code Here

      ftp.setMaxContentLength(maxContentLength);

    // set log level
    LOG.setLevel(Level.parse((new String(logLevel)).toUpperCase()));

    Content content = ftp.getContent(urlString);

    System.err.println("Content-Type: " + content.getContentType());
    System.err.println("Content-Length: " + content.get("Content-Length"));
    System.err.println("Last-Modified: " + content.get("Last-Modified"));
    if (dumpContent) {
      System.out.print(new String(content.getContent()));
    }

    ftp = null;
  }
View Full Code Here

      file.setMaxContentLength(maxContentLength);

    // set log level
    LOG.setLevel(Level.parse((new String(logLevel)).toUpperCase()));

    Content content = file.getContent(urlString);

    System.err.println("Content-Type: " + content.getContentType());
    System.err.println("Content-Length: " + content.get("Content-Length"));
    System.err.println("Last-Modified: " + content.get("Last-Modified"));
    if (dumpContent) {
      System.out.print(new String(content.getContent()));
    }

    file = null;
  }
View Full Code Here

  }

  public byte[] getContent() { return content; }

  public Content toContent() {
    return new Content(orig, base, content,
                       getHeader("Content-Type"),
                       headers);
  }
View Full Code Here

  }

  public byte[] getContent() { return content; }

  public Content toContent() {
    return new Content(orig, base, content,
                       getHeader("Content-Type"),
                       headers);
  }
View Full Code Here

TOP

Related Classes of net.nutch.protocol.Content

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.