Package com.gistlabs.mechanize.document.html

Source Code of com.gistlabs.mechanize.document.html.HtmlDocument

/**
* Copyright (C) 2012-2014 Gist Labs, LLC. (http://gistlabs.com)
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
package com.gistlabs.mechanize.document.html;

import java.util.Arrays;
import java.util.Collection;
import java.util.List;

import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpRequestBase;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.gistlabs.mechanize.Mechanize;
import com.gistlabs.mechanize.document.AbstractDocument;
import com.gistlabs.mechanize.document.html.form.Forms;
import com.gistlabs.mechanize.document.html.image.Images;
import com.gistlabs.mechanize.document.link.Links;
import com.gistlabs.mechanize.document.node.Node;
import com.gistlabs.mechanize.exceptions.MechanizeExceptionFactory;
import com.gistlabs.mechanize.util.apache.ContentType;

/**
* @author Martin Kersten <Martin.Kersten.mk@gmail.com>
*/
public class HtmlDocument extends AbstractDocument {
  public static Collection<String> CONTENT_MATCHERS = Arrays.asList(
          ContentType.TEXT_HTML.getMimeType(),
          ContentType.APPLICATION_ATOM_XML.getMimeType(),
          ContentType.APPLICATION_XHTML_XML.getMimeType(),
          ContentType.APPLICATION_XML.getMimeType());

  private HtmlElements htmlElements;

  private String baseUri;

  public HtmlDocument(final Mechanize agent, final HttpRequestBase request, final HttpResponse response) {
    super(agent, request, response);
  }

  @Override
  public HtmlElement getRoot() {
    return htmlElements().getRoot();
  }

  @Override
  public HtmlElement find(String csss) {
    return (HtmlElement)super.find(csss);
  }

  @SuppressWarnings("unchecked")
  @Override
  public List<? extends HtmlElement> findAll(String csss) {
    return (List<? extends HtmlElement>) super.findAll(csss);
  }
 
  @Override
  protected void loadPage() throws Exception {
    Document jsoup = Jsoup.parse(getInputStream(), getContentEncoding(response), getUri());
    setBaseUri(jsoup.head().baseUri());
    this.htmlElements = new HtmlElements(this, jsoup);
  }

  private void setBaseUri(final String baseUri) {
    if (! this.getUri().equals(baseUri))
      this.baseUri = baseUri;
  }

  @Override
  public String getUri() {
    return this.baseUri==null ? super.getUri() : this.baseUri;
  }

  @Override
  protected Links loadLinks() {
    List<? extends Node> links = htmlElements().findAll("a");
    return new Links(this, links);
  }

  @Override
  protected Forms loadForms() {
    List<? extends Node> forms = htmlElements().findAll("form");
    return new Forms(this, forms);
  }

  @Override
  protected Images loadImages() {
    List<HtmlElement> images = htmlElements().findAll("img");
    return new Images(this, images);
  }


  public HtmlElements htmlElements() {
    if(htmlElements == null)
      try {
        loadPage();
      } catch (Exception e) {
        throw MechanizeExceptionFactory.newException(e);
      }
    return htmlElements;
  }

  /**
   * Returns the title of the page or null.
   */
  @Override
  public String getTitle() {
    HtmlElement title = htmlElements().find("title");
    return title != null ? title.getText() : null;
  }

  /**
   * Serialize the contents of this page into a string
   *
   * @return
   */
  @Override
  public String asString() {
    return htmlElements.toString();
  }

}
TOP

Related Classes of com.gistlabs.mechanize.document.html.HtmlDocument

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.