Examples of org.jsoup.nodes.Document

org.jsoup.nodes.Document
An HTML Document. @author Jonathan Hedley, jonathan@hedley.net

        {
            URI baseURI = new URI( baseurl );
            // to make debugging easier, start with a string. This is assuming UTF-8, which might not be a safe
            // assumption.
            String content = IOUtils.toString( stream, "utf-8" );
            Document doc = Jsoup.parse( content, baseurl );
            Elements links = doc.getElementsByTag( "a" );
            Set<String> results = new HashSet<String>();
            for ( int lx = 0; lx < links.size(); lx++ )
            {
                Element link = links.get( lx );
                /*

View Full Code Here

 * @author Martin Kersten<Martin.Kersten.mk@gmail.com>
 */
public class JsoupUtilTest {
  @Test
  public void testFindFirstByTagSingleTag() {
    Document document = Jsoup.parse("<html><body><a href=\"A\">A</a><a href=\"B\">B</a></body></html>");
    assertNotNull(JsoupDataUtil.findFirstByTag(document, "a"));
    assertNotNull(JsoupDataUtil.findFirstByTag(document, "body"));
    assertNotNull(JsoupDataUtil.findFirstByTag(document, "body/a"));
    assertNotNull(JsoupDataUtil.findFirstByTag(document, "html/body/a"));
    assertNotNull(JsoupDataUtil.findFirstByTag(document, "html/a"));

View Full Code Here

    return (List<? extends HtmlElement>) super.findAll(csss);
  }
  
  @Override
  protected void loadPage() throws Exception {
    Document jsoup = Jsoup.parse(getInputStream(), getContentEncoding(response), getUri());
    setBaseUri(jsoup.head().baseUri());
    this.htmlElements = new HtmlElements(this, jsoup);
  }

View Full Code Here

          reporter.incrCounter(this._counterGroup, "Skipped - HTML Too Long", 1);
          return;
        }


        // Count all 'itemtype' attributes referencing 'schema.org'
        Document doc = value.getParsedHTML();


        if (doc == null) {
          reporter.incrCounter(this._counterGroup, "Skipped - Unable to Parse HTML", 1);
          return;
        }


        Elements mf = doc.select("[itemtype~=schema.org]");


        if (mf.size() > 0) {
          for (Element e : mf) {
            if (e.hasAttr("itemtype")) {
              output.collect(new Text(e.attr("itemtype").toLowerCase().trim()), new LongWritable(1));

View Full Code Here

        this.confluence = new Confluence(client);
    }


    public String loadResourceAsText(String resourcePath) {
        Page page = confluence.loadPage(resourcePath, false);
        Document doc = Jsoup.parse(page.getBody());
        StringBuilder builder = new StringBuilder();
        addTitle(doc, builder);
        addPanels(doc, builder);
        addExamples(doc, builder);
        return builder.toString();

View Full Code Here

   * @param input the html document
   * @return the processed html document
   */
  public String process(String input) {


    Document doc = Jsoup.parse(input);
    
    extractStyles(doc);
    applyStyles(doc);
    inlineImages(doc);


    doc.outputSettings(doc.outputSettings().prettyPrint(false).escapeMode(Entities.EscapeMode.xhtml));
    String output = doc.outerHtml();
    return output;
  }

View Full Code Here

          if (url.contains("playlist?")) {
            // fsyprint("Fetching %s...", url);
            String last = "";
            final String out = Constants.DATA_PATH + "playlist/"
                + playListName + ".plist";
            final Document doc = Jsoup.connect(url).get();
            final Elements links = doc.select("a[href]");
            final File playListOut = new File(out);
            final FileOutputStream fos = new FileOutputStream(
                playListOut);
            final BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(fos));

View Full Code Here


    public JsonObject extractTweet(String html) 
  throws java.net.MalformedURLException, java.io.UnsupportedEncodingException {
  JsonObject status = new JsonObject();


  Document doc = Jsoup.parse(html);
  Element tweet_div = doc.select("div.permalink-tweet").first();


  String tweet_text = tweet_div.select("p.tweet-text").first().text();
  status.addProperty("text", tweet_text);


  String tweet_id = tweet_div.attr("data-tweet-id");
  status.addProperty("id_str", tweet_id);
  status.addProperty("id", Long.parseLong(tweet_id));


  String timestamp = doc.select("span.js-short-timestamp").first().attr("data-time");
  Date created_at = new Date();
  created_at.setTime(Long.parseLong(timestamp) * 1000);
  status.addProperty("created_at", date_fmt.format(created_at));


  Elements js_stats_retweets = doc.select("li.js-stat-retweets");
  if (!js_stats_retweets.isEmpty()) {
      status.addProperty("retweeted", true);
      String count = js_stats_retweets.select("strong").first().text();
      status.addProperty("retweet_count", Long.parseLong(count));
  } else {
      status.addProperty("retweeted", false);
      status.addProperty("retweet_count", 0);
  }
  Elements js_stats_favs = doc.select("li.js-stat-favorites");
  status.addProperty("favorited", !js_stats_favs.isEmpty());
      


  // User subfield
  JsonObject user = new JsonObject();
  String user_id = tweet_div.attr("data-user-id");
  user.addProperty("id_str", user_id);
  user.addProperty("id", Long.parseLong(user_id));
  String screen_name = tweet_div.attr("data-screen-name");
  user.addProperty("screen_name", screen_name);
  String user_name = tweet_div.attr("data-name");
  user.addProperty("name", user_name);
  
  status.add("user", user);
  
  // Geo information
  Elements tweet_loc = doc.select("a.tweet-geo-text");
  if (!tweet_loc.isEmpty()) {
      JsonObject location = new JsonObject();
      Element loc = tweet_loc.first();
      // Adding http to avoid malformed URL exception
      URL url = new URL("http:" + loc.attr("href"));

View Full Code Here

        jsonReports.add(new File(ReportBuilderTest.class.getClassLoader().getResource("net/masterthought/cucumber/project3.json").toURI()).getAbsolutePath());
        ReportBuilder reportBuilder = new ReportBuilder(jsonReports, rd, "", "1", "cucumber-reporting", false, false, true, true, false, "", false);
        reportBuilder.generateReports();


        File input = new File(rd, "feature-overview.html");
        Document doc = Jsoup.parse(input, "UTF-8", "");
        assertThat(fromId("overview-title", doc).text(), is("Feature Overview for Build: 1"));
        assertStatsHeader(doc);
        assertStatsFirstFeature(doc);
        assertNotNull(fromId("flash-charts", doc));
    }

View Full Code Here

        jsonReports.add(new File(ReportBuilderTest.class.getClassLoader().getResource("net/masterthought/cucumber/project3.json").toURI()).getAbsolutePath());
        ReportBuilder reportBuilder = new ReportBuilder(jsonReports, rd, "", "1", "cucumber-reporting", false, false, false, true, false, "", false);
        reportBuilder.generateReports();


        File input = new File(rd, "feature-overview.html");
        Document doc = Jsoup.parse(input, "UTF-8", "");
        assertThat(fromId("overview-title", doc).text(), is("Feature Overview for Build: 1"));
        assertStatsHeader(doc);
        assertStatsFirstFeature(doc);
        assertStatsTotals(doc);
        assertNotNull(fromId("js-charts", doc));

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.jsoup.nodes.Document

at.newmedialab.ldpath.model.functions.CssSelectFunction

br.com.objectos.way.bvmf.bdr.BdrWgetGuice

br.com.objectos.way.bvmf.bdr.BdrWgetGuice$Get

br.com.objectos.way.bvmf.bdr.TesteDeBdrParser

br.com.objectos.way.bvmf.fi.fii.FiiWgetGuice

br.com.objectos.way.bvmf.fi.fii.FiiWgetGuice$Get

by.dreamer.App

cn.edu.hfut.dmic.webcollector.parser.ParseUtils

com.astamuse.asta4d.Component

com.astamuse.asta4d.Page

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.