Package org.jsoup.nodes

Examples of org.jsoup.nodes.Element


        @Override
        public void head(Node node, int depth) {
            if (!(node instanceof Element)) {
                return;
            }
            Element tag = (Element) node;
            String tagName = tag.tagName().toLowerCase();
            if (tagName.equals(TAG_BODY)) {
                extractAttribute(tag, ATT_BACKGROUND);
            } else if (tagName.equals(TAG_SCRIPT)) {
                extractAttribute(tag, ATT_SRC);
            } else if (tagName.equals(TAG_BASE)) {
                String baseref = tag.attr(ATT_HREF);
                try {
                    if (!StringUtils.isEmpty(baseref))// Bugzilla 30713
                    {
                        baseUrl.url = ConversionUtils.makeRelativeURL(baseUrl.url, baseref);
                    }
                } catch (MalformedURLException e1) {
                    throw new RuntimeException(e1);
                }
            } else if (tagName.equals(TAG_IMAGE)) {
                extractAttribute(tag, ATT_SRC);
            } else if (tagName.equals(TAG_APPLET)) {
                extractAttribute(tag, ATT_CODE);
            } else if (tagName.equals(TAG_OBJECT)) {
                extractAttribute(tag, ATT_CODEBASE);
                extractAttribute(tag, ATT_DATA);
            } else if (tagName.equals(TAG_INPUT)) {
                // we check the input tag type for image
                if (ATT_IS_IMAGE.equalsIgnoreCase(tag.attr(ATT_TYPE))) {
                    // then we need to download the binary
                    extractAttribute(tag, ATT_SRC);
                }
            } else if (tagName.equals(TAG_SCRIPT)) {
                extractAttribute(tag, ATT_SRC);
                // Bug 51750
            } else if (tagName.equals(TAG_FRAME) || tagName.equals(TAG_IFRAME)) {
                extractAttribute(tag, ATT_SRC);
            } else if (tagName.equals(TAG_EMBED)) {
                extractAttribute(tag, ATT_SRC);
            } else if (tagName.equals(TAG_BGSOUND)){
                extractAttribute(tag, ATT_SRC);
            } else if (tagName.equals(TAG_LINK)) {
                // Putting the string first means it works even if the attribute is null
                if (STYLESHEET.equalsIgnoreCase(tag.attr(ATT_REL))) {
                    extractAttribute(tag, ATT_HREF);
                }
            } else {
                extractAttribute(tag, ATT_BACKGROUND);
            }


            // Now look for URLs in the STYLE attribute
            String styleTagStr = tag.attr(ATT_STYLE);
            if(styleTagStr != null) {
                HtmlParsingUtils.extractStyleURLs(baseUrl.url, urls, styleTagStr);
            }
        }
View Full Code Here


        @Override
        public void head(Node node, int depth) {
          if (!(node instanceof Element)) {
            return;
          }
          Element tag = (Element) node;
            String tagName = tag.tagName().toLowerCase();
            if (tagName.equals(TAG_BODY)) {
                extractAttribute(tag, ATT_BACKGROUND);
            } else if (tagName.equals(TAG_SCRIPT)) {
              extractAttribute(tag, ATT_SRC);
            } else if (tagName.equals(TAG_BASE)) {
                String baseref = tag.attr(ATT_HREF);
                try {
                    if (!StringUtils.isEmpty(baseref))// Bugzilla 30713
                    {
                        baseUrl.url = ConversionUtils.makeRelativeURL(baseUrl.url, baseref);
                    }
                } catch (MalformedURLException e1) {
                    throw new RuntimeException(e1);
                }
            } else if (tagName.equals(TAG_IMAGE)) {
                extractAttribute(tag, ATT_SRC);
            } else if (tagName.equals(TAG_APPLET)) {
                extractAttribute(tag, ATT_CODE);
            } else if (tagName.equals(TAG_OBJECT)) {
                extractAttribute(tag, ATT_CODEBASE);               
                extractAttribute(tag, ATT_DATA);                
            } else if (tagName.equals(TAG_INPUT)) {
                // we check the input tag type for image
                if (ATT_IS_IMAGE.equalsIgnoreCase(tag.attr(ATT_TYPE))) {
                    // then we need to download the binary
                    extractAttribute(tag, ATT_SRC);
                }
            } else if (tagName.equals(TAG_SCRIPT)) {
                extractAttribute(tag, ATT_SRC);
                // Bug 51750
            } else if (tagName.equals(TAG_FRAME) || tagName.equals(TAG_IFRAME)) {
                extractAttribute(tag, ATT_SRC);
            } else if (tagName.equals(TAG_EMBED)) {
                extractAttribute(tag, ATT_SRC);
            } else if (tagName.equals(TAG_BGSOUND)){
                extractAttribute(tag, ATT_SRC);
            } else if (tagName.equals(TAG_LINK)) {
                // Putting the string first means it works even if the attribute is null
                if (STYLESHEET.equalsIgnoreCase(tag.attr(ATT_REL))) {
                    extractAttribute(tag, ATT_HREF);
                }
            } else {
                extractAttribute(tag, ATT_BACKGROUND);
            }


            // Now look for URLs in the STYLE attribute
            String styleTagStr = tag.attr(ATT_STYLE);
            if(styleTagStr != null) {
                HtmlParsingUtils.extractStyleURLs(baseUrl.url, urls, styleTagStr);
            }
        }
View Full Code Here

    {
        assertThat( actualLinks ).hasSize( expectedLinks.size() );

        for ( int i = 0; i < actualLinks.size(); i++ )
        {
            Element element = actualLinks.get( i );
            assertEquals( "Link[" + i + "]", expectedLinks.get( i ), element.attr( "href" ) );
        }
    }
View Full Code Here

        String startPath = StringUtils.substringBefore( path, "/" );

        // replace all links !!
        Document document = Jsoup.parse( is, "UTF-8", "" );

        Element body = document.body().child( 0 );

        Elements links = body.select( "a[href]" );

        for ( Element link : links ) {
            link.attr( "href", "#" + startPath + "/" + link.attr( "href" ) );
        }

        Elements codes = body.select( "code" );

        for ( Element code : codes ) {
            code.attr( "class", code.attr( "class" ) + " nice-code" );
        }

        //default generated enunciate use h1/h2/h3 which is quite big so transform to h3/h4/h5

        Elements headers = body.select( "h1" );

        for ( Element header : headers ) {
            header.tagName( "h3" );
        }

        headers = body.select( "h2" );

        for ( Element header : headers ) {
            header.tagName( "h4" );
        }

        headers = body.select( "h3" );

        for ( Element header : headers ) {
            header.tagName( "h5" );
        }

        Document res = new Document( "" );
        res.appendChild( body.select( "div[id=main]" ).first() );

        resp.getOutputStream().write( res.outerHtml().getBytes() );

    }
View Full Code Here

//              将html中table提取出来,html头部等信息都不要
                Document doc = Jsoup.parse(code);
            StringBuffer sb = new StringBuffer();
            Elements elements = doc.select("table");
            for(int i=0;i<elements.size();i++){
              Element element = elements.get(i);
              sb.append(element.toString());
            }
           
//                System.out.print(code);
            code = sb.toString();
      }
View Full Code Here

  private static int setColumnWidth(Element table,Sheet sheet) {
    if(table==null||sheet==null){
      return 0;
    }
    Element colgroup = table.appendElement("colgroup");
    int firstRow = sheet.getFirstRowNum();
    int lastRow = sheet.getLastRowNum();
    int maxColumns = 0;
    for(int i=firstRow;i<=lastRow;i++){
      Row row = sheet.getRow(i);
      if(row!=null){
//        int firstColumn = row.getFirstCellNum();
        int lastColumn = row.getLastCellNum();
        if(lastColumn>maxColumns){
          maxColumns = lastColumn;
        }
      }
    }
    int totalwidth = 0;
    for(int i=0;i<maxColumns;i++){
      Element col = colgroup.appendElement("col");
//      System.out.println(sheet.getColumnWidth(i)+":"+MIN_COLUMN_EXCEL_WIDTH);
      if(sheet.getColumnWidth(i)<MIN_COLUMN_EXCEL_WIDTH){
        col.attr("width",String.valueOf(DEFAULT_COLUMN_WIDTH));
        totalwidth+=DEFAULT_COLUMN_WIDTH;
//        css(col,"width",String.valueOf(DEFAULT_COLUMN_WIDTH)+"px");
      }else{
        int widthPx = widthUnits2Pixel(sheet.getColumnWidth(i));
        col.attr("width",String.valueOf(widthPx));
        totalwidth+=widthPx;
//        css(col,"width",String.valueOf(widthUnits2Pixel(sheet.getColumnWidth(i)))+"px");
      }
     
    }
View Full Code Here

 
  private static void setColumnWidth(HSSFSheet sheet, Element table) {
    Elements colgroups = table.select("colgroup");
    // 首先设置列宽
    if (colgroups.size() > 0) {
      Element colgroup = colgroups.get(0);
      Elements cols = colgroup.children();
      for (int i = 0; i < cols.size(); i++) {
        Element col = cols.get(i);
        int width = DEFAULT_COLUMN_WIDTH;
        try {
          if (col.hasAttr("width")) {
            String widthS = col.attr("width");
            if(widthS.endsWith("px")){
              widthS = widthS.replaceAll("px", "");
            }
            width = Integer.parseInt(widthS);
          }
View Full Code Here

//    int maxColumns = 0;
    for(int i=0;i<=lastRow;i++){
      Row row = sheet.getRow(i);
      if(row!=null){
//        int firstColumn = row.getFirstCellNum();
        Element tr = table.appendElement("tr");
        if(row.getHeightInPoints()<DEFAULT_ROW_HEIGHT){
          tr.attr("height",String.valueOf(pointsToPixels(DEFAULT_ROW_HEIGHT-6))+"px");
        }else{
          tr.attr("height",String.valueOf(pointsToPixels(row.getHeightInPoints()))+"px");
        }
      }else{
        Element tr = table.appendElement("tr");
        tr.attr("height",String.valueOf(pointsToPixels(DEFAULT_ROW_HEIGHT))+"px");
      }
    }
  }
View Full Code Here

  private static void setRowHeight(HSSFSheet sheet, Element table) {
    Elements trs = table.select("tr");
    // 首先设置列宽
    if (trs.size() > 0) {
      for (int i = 0; i < trs.size(); i++) {
        Element tr = trs.get(i);
        int height = DEFAULT_ROW_HEIGHT;
        try {
          if (tr.hasAttr("height")) {
            String heightS = tr.attr("height");
            if (heightS.endsWith("px")) {
              heightS = StringUtils.replace(heightS, "px", "");
            }
            height = Integer.parseInt(heightS);
          }
View Full Code Here

  private static void hideHTMLCells(Element table,int startrow,int startcolumn,int endrow,int endcolumn){
    if(table!=null){
      Elements trs = table.select("tr");
      for(int i=startrow;i<=endrow&&i<trs.size();i++){
        Element tr = trs.get(i);
        Elements tds = tr.children();
        for(int j=startcolumn;j<=endcolumn&&j<tds.size();j++){
          Element td = tds.get(j);
          if(i==startrow&&j==startcolumn){
           
          }else{
            css(td,"display","none");
          }
View Full Code Here

TOP

Related Classes of org.jsoup.nodes.Element

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.