Package net.htmlparser.jericho

Examples of net.htmlparser.jericho.OutputDocument


    }
   
    System.out.print("Convering HTML file ("+filename+" -> "+targetFilename+")... ");
   
    Source source = new Source(htmlText);
    OutputDocument outputDocument = new OutputDocument(source);
   
    int index = 1;
    HashMap<String, String> diagramList = new HashMap<String, String>();
    for(Element element : source.getAllElements("pre")) {
      StartTag tag = element.getStartTag();
      Attribute classAttr = tag.getAttributes().get("class");
      if(classAttr != null
          && classAttr.hasValue()
          && classAttr.getValue().equals(TAG_CLASS)) {
       
        String baseFilename = imageBaseFilename;
       
        String URL;
        Attribute nameAttr = tag.getAttributes().get("id");
        if(nameAttr != null
            && nameAttr.hasValue()) {
          baseFilename = makeFilenameFromTagName(nameAttr.getValue());
          URL = imageDirName + "/" + baseFilename + ".png";
        } else {
          URL = imageDirName + "/" + baseFilename + "_" + index + ".png";
          index++;
        }

        outputDocument.replace(element, "<img src=\""+URL+"\" />");
        diagramList.put(URL, element.getContent().toString());
      }
    }
   
    if(diagramList.isEmpty()){
      System.out.println("\nHTML document does not contain any " +
        "<pre> tags with their class attribute set to \""+TAG_CLASS+"\". Nothing to do.");
     
      //TODO: should return the method with appropriate exit code instead
      System.exit(0);
    }
   
    FileWriter out;
    try {
      out = new FileWriter(targetFilename);
      outputDocument.writeTo(out);
      //out.flush();
      //out.close();
    } catch (IOException e2) {
      System.err.println("Error while writing to file " + targetFilename);
      return false;
View Full Code Here


        long timer = System.currentTimeMillis();
        boolean modified = false;

        Source src = new Source(content);
        OutputDocument out = new OutputDocument(src);
        for (String filteredTagName : filteredTags) {
            for (StartTag startTag : src.getAllStartTags(filteredTagName)) {
                if (startTag.getTagType() == StartTagType.NORMAL) {
                    Element element = startTag.getElement();
                    EndTag endTag = element.getEndTag();
                    if (removeContentBetweenTags && endTag != null) {
                        out.remove(element);
                    } else {
                        out.remove(startTag);
                        if (endTag != null) {
                            out.remove(endTag);
                        }
                    }
                    modified = true;
                }
            }
        }
        String result = modified ? out.toString() : content;

        if (logger.isDebugEnabled()) {
            logger.debug("Filter HTML tags took " + (System.currentTimeMillis() - timer) + " ms");
        }
View Full Code Here

    protected String processCss(String previousOut, String serverUrl, HttpServletRequest request,
            HttpServletResponse response) {

        Source source = new Source(previousOut);
        OutputDocument document = new OutputDocument(source);
        StringBuilder sb = new StringBuilder();
        List<StartTag> linkStartTags = source.getAllStartTags(HTMLElementName.LINK);
        for (StartTag linkTag : linkStartTags) {
            Attributes attributes = linkTag.getAttributes();
            String rel = attributes.getValue("rel");
            if (rel == null || !"stylesheet".equalsIgnoreCase(rel)) {
                continue;
            }
            String href = attributes.getValue("href");
            if (href == null) {
                continue;
            }
            String styleSheetContent = null;
            try {
                if (useServletContextResources || request == null || response == null) {
                    if (request != null && StringUtils.startsWith(href, request.getContextPath())) {
                        href = StringUtils.substringAfter(href, request.getContextPath());
                    }                   
                    styleSheetContent = httpClientService.getResourceAsString(href);
                } else {
                    styleSheetContent = httpClientService.getResourceAsString(href, request, response);
                }
            } catch (Exception e) {
                logger.warn("Unable to retrieve resource content for " + href + ".Cause: " + e.getMessage(), e);
            }

            if (StringUtils.isNotEmpty(styleSheetContent)) {
                sb.setLength(0);
                sb.append("<style");
                Attribute typeAttribute = attributes.get("type");
                if (typeAttribute != null) {
                    sb.append(' ').append(typeAttribute);
                }
                if (rewriteUrlsInCss) {
                    String baseUrl = HttpClientService.isAbsoluteUrl(href) ? href : serverUrl + href;
                    baseUrl = StringUtils.substringBeforeLast(baseUrl, "/") + "/";

                    styleSheetContent = rewriteCssUrls(styleSheetContent, baseUrl);
                }

                sb.append(">\n");
                if (request!=null && Boolean.valueOf(request.getParameter("debug"))) {
                  sb.append("/* ").append(href).append(" */\n");
                }
                sb.append(styleSheetContent)
                        .append("\n</style>");

                document.replace(linkTag, sb.toString());
            }
        }

        return document.toString();
    }
View Full Code Here

        return document.toString();
    }

    protected String removeJavaScript(String out) {
        Source source = new Source(out);
        OutputDocument document = new OutputDocument(source);
        List<Element> scriptTags = source.getAllElements(HTMLElementName.SCRIPT);
        for (Element scriptElement : scriptTags) {
            boolean doRemove = false;
            if (removeExternalScripts && removeInlinedScripts) {
                doRemove = true;
            } else {
                String srcAttr = scriptElement.getAttributeValue("src");
                doRemove = removeExternalScripts && StringUtils.isNotEmpty(srcAttr) || removeInlinedScripts
                        && StringUtils.isEmpty(srcAttr);
            }
            if (doRemove) {
                document.remove(scriptElement);
            }
        }

        return document.toString();
    }
View Full Code Here

        if (attributesToVisit == null || attributesToVisit.isEmpty() || visitors == null || visitors.length == 0) {
            return htmlContent;
        }

        Source source = new Source(htmlContent);
        OutputDocument document = new OutputDocument(source);

        for (Map.Entry<String, Set<String>> tag : attributesToVisit.entrySet()) {
            List<StartTag> tags = source.getAllStartTags(tag.getKey());
            for (StartTag startTag : tags) {
                final Attributes attributes = startTag.getAttributes();
                for (String attrName : tag.getValue()) {
                    Attribute attribute = attributes.get(attrName);
                    if (attribute != null) {
                        String originalValue = attribute.getValue();
                        String value = originalValue;
                        for (HtmlTagAttributeVisitor visitor : visitors) {
                            value = visitor.visit(value, context, resource);
                        }
                        if (originalValue != value && originalValue != null && !originalValue.equals(value)) {
                            document.replace(attribute.getValueSegment(), value);
                        }
                    }
                }
            }
        }

        return document.toString();
    }
View Full Code Here

  private void revealFields(HttpMessage msg) {
    boolean changed = false;
    String response = msg.getResponseHeader().toString() + msg.getResponseBody().toString();
    Source src = new Source(response);
    OutputDocument outputDocument = new OutputDocument(src);
   
    List<Element> formElements = src.getAllElements(HTMLElementName.FORM);
   
    if (formElements != null && formElements.size() > 0) {
      // Loop through all of the FORM tags
      logger.debug("Found " + formElements.size() + " forms");
     
      for (Element formElement : formElements) {
        List<Element> elements = formElement.getAllElements();
       
        if (elements != null && elements.size() > 0) {
          // Loop through all of the elements
          logger.debug("Found " + elements.size() + " inputs");
          for (Element element : elements) {
            Attributes atts = element.getAttributes();
           
            if(atts != null && atts.size() > 0) {
              Iterator<Attribute> iter = atts.iterator();
              while (iter.hasNext()) {
                Attribute att = iter.next();
                if (ATT_DISABLED.equalsIgnoreCase(att.getName()) ||
                  ATT_READONLY.equalsIgnoreCase(att.getName()) ||
                  (ATT_TYPE.equalsIgnoreCase(att.getName()) &&
                      TYPE_HIDDEN.equalsIgnoreCase(att.getValue()))) {
                  logger.debug("Removing " + att.getName() + ": " + response.substring(att.getBegin(), att.getEnd()));
                  outputDocument.remove(att);
                  changed = true;
                }
              }
            }
          }
        }
      }
    }
    if (changed) {
      response = outputDocument.toString();
     
      int i = response.indexOf(HttpHeader.CRLF + HttpHeader.CRLF);
      msg.setResponseBody(response.substring(i + (HttpHeader.CRLF + HttpHeader.CRLF).length()));
    }
  }
View Full Code Here

            return "WebClip module is only available in live";
        }
    }

    private String rewriteBody(String body, String url, String charset, Resource resource, RenderContext context) throws IOException {
        OutputDocument document;
        document = new WebClippingRewriter(url).rewriteBody(body, resource, context);
        return document.toString();
    }
View Full Code Here

  }

  public String changeTagCase(String contents, boolean uppercase) {
    Source source = new Source(contents);
    source.fullSequentialParse();
    OutputDocument outputDocument = new OutputDocument(source);
    List<Tag> tags = source.getAllTags();
    int pos = 0;
    for (Tag tag : tags) {
      Element tagElement = tag.getElement();
      if (tagElement == null) {
        System.out.println(tag.getName());
      } else {
        StartTag startTag = tagElement.getStartTag();
        Attributes attributes = startTag.getAttributes();
        if (attributes != null) {
          for (Attribute attribute : startTag.getAttributes()) {
            if (uppercase) {
              outputDocument.replace(attribute.getNameSegment(), attribute.getNameSegment().toString()
                  .toUpperCase());
            } else {
              outputDocument.replace(attribute.getNameSegment(), attribute.getNameSegment().toString()
                  .toLowerCase());
            }
          }
        }
        if (uppercase) {
          outputDocument.replace(tag.getNameSegment(), tag.getNameSegment().toString().toUpperCase());
        } else {
          outputDocument.replace(tag.getNameSegment(), tag.getNameSegment().toString().toLowerCase());
        }
        pos = tag.getEnd();
      }
    }
    return outputDocument.toString();
  }
View Full Code Here

                || StringUtils.inArray(name, macroDirective);
    }

    public String filter(String key, String value) {
        Source source = new Source(value);
        OutputDocument document = new OutputDocument(source);
        replaceChildren(source, source, document);
        return document.toString();
    }
View Full Code Here

    /**
     * Produces the text that {@link #replace(String)} substitutes for every "*" in the markup of a
     * resolved reference.
     *
     * @param labelParentDiv the container {@code replace} obtained from {@code findLabelContainer(label)}
     *        and accepted with {@code isValidDiv}
     * @param label the element {@code replace} looked up by the id taken from the reference's href
     * @return the replacement text for the "*" placeholder
     */
    protected abstract String extractTextToReplaceReference(Element labelParentDiv, Element label);
   
    public String replace(String htmlContent) {
        Source source = new Source(htmlContent);
        source.fullSequentialParse();
        OutputDocument outputDocument = new OutputDocument(source);
        List<Element> references = source.getAllElementsByClass("reference");
        for (Element reference : references) {
            String labelId = reference.getAttributeValue("href").replace("#", "");
            Element label = source.getElementById(labelId);

            Element div = findLabelContainer(label);
            if (!isValidDiv(div)) {
                outputDocument.replace(reference, reference.toString().replace("*", "?"));
                LOG.warn("Could not resolve label: " + labelId);
                continue;
            }

            String text = extractTextToReplaceReference(div, label);
            outputDocument.replace(reference, reference.toString().replace("*", text));
        }
        return outputDocument.toString();
    }
View Full Code Here

TOP

Related Classes of net.htmlparser.jericho.OutputDocument

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.