Package net.htmlparser.jericho

Examples of net.htmlparser.jericho.Source


        return document.toString();
    }

    protected String removeJavaScript(String out) {
        Source source = new Source(out);
        OutputDocument document = new OutputDocument(source);
        List<Element> scriptTags = source.getAllElements(HTMLElementName.SCRIPT);
        for (Element scriptElement : scriptTags) {
            boolean doRemove = false;
            if (removeExternalScripts && removeInlinedScripts) {
                doRemove = true;
            } else {
View Full Code Here


            tmp = "<body>" + inputHTML + "</body>";
        } else {
            tmp = inputHTML;
        }

        final Source source = new Source(tmp);

        // The DOMFragmentParser generates a HTML and a BODY element which are
        // of no interest for us. We select the HTMLBodyElement which will then
        // be consumed in the validateHtml method. It is the starting root element
        // of the HTML fragment.
        linkToDest.clear();

        final List<Result> results = validateHtml(source.getChildElements().get(0), source);
        for (Result result : results) {
            if (Result.Type.ERROR.equals(result.getType())) {
                evh.addError(result);
            } else if (Result.Type.WARNING.equals(result.getType())) {
                evh.addWarning(result);
View Full Code Here

            //Search for primary field if present
            try {
                String itemName = getPrimaryNodeType().getPrimaryItemName();
                if (itemName != null) {
                    String s = getProperty(itemName).getValue().getString();
                    title = new TextExtractor(new Source(s != null ? s : getName())).toString();
                }
            } catch (RepositoryException e1) {
                title = null;
            }
        }
View Full Code Here

       
        String output = service.externalize(source, serverUrl);
       
        assertTrue("Output is empty", StringUtils.isNotEmpty(output));
       
        Source src = new Source(output);

        // check URLs
        List<StartTag> linkStartTags = src.getAllStartTags(HTMLElementName.A);
        for (StartTag startTag : linkStartTags) {
            String href = startTag.getAttributeValue("href");
            assertTrue("The URL was not rewritten correctly: " + href, href == null || !href.startsWith("/"));
        }
    }
View Full Code Here

        String output = service.externalize(source, serverUrl);
       
        assertTrue("Output is empty", StringUtils.isNotEmpty(output));
       
        Source src = new Source(output);

        // check CSS
        List<StartTag> cssStartTags = src.getAllStartTags(HTMLElementName.LINK);
        for (StartTag startTag : cssStartTags) {
            String rel = startTag.getAttributeValue("rel");
            assertTrue("CSS was not inlined correctly " + startTag.getAttributeValue("href"), rel == null || !"stylesheet".equalsIgnoreCase(rel));
        }
    }
View Full Code Here

       
        String output = service.externalize(source, serverUrl);
       
        assertTrue("Output is empty", StringUtils.isNotEmpty(output));
       
        Source src = new Source(output);

        // check JavaScript
        List<Element> scriptTags = src.getAllElements(HTMLElementName.SCRIPT);
        assertTrue("Not all script tags were removed. " + scriptTags.size() + " tags remain.", scriptTags.isEmpty());
       
    }
View Full Code Here

    public String execute(String previousOut, RenderContext renderContext, Resource resource, RenderChain chain)
            throws Exception {

        long timer = System.currentTimeMillis();
       
        final SourceFormatter sourceFormatter = new SourceFormatter(new Source(previousOut));
        sourceFormatter.setIndentString("  ");

        String out = sourceFormatter.toString();
       
        if (logger.isDebugEnabled()) {
View Full Code Here

            HtmlTagAttributeVisitor... visitors) {
        if (attributesToVisit == null || attributesToVisit.isEmpty() || visitors == null || visitors.length == 0) {
            return htmlContent;
        }

        Source source = new Source(htmlContent);
        OutputDocument document = new OutputDocument(source);

        for (Map.Entry<String, Set<String>> tag : attributesToVisit.entrySet()) {
            List<StartTag> tags = source.getAllStartTags(tag.getKey());
            for (StartTag startTag : tags) {
                final Attributes attributes = startTag.getAttributes();
                for (String attrName : tag.getValue()) {
                    Attribute attribute = attributes.get(attrName);
                    if (attribute != null) {
View Full Code Here

        // http://code.google.com/p/html5security/wiki/RedirectionMethods
        // <meta http-equiv="location" content="URL=http://evil.com" />
        // <meta http-equiv="refresh" content="0;url=http://evil.com/" />
        //
        String content = msg.getResponseBody().toString();
        Source htmlSrc = new Source(content);
        List<Element> metaElements = htmlSrc.getAllElements(HTMLElementName.META);
        for (Element el : metaElements) {

            value = el.getAttributeValue("http-equiv");

            if (value != null) {
                if (value.equalsIgnoreCase("location")) {
                    // Get the content attribute value
                    value = el.getAttributeValue("content");

                    // Check if the payload is inside the location attribute
                    if (checkPayload(value, payload)) {
                        return REDIRECT_LOCATION_META;
                    }

                } else if (value.equalsIgnoreCase("refresh")) {
                    // Get the content attribute value                       
                    value = el.getAttributeValue("content");

                    // Skip this element if the content attribute isn't set
                    if (value != null) {
                            // Usually redirect content is configured with a delay
                        // so extract the url component                           
                        value = getRefreshUrl(value);

                        // Check if the payload is inside the extracted refresh URL
                        if (checkPayload(value, payload)) {
                            return REDIRECT_REFRESH_META;
                        }
                    }
                }
            }
        }
       
        // (4) Check if redirection occurs by Base Tag
        // http://code.google.com/p/html5security/wiki/RedirectionMethods
        // <base href="http://evil.com/" />
        //
       
        // (5) Check if redirection occurs by Javascript
        // http://code.google.com/p/html5security/wiki/RedirectionMethods
        // location='http://evil.com/';
        // location.href='http://evil.com/';
        // location.reload('http://evil.com/');
        // location.replace('http://evil.com/');
        // location.assign('http://evil.com/');
        // window.open('http://evil.com/');
        // window.navigate('http://evil.com/');
        //
        if (StringUtils.indexOfIgnoreCase(content, payload) != -1) {
            List<Element> jsElements = htmlSrc.getAllElements(HTMLElementName.SCRIPT);
            String matchingUrl = "(\\Q" + payload + "\\E|\\Qhttp://" + REDIRECT_SITE + "\\E)";
            Pattern pattern;
           
            for (Element el : jsElements) {
                value = el.getContent().toString();
View Full Code Here

  }

  private void revealFields(HttpMessage msg) {
    boolean changed = false;
    String response = msg.getResponseHeader().toString() + msg.getResponseBody().toString();
    Source src = new Source(response);
    OutputDocument outputDocument = new OutputDocument(src);
   
    List<Element> formElements = src.getAllElements(HTMLElementName.FORM);
   
    if (formElements != null && formElements.size() > 0) {
      // Loop through all of the FORM tags
      logger.debug("Found " + formElements.size() + " forms");
     
View Full Code Here

TOP

Related Classes of net.htmlparser.jericho.Source

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.