// Package org.apache.jmeter.protocol.http.parser
//
// Source code of org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser$JMeterTagVisitor

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package org.apache.jmeter.protocol.http.parser;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.Iterator;
import java.util.Stack;

import jodd.lagarto.EmptyTagVisitor;
import jodd.lagarto.LagartoException;
import jodd.lagarto.LagartoParser;
import jodd.lagarto.LagartoParserConfig;
import jodd.lagarto.Tag;
import jodd.lagarto.TagType;
import jodd.lagarto.TagUtil;
import jodd.lagarto.dom.HtmlCCommentExpressionMatcher;
import jodd.log.LoggerFactory;
import jodd.log.impl.Slf4jLoggerFactory;

import org.apache.commons.lang3.StringUtils;
import org.apache.jmeter.protocol.http.util.ConversionUtils;
import org.apache.jorphan.logging.LoggingManager;
import org.apache.log.Logger;

/**
* Parser based on Lagarto
* @since 2.10
*/
public class LagartoBasedHtmlParser extends HTMLParser {
    private static final Logger log = LoggingManager.getLoggerForClass();
    static {
        LoggerFactory.setLoggerFactory(new Slf4jLoggerFactory());
    }

    /*
     * A dummy class to pass the pointer of URL.
     */
    private static class URLPointer {
        private URLPointer(URL newUrl) {
            url = newUrl;
        }
        private URL url;
    }
   
    private static final class JMeterTagVisitor extends EmptyTagVisitor {
        private HtmlCCommentExpressionMatcher htmlCCommentExpressionMatcher;
        private URLCollection urls;
        private URLPointer baseUrl;
        private Float ieVersion;
        private Stack<Boolean> enabled = new Stack<Boolean>();

        /**
         * @param baseUrl
         * @param urls
         * @param ieVersion
         */
        public JMeterTagVisitor(final URLPointer baseUrl, URLCollection urls, Float ieVersion) {
            this.urls = urls;
            this.baseUrl = baseUrl;
            this.ieVersion = ieVersion;
        }

        private final void extractAttribute(Tag tag, String attributeName) {
            CharSequence url = tag.getAttributeValue(attributeName);
            if (!StringUtils.isEmpty(url)) {
                urls.addURL(url.toString(), baseUrl.url);
            }
        }
        /*
         * (non-Javadoc)
         *
         * @see jodd.lagarto.EmptyTagVisitor#script(jodd.lagarto.Tag,
         * java.lang.CharSequence)
         */
        @Override
        public void script(Tag tag, CharSequence body) {
            if (!enabled.peek()) {
                return;
            }
            extractAttribute(tag, ATT_SRC);
        }

        /*
         * (non-Javadoc)
         *
         * @see jodd.lagarto.EmptyTagVisitor#tag(jodd.lagarto.Tag)
         */
        @Override
        public void tag(Tag tag) {
            if (!enabled.peek()) {
                return;
            }
            TagType tagType = tag.getType();
            switch (tagType) {
            case START:
            case SELF_CLOSING:
                if (tag.nameEquals(TAG_BODY)) {
                    extractAttribute(tag, ATT_BACKGROUND);
                } else if (tag.nameEquals(TAG_BASE)) {
                    CharSequence baseref = tag.getAttributeValue(ATT_HREF);
                    try {
                        if (!StringUtils.isEmpty(baseref))// Bugzilla 30713
                        {
                            baseUrl.url = ConversionUtils.makeRelativeURL(baseUrl.url, baseref.toString());
                        }
                    } catch (MalformedURLException e1) {
                        throw new RuntimeException(e1);
                    }
                } else if (tag.nameEquals(TAG_IMAGE)) {
                    extractAttribute(tag, ATT_SRC);
                } else if (tag.nameEquals(TAG_APPLET)) {
                    extractAttribute(tag, ATT_CODE);
                } else if (tag.nameEquals(TAG_OBJECT)) {
                    extractAttribute(tag, ATT_CODEBASE);               
                    extractAttribute(tag, ATT_DATA);                
                } else if (tag.nameEquals(TAG_INPUT)) {
                    // we check the input tag type for image
                    CharSequence type = tag.getAttributeValue(ATT_TYPE);
                    if (type != null && TagUtil.equalsIgnoreCase(ATT_IS_IMAGE, type)) {
                        // then we need to download the binary
                        extractAttribute(tag, ATT_SRC);
                    }
                } else if (tag.nameEquals(TAG_SCRIPT)) {
                    extractAttribute(tag, ATT_SRC);
                    // Bug 51750
                } else if (tag.nameEquals(TAG_FRAME) || tag.nameEquals(TAG_IFRAME)) {
                    extractAttribute(tag, ATT_SRC);
                } else if (tag.nameEquals(TAG_EMBED)) {
                    extractAttribute(tag, ATT_SRC);
                } else if (tag.nameEquals(TAG_BGSOUND)){
                    extractAttribute(tag, ATT_SRC);
                } else if (tag.nameEquals(TAG_LINK)) {
                    CharSequence relAttribute = tag.getAttributeValue(ATT_REL);
                    // Putting the string first means it works even if the attribute is null
                    if (relAttribute != null && TagUtil.equalsIgnoreCase(STYLESHEET,relAttribute)) {
                        extractAttribute(tag, ATT_HREF);
                    }
                } else {
                    extractAttribute(tag, ATT_BACKGROUND);
                }
   
   
                // Now look for URLs in the STYLE attribute
                CharSequence styleTagStr = tag.getAttributeValue(ATT_STYLE);
                if(!StringUtils.isEmpty(styleTagStr)) {
                    HtmlParsingUtils.extractStyleURLs(baseUrl.url, urls, styleTagStr.toString());
                }
                break;
            case END:
                break;
            }
        }

        /* (non-Javadoc)
         * @see jodd.lagarto.EmptyTagVisitor#condComment(java.lang.CharSequence, boolean, boolean, boolean)
         */
        @Override
        public void condComment(CharSequence expression, boolean isStartingTag,
                boolean isHidden, boolean isHiddenEndTag) {
            // See http://css-tricks.com/how-to-create-an-ie-only-stylesheet/
            if(!isStartingTag) {
                enabled.pop();
            } else {
                if (htmlCCommentExpressionMatcher == null) {
                    htmlCCommentExpressionMatcher = new HtmlCCommentExpressionMatcher();
                }
                String expressionString = expression.toString().trim();
                enabled.push(Boolean.valueOf(htmlCCommentExpressionMatcher.match(ieVersion.floatValue(),
                        expressionString)));               
            }
        }

        /* (non-Javadoc)
         * @see jodd.lagarto.EmptyTagVisitor#start()
         */
        @Override
        public void start() {
            super.start();
            enabled.clear();
            enabled.push(Boolean.TRUE);
        }
    }

    @Override
    public Iterator<URL> getEmbeddedResourceURLs(String userAgent, byte[] html, URL baseUrl,
            URLCollection coll, String encoding) throws HTMLParseException {
        try {
            Float ieVersion = extractIEVersion(userAgent);
           
            String contents = new String(html,encoding);
            // As per Jodd javadocs, emitStrings should be false for visitor for better performances
            LagartoParser lagartoParser = new LagartoParser(contents, false);
            LagartoParserConfig<LagartoParserConfig<?>> config = new LagartoParserConfig<LagartoParserConfig<?>>();
            config.setCaseSensitive(false);
            // Conditional comments only apply for IE < 10
            config.setEnableConditionalComments(isEnableConditionalComments(ieVersion));
           
            lagartoParser.setConfig(config);
            JMeterTagVisitor tagVisitor = new JMeterTagVisitor(new URLPointer(baseUrl), coll, ieVersion);
            lagartoParser.parse(tagVisitor);
            return coll.iterator();
        } catch (LagartoException e) {
            // TODO is it the best way ? https://issues.apache.org/bugzilla/show_bug.cgi?id=55634
            if(log.isDebugEnabled()) {
                log.debug("Error extracting embedded resource URLs from:'"+baseUrl+"', probably not text content, message:"+e.getMessage());
            }
            return Collections.<URL>emptyList().iterator();
        } catch (Exception e) {
            throw new HTMLParseException(e);
        }
    }

   



    /* (non-Javadoc)
     * @see org.apache.jmeter.protocol.http.parser.HTMLParser#isReusable()
     */
    @Override
    protected boolean isReusable() {
        return true;
    }
}
// TOP
//
// Related classes of org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser$JMeterTagVisitor
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.