Package org.apache.jmeter.protocol.http.parser

Source Code of org.apache.jmeter.protocol.http.parser.HtmlParsingUtils

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package org.apache.jmeter.protocol.http.parser;

import java.io.ByteArrayInputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.LinkedList;
import java.util.List;

import org.apache.jmeter.config.Argument;
import org.apache.jmeter.config.Arguments;
import org.apache.jmeter.protocol.http.sampler.HTTPSamplerBase;
import org.apache.jmeter.protocol.http.sampler.HTTPSamplerFactory;
import org.apache.jmeter.testelement.property.PropertyIterator;
import org.apache.jmeter.util.JMeterUtils;
import org.apache.jorphan.logging.LoggingManager;
import org.apache.log.Logger;
import org.apache.oro.text.PatternCacheLRU;
import org.apache.oro.text.regex.MatchResult;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.PatternMatcherInput;
import org.apache.oro.text.regex.Perl5Compiler;
import org.apache.oro.text.regex.Perl5Matcher;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;

// For Junit tests @see TestHtmlParsingUtils

/**
* @author Michael Stover Created June 14, 2001
*/
public final class HtmlParsingUtils {
  private static final Logger log = LoggingManager.getLoggerForClass();

  /**
   * Private constructor to prevent instantiation.
   */
  private HtmlParsingUtils() {
  }

  /**
   * Check if anchor matches by checking against:
   * - protocol
   * - domain
   * - path
   * - parameter names
   *
   * @param newLink target to match
   * @param config pattern to match against
   *
   * @return true if target URL matches pattern URL
   */
  public static boolean isAnchorMatched(HTTPSamplerBase newLink, HTTPSamplerBase config)
  {
    String query = null;
    try {
      query = URLDecoder.decode(newLink.getQueryString(), "UTF-8"); // $NON-NLS-1$
    } catch (UnsupportedEncodingException e) {
      // UTF-8 unsupported? You must be joking!
      log.error("UTF-8 encoding not supported!");
      throw new Error("Should not happen: " + e.toString());
    }

    final Arguments arguments = config.getArguments();
    if (query == null && arguments.getArgumentCount() > 0) {
      return false;// failed to convert query, so assume no match
    }

    final Perl5Matcher matcher = JMeterUtils.getMatcher();
    final PatternCacheLRU patternCache = JMeterUtils.getPatternCache();

    if (!isEqualOrMatches(newLink.getProtocol(), config.getProtocol(), matcher, patternCache)){
      return false;
    }

    final String domain = config.getDomain();
    if (domain != null && domain.length() > 0) {
      if (!isEqualOrMatches(newLink.getDomain(), domain, matcher, patternCache)){
        return false;
      }
    }

    final String path = config.getPath();
    if (!newLink.getPath().equals(path)
        && !matcher.matches(newLink.getPath(), patternCache.getPattern("[/]*" + path, // $NON-NLS-1$
            Perl5Compiler.READ_ONLY_MASK))) {
      return false;
    }

    PropertyIterator iter = arguments.iterator();
    while (iter.hasNext()) {
      Argument item = (Argument) iter.next().getObjectValue();
      final String name = item.getName();
      if (query.indexOf(name + "=") == -1) { // $NON-NLS-1$
        if (!(matcher.contains(query, patternCache.getPattern(name, Perl5Compiler.READ_ONLY_MASK)))) {
          return false;
        }
      }
    }

    return true;
  }

  /**
   * Arguments match if the input name matches the corresponding pattern name
   * and the input value matches the pattern value, where the matching is done
   * first using String equals, and then Regular Expression matching if the equals test fails.
   *
   * @param arg - input Argument
   * @param patternArg - pattern to match against
   * @return true if both name and value match
   */
  public static boolean isArgumentMatched(Argument arg, Argument patternArg) {
    final Perl5Matcher matcher = JMeterUtils.getMatcher();
    final PatternCacheLRU patternCache = JMeterUtils.getPatternCache();
    return
        isEqualOrMatches(arg.getName(), patternArg.getName(), matcher, patternCache)
    &&
        isEqualOrMatches(arg.getValue(), patternArg.getValue(), matcher, patternCache);
  }

  /**
   * Match the input argument against the pattern using String.equals() or pattern matching if that fails.
   *
   * @param arg input string
   * @param pat pattern string
   * @param matcher Perl5Matcher
   * @param cache PatternCache
   *
   * @return true if input matches the pattern
   */
  public static boolean isEqualOrMatches(String arg, String pat, Perl5Matcher matcher, PatternCacheLRU cache){
    return
        arg.equals(pat)
        ||
        matcher.matches(arg,cache.getPattern(pat,Perl5Compiler.READ_ONLY_MASK));
  }

  /**
   * Match the input argument against the pattern using String.equals() or pattern matching if that fails
   * using case-insenssitive matching.
   *
   * @param arg input string
   * @param pat pattern string
   * @param matcher Perl5Matcher
   * @param cache PatternCache
   *
   * @return true if input matches the pattern
   */
  public static boolean isEqualOrMatchesCaseBlind(String arg, String pat, Perl5Matcher matcher, PatternCacheLRU cache){
    return
        arg.equalsIgnoreCase(pat)
        ||
        matcher.matches(arg,cache.getPattern(pat,Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.CASE_INSENSITIVE_MASK));
  }

  /**
   * Match the input argument against the pattern using String.equals() or pattern matching if that fails
   * using case-insensitive matching.
   *
   * @param arg input string
   * @param pat pattern string
   *
   * @return true if input matches the pattern
   */
  public static boolean isEqualOrMatches(String arg, String pat){
    return isEqualOrMatches(arg, pat, JMeterUtils.getMatcher(), JMeterUtils.getPatternCache());
  }

  /**
   * Match the input argument against the pattern using String.equals() or pattern matching if that fails
   * using case-insensitive matching.
   *
   * @param arg input string
   * @param pat pattern string
   *
   * @return true if input matches the pattern
   */
  public static boolean isEqualOrMatchesCaseBlind(String arg, String pat){
    return isEqualOrMatchesCaseBlind(arg, pat, JMeterUtils.getMatcher(), JMeterUtils.getPatternCache());
  }

  /**
   * Returns <code>tidy</code> as HTML parser.
   *
   * @return a <code>tidy</code> HTML parser
   */
  public static Tidy getParser() {
    log.debug("Start : getParser1");
    Tidy tidy = new Tidy();
    tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
    tidy.setQuiet(true);
    tidy.setShowWarnings(false);

    if (log.isDebugEnabled()) {
      log.debug("getParser1 : tidy parser created - " + tidy);
    }

    log.debug("End : getParser1");

    return tidy;
  }

  /**
   * Returns a node representing a whole xml given an xml document.
   *
   * @param text
   *            an xml document
   * @return a node representing a whole xml
   */
  public static Node getDOM(String text) {
    log.debug("Start : getDOM1");

    try {
      Node node = getParser().parseDOM(new ByteArrayInputStream(text.getBytes("UTF-8")), null);// $NON-NLS-1$

      if (log.isDebugEnabled()) {
        log.debug("node : " + node);
      }

      log.debug("End : getDOM1");

      return node;
    } catch (UnsupportedEncodingException e) {
      log.error("getDOM1 : Unsupported encoding exception - " + e);
      log.debug("End : getDOM1");
      throw new RuntimeException("UTF-8 encoding failed");
    }
  }

  public static Document createEmptyDoc() {
    return Tidy.createEmptyDocument();
  }

  /**
   * Create a new Sampler based on an HREF string plus a contextual URL
   * object. Given that an HREF string might be of three possible forms, some
   * processing is required.
   */
  public static HTTPSamplerBase createUrlFromAnchor(String parsedUrlString, URL context) throws MalformedURLException {
    if (log.isDebugEnabled()) {
      log.debug("Creating URL from Anchor: " + parsedUrlString + ", base: " + context);
    }
    URL url = new URL(context, parsedUrlString);
    HTTPSamplerBase sampler =HTTPSamplerFactory.newInstance();
    sampler.setDomain(url.getHost());
    sampler.setProtocol(url.getProtocol());
    sampler.setPort(url.getPort());
    sampler.setPath(url.getPath());
    sampler.parseArguments(url.getQuery());

    return sampler;
  }

  public static List createURLFromForm(Node doc, URL context) {
    String selectName = null;
    LinkedList urlConfigs = new LinkedList();
    recurseForm(doc, urlConfigs, context, selectName, false);
    /*
     * NamedNodeMap atts = formNode.getAttributes();
     * if(atts.getNamedItem("action") == null) { throw new
     * MalformedURLException(); } String action =
     * atts.getNamedItem("action").getNodeValue(); UrlConfig url =
     * createUrlFromAnchor(action, context); recurseForm(doc, url,
     * selectName,true,formStart);
     */
    return urlConfigs;
  }

    // N.B. Since the tags are extracted from an HTML Form, any values must already have been encoded
  private static boolean recurseForm(Node tempNode, LinkedList urlConfigs, URL context, String selectName,
      boolean inForm) {
    NamedNodeMap nodeAtts = tempNode.getAttributes();
    String tag = tempNode.getNodeName();
    try {
      if (inForm) {
        HTTPSamplerBase url = (HTTPSamplerBase) urlConfigs.getLast();
        if (tag.equalsIgnoreCase("form")) { // $NON-NLS-1$
          try {
            urlConfigs.add(createFormUrlConfig(tempNode, context));
          } catch (MalformedURLException e) {
            inForm = false;
          }
        } else if (tag.equalsIgnoreCase("input")) { // $NON-NLS-1$
          url.addEncodedArgument(getAttributeValue(nodeAtts, "name")// $NON-NLS-1$
              getAttributeValue(nodeAtts, "value")); // $NON-NLS-1$
        } else if (tag.equalsIgnoreCase("textarea")) { // $NON-NLS-1$
          try {
            url.addEncodedArgument(getAttributeValue(nodeAtts, "name")// $NON-NLS-1$
                tempNode.getFirstChild().getNodeValue());
          } catch (NullPointerException e) {
            url.addArgument(getAttributeValue(nodeAtts, "name"), ""); // $NON-NLS-1$
          }
        } else if (tag.equalsIgnoreCase("select")) { // $NON-NLS-1$
          selectName = getAttributeValue(nodeAtts, "name"); // $NON-NLS-1$
        } else if (tag.equalsIgnoreCase("option")) { // $NON-NLS-1$
          String value = getAttributeValue(nodeAtts, "value"); // $NON-NLS-1$
          if (value == null) {
            try {
              value = tempNode.getFirstChild().getNodeValue();
            } catch (NullPointerException e) {
              value = ""; // $NON-NLS-1$
            }
          }
          url.addEncodedArgument(selectName, value);
        }
      } else if (tag.equalsIgnoreCase("form")) { // $NON-NLS-1$
        try {
          urlConfigs.add(createFormUrlConfig(tempNode, context));
          inForm = true;
        } catch (MalformedURLException e) {
          inForm = false;
        }
      }
    } catch (Exception ex) {
      log.warn("Some bad HTML " + printNode(tempNode), ex);
    }
    NodeList childNodes = tempNode.getChildNodes();
    for (int x = 0; x < childNodes.getLength(); x++) {
      inForm = recurseForm(childNodes.item(x), urlConfigs, context, selectName, inForm);
    }
    return inForm;
  }

  private static String getAttributeValue(NamedNodeMap att, String attName) {
    try {
      return att.getNamedItem(attName).getNodeValue();
    } catch (Exception ex) {
      return ""; // $NON-NLS-1$
    }
  }

  private static String printNode(Node node) {
    StringBuffer buf = new StringBuffer();
    buf.append("<"); // $NON-NLS-1$
    buf.append(node.getNodeName());
    NamedNodeMap atts = node.getAttributes();
    for (int x = 0; x < atts.getLength(); x++) {
      buf.append(" "); // $NON-NLS-1$
      buf.append(atts.item(x).getNodeName());
      buf.append("=\""); // $NON-NLS-1$
      buf.append(atts.item(x).getNodeValue());
      buf.append("\""); // $NON-NLS-1$
    }

    buf.append(">"); // $NON-NLS-1$

    return buf.toString();
  }

  private static HTTPSamplerBase createFormUrlConfig(Node tempNode, URL context) throws MalformedURLException {
    NamedNodeMap atts = tempNode.getAttributes();
    if (atts.getNamedItem("action") == null) { // $NON-NLS-1$
      throw new MalformedURLException();
    }
    String action = atts.getNamedItem("action").getNodeValue(); // $NON-NLS-1$
    HTTPSamplerBase url = createUrlFromAnchor(action, context);
    return url;
  }
 
  public static void extractStyleURLs(final URL baseUrl, final URLCollection urls, String styleTagStr) {
    Perl5Matcher matcher = JMeterUtils.getMatcher();
    Pattern pattern = JMeterUtils.getPatternCache().getPattern(
        "URL\\(\\s*('|\")(.*)('|\")\\s*\\)", // $NON-NLS-1$
        Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.SINGLELINE_MASK | Perl5Compiler.READ_ONLY_MASK);
    PatternMatcherInput input = null;
    input = new PatternMatcherInput(styleTagStr);
    while (matcher.contains(input, pattern)) {
        MatchResult match = matcher.getMatch();
        // The value is in the second group
        String styleUrl = match.group(2);
        urls.addURL(styleUrl, baseUrl);
    }
  }
}
TOP

Related Classes of org.apache.jmeter.protocol.http.parser.HtmlParsingUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.