/*
* Copyright 2003-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.jmeter.protocol.http.parser;
import java.io.ByteArrayInputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
import org.apache.jmeter.config.Argument;
import org.apache.jmeter.junit.JMeterTestCase;
import org.apache.jmeter.protocol.http.sampler.HTTPSampler;
import org.apache.jmeter.protocol.http.sampler.HTTPSamplerBase;
import org.apache.jmeter.testelement.property.PropertyIterator;
import org.apache.jorphan.logging.LoggingManager;
import org.apache.jorphan.util.JOrphanUtils;
import org.apache.log.Logger;
import org.apache.oro.text.PatternCacheLRU;
import org.apache.oro.text.regex.MalformedPatternException;
import org.apache.oro.text.regex.Perl5Compiler;
import org.apache.oro.text.regex.Perl5Matcher;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;
import org.xml.sax.SAXException;
/**
* @author Michael Stover Created June 14, 2001
*/
public final class HtmlParsingUtils {
/** Logger shared by all static helpers in this class. */
private static final Logger log = LoggingManager.getLoggerForClass();

/*
 * NOTUSED private int compilerOptions = Perl5Compiler.CASE_INSENSITIVE_MASK |
 * Perl5Compiler.MULTILINE_MASK | Perl5Compiler.READ_ONLY_MASK;
 */

/**
 * Shared cache of compiled ORO patterns, constructed with a size argument
 * of 1000 and a Perl5 compiler.
 */
private static final PatternCacheLRU patternCache = new PatternCacheLRU(1000, new Perl5Compiler());

/**
 * Hands out one {@link Perl5Matcher} per thread; the matcher is created
 * the first time a thread asks for it.
 */
private static final ThreadLocal localMatcher = new ThreadLocal() {
    protected Object initialValue() {
        return new Perl5Matcher();
    }
};

/**
 * Private constructor to prevent instantiation: this class only offers
 * static helpers.
 */
private HtmlParsingUtils() {
}
/**
 * Decides whether a link found in a page satisfies the link pattern
 * described by a configuration sampler.
 * <p>
 * The checks are applied in this order and the first failure ends the
 * comparison:
 * <ol>
 * <li>every argument name of <code>config</code> must either appear
 * literally as <code>name=</code> in the decoded query string of
 * <code>newLink</code>, or be found in it when used as a regular
 * expression (argument values are not compared here);</li>
 * <li>if <code>config</code> has a non-empty domain, the link's domain must
 * equal it or match it as a regular expression;</li>
 * <li>the link's path must equal the configured path or match
 * <code>"[/]*" + path</code> as a regular expression;</li>
 * <li>the link's protocol must match the configured protocol as a regular
 * expression.</li>
 * </ol>
 *
 * @param newLink
 *            the candidate link
 * @param config
 *            the sampler holding the patterns to match against
 * @return <code>true</code> if all checks pass
 * @throws MalformedPatternException
 *             declared in the signature for callers (presumably for an
 *             invalid pattern; confirm against the ORO pattern cache)
 */
public static synchronized boolean isAnchorMatched(HTTPSamplerBase newLink, HTTPSamplerBase config)
        throws MalformedPatternException {
    Perl5Matcher matcher = (Perl5Matcher) localMatcher.get();
    PropertyIterator iter = config.getArguments().iterator();

    String query;
    try {
        query = JOrphanUtils.decode(newLink.getQueryString(), "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // UTF-8 unsupported? You must be joking!
        // Pass the exception to the logger so its stack trace is not lost.
        log.error("UTF-8 encoding not supported!", e);
        throw new Error("Should not happen: " + e.toString());
    }

    // A link without a query string cannot satisfy any argument pattern.
    if (query == null && config.getArguments().getArgumentCount() > 0) {
        return false;
    }

    while (iter.hasNext()) {
        Argument item = (Argument) iter.next().getObjectValue();
        String name = item.getName();
        // Cheap literal test first; fall back to treating the name as a regex.
        if (query.indexOf(name + "=") == -1
                && !matcher.contains(query, patternCache.getPattern(name, Perl5Compiler.READ_ONLY_MASK))) {
            return false;
        }
    }

    // The domain is only constrained when the configuration specifies one.
    String domain = config.getDomain();
    if (domain != null && domain.length() > 0 && !newLink.getDomain().equals(domain)
            && !matcher.matches(newLink.getDomain(), patternCache.getPattern(domain,
                    Perl5Compiler.READ_ONLY_MASK))) {
        return false;
    }

    // "[/]*" lets the configured path match regardless of leading slashes.
    if (!newLink.getPath().equals(config.getPath())
            && !matcher.matches(newLink.getPath(), patternCache.getPattern("[/]*" + config.getPath(),
                    Perl5Compiler.READ_ONLY_MASK))) {
        return false;
    }

    return matcher.matches(newLink.getProtocol(), patternCache.getPattern(config.getProtocol(),
            Perl5Compiler.READ_ONLY_MASK));
}
/**
 * Tells whether an argument satisfies a pattern argument. Both the name and
 * the value must agree; each one agrees when it is equal to its counterpart
 * or fully matches it when the counterpart is used as a regular expression.
 *
 * @param arg
 *            the concrete argument
 * @param patternArg
 *            the argument whose name and value act as patterns
 * @return <code>true</code> if name and value both agree
 */
public static synchronized boolean isArgumentMatched(Argument arg, Argument patternArg)
        throws MalformedPatternException {
    Perl5Matcher matcher = (Perl5Matcher) localMatcher.get();

    String name = arg.getName();
    String namePattern = patternArg.getName();
    boolean nameAgrees = name.equals(namePattern)
            || matcher.matches(name, patternCache.getPattern(namePattern, Perl5Compiler.READ_ONLY_MASK));
    if (!nameAgrees) {
        // The value is not even looked at when the name differs.
        return false;
    }

    String value = arg.getValue();
    String valuePattern = patternArg.getValue();
    return value.equals(valuePattern)
            || matcher.matches(value, patternCache.getPattern(valuePattern, Perl5Compiler.READ_ONLY_MASK));
}
/**
 * Builds a fresh <code>Tidy</code> HTML parser configured for UTF-8 input,
 * with quiet mode on and warnings switched off.
 *
 * @return a newly created <code>tidy</code> HTML parser
 */
public static Tidy getParser() {
    log.debug("Start : getParser1");

    final Tidy parser = new Tidy();
    parser.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
    parser.setShowWarnings(false);
    parser.setQuiet(true);

    // Guarded so the string concatenation only happens when it will be logged.
    if (log.isDebugEnabled()) {
        log.debug("getParser1 : tidy parser created - " + parser);
    }

    log.debug("End : getParser1");
    return parser;
}
/**
 * Parses a document with the Tidy parser returned by {@link #getParser()}
 * and returns the resulting DOM.
 *
 * @param text
 *            the document text; it is encoded as UTF-8 before parsing
 * @return the node produced by the parser for the whole document
 * @throws SAXException
 *             declared in the signature for callers
 * @throws RuntimeException
 *             if the UTF-8 encoding is reported as unsupported
 */
public static Node getDOM(String text) throws SAXException {
    log.debug("Start : getDOM1");

    try {
        Node node = getParser().parseDOM(new ByteArrayInputStream(text.getBytes("UTF-8")), null);

        if (log.isDebugEnabled()) {
            log.debug("node : " + node);
        }

        log.debug("End : getDOM1");
        return node;
    } catch (UnsupportedEncodingException e) {
        // Hand the exception itself to the logger so the stack trace is kept.
        log.error("getDOM1 : Unsupported encoding exception - " + e, e);
        log.debug("End : getDOM1");
        throw new RuntimeException("UTF-8 encoding failed");
    }
}
/**
 * Creates an empty DOM document by delegating to
 * <code>Tidy.createEmptyDocument()</code>.
 *
 * @return the empty document
 */
public static Document createEmptyDoc() {
    final Document emptyDocument = Tidy.createEmptyDocument();
    return emptyDocument;
}
/**
 * Builds a sampler for an HREF string found in a page. The HREF is resolved
 * against the URL of the page it came from, and the sampler receives the
 * host, protocol, port, path and query of the resolved URL.
 *
 * @param parsedUrlString
 *            the HREF as it appears in the page
 * @param context
 *            the URL used as base when resolving the HREF
 * @return a sampler configured from the resolved URL
 * @throws MalformedURLException
 *             if the HREF cannot be resolved into a URL
 */
public static HTTPSamplerBase createUrlFromAnchor(String parsedUrlString, URL context) throws MalformedURLException {
    if (log.isDebugEnabled()) {
        log.debug("Creating URL from Anchor: " + parsedUrlString + ", base: " + context);
    }

    final URL resolved = new URL(context, parsedUrlString);

    // TODO create appropriate sampler
    final HTTPSamplerBase anchorSampler = new HTTPSampler();
    anchorSampler.setDomain(resolved.getHost());
    anchorSampler.setProtocol(resolved.getProtocol());
    anchorSampler.setPort(resolved.getPort());
    anchorSampler.setPath(resolved.getPath());
    anchorSampler.parseArguments(resolved.getQuery());
    return anchorSampler;
}
/**
 * Walks a DOM tree and collects one sampler per form found in it.
 *
 * @param doc
 *            the root node at which the walk starts
 * @param context
 *            the URL against which form actions are resolved
 * @return the list of samplers created for the forms, in the order they
 *         were added; empty if none was created
 */
public static List createURLFromForm(Node doc, URL context) {
    final LinkedList formSamplers = new LinkedList();
    // The walk starts outside of any form and with no select name known yet.
    recurseForm(doc, formSamplers, context, null, false);
    return formSamplers;
}
/**
 * Visits a node and, recursively, its children, creating a sampler for each
 * form element and adding the form's controls to it as arguments.
 * <p>
 * While <code>inForm</code> is set, controls are added to the sampler that
 * was most recently appended to <code>urlConfigs</code>:
 * <code>input</code> contributes its name/value attributes,
 * <code>textarea</code> its name and the value of its first child,
 * <code>select</code> only records its name for the options below it, and
 * <code>option</code> contributes the select name with its
 * <code>value</code> attribute, or the value of its first child when it has
 * no <code>value</code> attribute.
 * <p>
 * NOTE(review): the flag returned by a child is handed to the following
 * siblings, so once a form has been entered, controls that come after it in
 * the tree are still attributed to the last sampler - confirm this is
 * intended.
 *
 * @param tempNode
 *            the node to visit
 * @param urlConfigs
 *            receives the samplers created for forms
 * @param context
 *            the URL against which form actions are resolved
 * @param selectName
 *            name of the enclosing select element, if any
 * @param inForm
 *            whether a form sampler is currently being filled
 * @return the value of the in-form flag after this node and its
 *         descendants have been visited
 */
private static boolean recurseForm(Node tempNode, LinkedList urlConfigs, URL context, String selectName,
        boolean inForm) {
    NamedNodeMap nodeAtts = tempNode.getAttributes();
    String tag = tempNode.getNodeName();
    try {
        if (inForm) {
            HTTPSamplerBase url = (HTTPSamplerBase) urlConfigs.getLast();
            if (tag.equalsIgnoreCase("form")) {
                try {
                    urlConfigs.add(createFormUrlConfig(tempNode, context));
                } catch (MalformedURLException e) {
                    inForm = false;
                }
            } else if (tag.equalsIgnoreCase("input")) {
                url.addArgument(getAttributeValue(nodeAtts, "name"), getAttributeValue(nodeAtts, "value"));
            } else if (tag.equalsIgnoreCase("textarea")) {
                url.addArgument(getAttributeValue(nodeAtts, "name"), firstChildValue(tempNode));
            } else if (tag.equalsIgnoreCase("select")) {
                selectName = getAttributeValue(nodeAtts, "name");
            } else if (tag.equalsIgnoreCase("option")) {
                // Only consult the attribute when it is really present;
                // otherwise the option's text is what gets submitted.
                String value = null;
                if (nodeAtts != null && nodeAtts.getNamedItem("value") != null) {
                    value = getAttributeValue(nodeAtts, "value");
                }
                if (value == null) {
                    value = firstChildValue(tempNode);
                }
                url.addArgument(selectName, value);
            }
        } else if (tag.equalsIgnoreCase("form")) {
            try {
                urlConfigs.add(createFormUrlConfig(tempNode, context));
                inForm = true;
            } catch (MalformedURLException e) {
                // A form without a usable action is skipped.
                inForm = false;
            }
        }
    } catch (Exception ex) {
        log.warn("Some bad HTML " + printNode(tempNode), ex);
    }
    NodeList childNodes = tempNode.getChildNodes();
    for (int x = 0; x < childNodes.getLength(); x++) {
        inForm = recurseForm(childNodes.item(x), urlConfigs, context, selectName, inForm);
    }
    return inForm;
}

/**
 * Returns the node value of the first child of a node, or an empty string
 * if the node has no children.
 */
private static String firstChildValue(Node node) {
    Node firstChild = node.getFirstChild();
    if (firstChild == null) {
        return "";
    }
    return firstChild.getNodeValue();
}
/**
 * Looks up an attribute by name and returns its value.
 *
 * @param att
 *            the attributes of a node; may be <code>null</code>
 * @param attName
 *            the name of the attribute to read
 * @return the attribute's node value, or an empty string if the map is
 *         <code>null</code>, the attribute is absent or the lookup fails
 */
private static String getAttributeValue(NamedNodeMap att, String attName) {
    // Explicit checks cover the expected "not there" cases without
    // relying on an exception being thrown and caught.
    if (att == null) {
        return "";
    }
    try {
        Node attribute = att.getNamedItem(attName);
        if (attribute == null) {
            return "";
        }
        return attribute.getNodeValue();
    } catch (RuntimeException ex) {
        // Best effort: a failing DOM implementation yields "no value".
        return "";
    }
}
/**
 * Renders a node as an opening tag, listing its attributes as
 * <code>name="value"</code> pairs, for use in log messages.
 *
 * @param node
 *            the node to render
 * @return a string of the form <code>&lt;name att="value" ...&gt;</code>
 */
private static String printNode(Node node) {
    StringBuffer buf = new StringBuffer();
    buf.append("<");
    buf.append(node.getNodeName());
    NamedNodeMap atts = node.getAttributes();
    // Nodes that are not elements (e.g. text nodes) report no attribute map.
    if (atts != null) {
        for (int x = 0; x < atts.getLength(); x++) {
            Node attribute = atts.item(x);
            buf.append(" ");
            buf.append(attribute.getNodeName());
            buf.append("=\"");
            buf.append(attribute.getNodeValue());
            buf.append("\"");
        }
    }
    buf.append(">");
    return buf.toString();
}
/**
 * Creates the sampler for a form element from its <code>action</code>
 * attribute, resolved against the given context URL.
 *
 * @param tempNode
 *            the form node
 * @param context
 *            the URL used as base for the action
 * @return the sampler built by {@link #createUrlFromAnchor(String, URL)}
 * @throws MalformedURLException
 *             if the node has no <code>action</code> attribute or the
 *             action cannot be resolved
 */
private static HTTPSamplerBase createFormUrlConfig(Node tempNode, URL context) throws MalformedURLException {
    final Node actionAttribute = tempNode.getAttributes().getNamedItem("action");
    if (actionAttribute == null) {
        throw new MalformedURLException();
    }
    return createUrlFromAnchor(actionAttribute.getNodeValue(), context);
}
// /////////////////// Start of Test Code /////////////////

// TODO: need more tests
/**
 * Unit tests for the static helpers of the enclosing class.
 */
public static class Test extends JMeterTestCase {
    public Test(String name) {
        super(name);
    }

    protected void setUp() {
    }

    /** The parser factory must complete without throwing. */
    public void testGetParser() throws Exception {
        getParser();
    }

    /** Parsing a minimal document and an empty string must not throw. */
    public void testGetDom() throws Exception {
        getDOM("<HTML></HTML>");
        getDOM("");
    }

    /** Default arguments match each other; values are matched as regexes. */
    public void testIsArgumentMatched() throws Exception {
        assertTrue(isArgumentMatched(new Argument(), new Argument()));

        Argument candidate = new Argument("test", "abcd");
        Argument matchingPattern = new Argument("test", "a.*d");
        assertTrue(isArgumentMatched(candidate, matchingPattern));

        candidate = new Argument("test", "abcd");
        Argument failingPattern = new Argument("test", "a.*e");
        assertFalse(isArgumentMatched(candidate, failingPattern));
    }
}
}