// $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/tests/parserHelperTests/StringParserTest.java,v 1.2 2004/02/10 13:41:10 woolfel Exp $
/*
* ====================================================================
* Copyright 2002-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// The developers of JMeter and Apache are greatful to the developers
// of HTMLParser for giving Apache Software Foundation a non-exclusive
// license. The performance benefits of HTMLParser are clear and the
// users of JMeter will benefit from the hard work the HTMLParser
// team. For detailed information about HTMLParser, the project is
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
//
// HTMLParser was originally created by Somik Raha in 2000. Since then
// a healthy community of users has formed and helped refine the
// design so that it is able to tackle the difficult task of parsing
// dirty HTML. Derrick Oswald is the current lead developer and was kind
// enough to assist JMeter.
package org.htmlparser.tests.parserHelperTests;
import org.htmlparser.Parser;
import org.htmlparser.RemarkNode;
import org.htmlparser.StringNode;
import org.htmlparser.scanners.LinkScanner;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.MetaTag;
import org.htmlparser.tests.ParserTestCase;
import org.htmlparser.util.ParserException;
public class StringParserTest extends ParserTestCase
{
public StringParserTest(String name)
{
super(name);
}
/**
* The bug being reproduced is this : <BR>
* <HTML><HEAD><TITLE>Google</TITLE> <BR>
* The above line is incorrectly parsed in that, the text Google is missed.
* The presence of this bug is typically when some tag is identified before the string node is. (usually seen
* with the end tag). The bug lies in NodeReader.readElement().
* Creation date: (6/17/2001 4:01:06 PM)
*/
public void testStringNodeBug1() throws ParserException
{
createParser("<HTML><HEAD><TITLE>Google</TITLE>");
parseAndAssertNodeCount(5);
// The fourth node should be a HTMLStringNode- with the text - Google
assertTrue(
"Fourth node should be a HTMLStringNode",
node[3] instanceof StringNode);
StringNode stringNode = (StringNode) node[3];
assertEquals("Text of the StringNode", "Google", stringNode.getText());
}
/**
* Bug reported by Kaarle Kaila of Nokia<br>
* For the following HTML :
* view these documents, you must have <A href='http://www.adobe.com'>Adobe <br>
* Acrobat Reader</A> installed on your computer.<br>
* The first string before the link is not identified, and the space after the link is also not identified
* Creation date: (8/2/2001 2:07:32 AM)
*/
public void testStringNodeBug2() throws ParserException
{
// Register the link scanner
createParser(
"view these documents, you must have <A href='http://www.adobe.com'>Adobe \n"
+ "Acrobat Reader</A> installed on your computer.");
Parser.setLineSeparator("\r\n");
parser.addScanner(new LinkScanner("-l"));
parseAndAssertNodeCount(3);
// The first node should be a HTMLStringNode- with the text - view these documents, you must have
assertTrue(
"First node should be a HTMLStringNode",
node[0] instanceof StringNode);
StringNode stringNode = (StringNode) node[0];
assertEquals(
"Text of the StringNode",
"view these documents, you must have ",
stringNode.getText());
assertTrue(
"Second node should be a link node",
node[1] instanceof LinkTag);
LinkTag linkNode = (LinkTag) node[1];
assertEquals("Link is", "http://www.adobe.com", linkNode.getLink());
assertEquals(
"Link text is",
"Adobe \r\nAcrobat Reader",
linkNode.getLinkText());
assertTrue(
"Third node should be a string node",
node[2] instanceof StringNode);
StringNode stringNode2 = (StringNode) node[2];
assertEquals(
"Contents of third node",
" installed on your computer.",
stringNode2.getText());
}
/**
* Bug reported by Roger Sollberger<br>
* For the following HTML :
* <a href="http://asgard.ch">[< ASGARD ></a><br>
* The string node is not correctly identified
*/
public void testTagCharsInStringNode() throws ParserException
{
createParser("<a href=\"http://asgard.ch\">[> ASGARD <]</a>");
parser.addScanner(new LinkScanner("-l"));
parseAndAssertNodeCount(1);
assertTrue(
"Node identified must be a link tag",
node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag) node[0];
assertEquals("[> ASGARD <]", linkTag.getLinkText());
assertEquals("http://asgard.ch", linkTag.getLink());
}
public void testToPlainTextString() throws ParserException
{
createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>");
parseAndAssertNodeCount(10);
assertTrue(
"Fourth Node identified must be a string node",
node[3] instanceof StringNode);
StringNode stringNode = (StringNode) node[3];
assertEquals(
"First String Node",
"This is the Title",
stringNode.toPlainTextString());
assertTrue(
"Eighth Node identified must be a string node",
node[7] instanceof StringNode);
stringNode = (StringNode) node[7];
assertEquals(
"Second string node",
"Hello World, this is the HTML Parser",
stringNode.toPlainTextString());
}
public void testToHTML() throws ParserException
{
createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>");
parseAndAssertNodeCount(10);
assertTrue(
"Fourth Node identified must be a string node",
node[3] instanceof StringNode);
StringNode stringNode = (StringNode) node[3];
assertEquals(
"First String Node",
"This is the Title",
stringNode.toHtml());
assertTrue(
"Eighth Node identified must be a string node",
node[7] instanceof StringNode);
stringNode = (StringNode) node[7];
assertEquals(
"Second string node",
"Hello World, this is the HTML Parser",
stringNode.toHtml());
}
public void testEmptyLines() throws ParserException
{
createParser(
"David Nirenberg (Center for Advanced Study in the Behavorial Sciences, Stanford).<br>\n"
+ " \n"
+ "<br>");
parseAndAssertNodeCount(4);
assertTrue(
"Third Node identified must be a string node",
node[2] instanceof StringNode);
}
/**
* This is a bug reported by John Zook (586222), where the first few chars
* before a remark is being missed, if its on the same line.
*/
public void testStringBeingMissedBug() throws ParserException
{
createParser("Before Comment <!-- Comment --> After Comment");
parseAndAssertNodeCount(3);
assertTrue(
"First node should be HTMLStringNode",
node[0] instanceof StringNode);
assertTrue(
"Second node should be HTMLRemarkNode",
node[1] instanceof RemarkNode);
assertTrue(
"Third node should be HTMLStringNode",
node[2] instanceof StringNode);
StringNode stringNode = (StringNode) node[0];
assertEquals(
"First String node contents",
"Before Comment ",
stringNode.getText());
StringNode stringNode2 = (StringNode) node[2];
assertEquals(
"Second String node contents",
" After Comment",
stringNode2.getText());
RemarkNode remarkNode = (RemarkNode) node[1];
assertEquals("Remark Node contents", " Comment ", remarkNode.getText());
}
/**
* Based on a bug report submitted by Cedric Rosa, if the last line contains a single character,
* StringNode does not return the string node correctly.
*/
public void testLastLineWithOneChar() throws ParserException
{
createParser("a");
parseAndAssertNodeCount(1);
assertTrue(
"First node should be HTMLStringNode",
node[0] instanceof StringNode);
StringNode stringNode = (StringNode) node[0];
assertEquals("First String node contents", "a", stringNode.getText());
}
public void testStringWithEmptyLine() throws ParserException
{
createParser("a\n\nb");
parseAndAssertNodeCount(1);
assertTrue(
"First node should be HTMLStringNode",
node[0] instanceof StringNode);
StringNode stringNode = (StringNode) node[0];
assertStringEquals(
"First String node contents",
"a\r\n\r\nb",
stringNode.getText());
}
/**
* An attempt to reproduce bug 677176, which passes.
* @throws Exception
*/
public void testStringParserBug() throws Exception
{
createParser(
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 "
+ "Transitional//EN\">"
+ "<html>"
+ "<head>"
+ "<title>Untitled Document</title>"
+ "<meta http-equiv=\"Content-Type\" content=\"text/html; "
+ "charset=iso-8859-1\">"
+ "</head>"
+ "<script language=\"JavaScript\" type=\"text/JavaScript\">"
+ "// if this fails, output a 'hello' "
+ "if (true) "
+ "{ "
+ "//something good... "
+ "} "
+ "</script>"
+ "<body>"
+ "</body>"
+ "</html>");
parser.registerScanners();
parseAndAssertNodeCount(10);
assertType("fourth node", MetaTag.class, node[4]);
MetaTag metaTag = (MetaTag) node[4];
assertStringEquals(
"content",
"text/html; charset=iso-8859-1",
metaTag.getAttribute("CONTENT"));
}
public void testStringWithLineBreaks() throws Exception
{
createParser("Testing &\nRefactoring");
parseAndAssertNodeCount(1);
assertType("first node", StringNode.class, node[0]);
StringNode stringNode = (StringNode) node[0];
assertStringEquals(
"text",
"Testing &\r\nRefactoring",
stringNode.toPlainTextString());
}
}