// $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/tests/utilTests/HTMLLinkProcessorTest.java,v 1.2 2004/02/11 02:16:57 woolfel Exp $
/*
* ====================================================================
* Copyright 2002-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// The developers of JMeter and Apache are greatful to the developers
// of HTMLParser for giving Apache Software Foundation a non-exclusive
// license. The performance benefits of HTMLParser are clear and the
// users of JMeter will benefit from the hard work the HTMLParser
// team. For detailed information about HTMLParser, the project is
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
//
// HTMLParser was originally created by Somik Raha in 2000. Since then
// a healthy community of users has formed and helped refine the
// design so that it is able to tackle the difficult task of parsing
// dirty HTML. Derrick Oswald is the current lead developer and was kind
// enough to assist JMeter.
package org.htmlparser.tests.utilTests;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tests.ParserTestCase;
import org.htmlparser.util.LinkProcessor;
import org.htmlparser.util.ParserException;
public class HTMLLinkProcessorTest extends ParserTestCase
{
private LinkProcessor lp;
public HTMLLinkProcessorTest(String name)
{
super(name);
}
protected void setUp()
{
lp = new LinkProcessor();
}
public void testIsURL()
{
String resourceLoc1 = "http://someurl.com";
String resourceLoc2 = "myfilehttp.dat";
assertTrue(
resourceLoc1 + " should be a url",
LinkProcessor.isURL(resourceLoc1));
assertTrue(
resourceLoc2 + " should not be a url",
!LinkProcessor.isURL(resourceLoc2));
String resourceLoc3 =
"file://localhost/D:/java/jdk1.3/docs/api/overview-summary.html";
assertTrue(
resourceLoc3 + " should be a url",
LinkProcessor.isURL(resourceLoc3));
}
public void testFixSpaces()
{
String url =
"http://htmlparser.sourceforge.net/test/This is a Test Page.html";
String fixedURL = LinkProcessor.fixSpaces(url);
int index = fixedURL.indexOf(" ");
assertEquals(
"Expected",
"http://htmlparser.sourceforge.net/test/This%20is%20a%20Test%20Page.html",
fixedURL);
}
/**
* Reproduction of bug 673379 reported by Joe Robbins. Parser goes into
* infinte loop if the link has no slashes.
*/
public void testLinkWithNoSlashes() throws Exception
{
createParser("<A HREF=\".foo.txt\">Foo</A>", "http://www.oygevalt.com");
parser.registerScanners();
parseAndAssertNodeCount(1);
assertTrue(node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag) node[0];
assertStringEquals(
"link",
"http://www.oygevalt.com/foo.txt",
linkTag.getLink());
assertEquals("link", "Foo", linkTag.getLinkText());
}
//
// Tests from Appendix C Examples of Resolving Relative URI References
// RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax
// T. Berners-Lee et al.
// http://www.ietf.org/rfc/rfc2396.txt
// Within an object with a well-defined base URI of
static final String baseURI = "http://a/b/c/d;p?q";
// the relative URI would be resolved as follows:
// C.1. Normal Examples
// g:h = g:h
// g = http://a/b/c/g
// ./g = http://a/b/c/g
// g/ = http://a/b/c/g/
// /g = http://a/g
// //g = http://g
// ?y = http://a/b/c/?y
// g?y = http://a/b/c/g?y
// #s = (current document)#s
// g#s = http://a/b/c/g#s
// g?y#s = http://a/b/c/g?y#s
// ;x = http://a/b/c/;x
// g;x = http://a/b/c/g;x
// g;x?y#s = http://a/b/c/g;x?y#s
// . = http://a/b/c/
// ./ = http://a/b/c/
// .. = http://a/b/
// ../ = http://a/b/
// ../g = http://a/b/g
// ../.. = http://a/
// ../../ = http://a/
// ../../g = http://a/g
public void test1() throws ParserException
{
assertEquals(
"test1 failed",
"https:h",
(new LinkProcessor()).extract("https:h", baseURI));
}
public void test2() throws ParserException
{
assertEquals(
"test2 failed",
"http://a/b/c/g",
(new LinkProcessor()).extract("g", baseURI));
}
public void test3() throws ParserException
{
assertEquals(
"test3 failed",
"http://a/b/c/g",
(new LinkProcessor()).extract("./g", baseURI));
}
public void test4() throws ParserException
{
assertEquals(
"test4 failed",
"http://a/b/c/g/",
(new LinkProcessor()).extract("g/", baseURI));
}
public void test5() throws ParserException
{
assertEquals(
"test5 failed",
"http://a/g",
(new LinkProcessor()).extract("/g", baseURI));
}
public void test6() throws ParserException
{
assertEquals(
"test6 failed",
"http://g",
(new LinkProcessor()).extract("//g", baseURI));
}
public void test7() throws ParserException
{
assertEquals(
"test7 failed",
"http://a/b/c/?y",
(new LinkProcessor()).extract("?y", baseURI));
}
public void test8() throws ParserException
{
assertEquals(
"test8 failed",
"http://a/b/c/g?y",
(new LinkProcessor()).extract("g?y", baseURI));
}
public void test9() throws ParserException
{
assertEquals(
"test9 failed",
"https:h",
(new LinkProcessor()).extract("https:h", baseURI));
}
public void test10() throws ParserException
{
assertEquals(
"test10 failed",
"https:h",
(new LinkProcessor()).extract("https:h", baseURI));
}
// #s = (current document)#s
public void test11() throws ParserException
{
assertEquals(
"test11 failed",
"http://a/b/c/g#s",
(new LinkProcessor()).extract("g#s", baseURI));
}
public void test12() throws ParserException
{
assertEquals(
"test12 failed",
"http://a/b/c/g?y#s",
(new LinkProcessor()).extract("g?y#s", baseURI));
}
public void test13() throws ParserException
{
assertEquals(
"test13 failed",
"http://a/b/c/;x",
(new LinkProcessor()).extract(";x", baseURI));
}
public void test14() throws ParserException
{
assertEquals(
"test14 failed",
"http://a/b/c/g;x",
(new LinkProcessor()).extract("g;x", baseURI));
}
public void test15() throws ParserException
{
assertEquals(
"test15 failed",
"http://a/b/c/g;x?y#s",
(new LinkProcessor()).extract("g;x?y#s", baseURI));
}
public void test16() throws ParserException
{
assertEquals(
"test16 failed",
"http://a/b/c/",
(new LinkProcessor()).extract(".", baseURI));
}
public void test17() throws ParserException
{
assertEquals(
"test17 failed",
"http://a/b/c/",
(new LinkProcessor()).extract("./", baseURI));
}
public void test18() throws ParserException
{
assertEquals(
"test18 failed",
"http://a/b/",
(new LinkProcessor()).extract("..", baseURI));
}
public void test19() throws ParserException
{
assertEquals(
"test19 failed",
"http://a/b/",
(new LinkProcessor()).extract("../", baseURI));
}
public void test20() throws ParserException
{
assertEquals(
"test20 failed",
"http://a/b/g",
(new LinkProcessor()).extract("../g", baseURI));
}
public void test21() throws ParserException
{
assertEquals(
"test21 failed",
"http://a/",
(new LinkProcessor()).extract("../..", baseURI));
}
public void test22() throws ParserException
{
assertEquals(
"test22 failed",
"http://a/g",
(new LinkProcessor()).extract("../../g", baseURI));
}
// C.2. Abnormal Examples
// Although the following abnormal examples are unlikely to occur in
// normal practice, all URI parsers should be capable of resolving them
// consistently. Each example uses the same base as above.
//
// An empty reference refers to the start of the current document.
//
// <> = (current document)
//
// Parsers must be careful in handling the case where there are more
// relative path ".." segments than there are hierarchical levels in the
// base URI's path. Note that the ".." syntax cannot be used to change
// the authority component of a URI.
//
// ../../../g = http://a/../g
// ../../../../g = http://a/../../g
//
// In practice, some implementations strip leading relative symbolic
// elements (".", "..") after applying a relative URI calculation, based
// on the theory that compensating for obvious author errors is better
// than allowing the request to fail. Thus, the above two references
// will be interpreted as "http://a/g" by some implementations.
//
// Similarly, parsers must avoid treating "." and ".." as special when
// they are not complete components of a relative path.
//
// /./g = http://a/./g
// /../g = http://a/../g
// g. = http://a/b/c/g.
// .g = http://a/b/c/.g
// g.. = http://a/b/c/g..
// ..g = http://a/b/c/..g
//
// Less likely are cases where the relative URI uses unnecessary or
// nonsensical forms of the "." and ".." complete path segments.
//
// ./../g = http://a/b/g
// ./g/. = http://a/b/c/g/
// g/./h = http://a/b/c/g/h
// g/../h = http://a/b/c/h
// g;x=1/./y = http://a/b/c/g;x=1/y
// g;x=1/../y = http://a/b/c/y
//
// All client applications remove the query component from the base URI
// before resolving relative URI. However, some applications fail to
// separate the reference's query and/or fragment components from a
// relative path before merging it with the base path. This error is
// rarely noticed, since typical usage of a fragment never includes the
// hierarchy ("/") character, and the query component is not normally
// used within relative references.
//
// g?y/./x = http://a/b/c/g?y/./x
// g?y/../x = http://a/b/c/g?y/../x
// g#s/./x = http://a/b/c/g#s/./x
// g#s/../x = http://a/b/c/g#s/../x
//
// Some parsers allow the scheme name to be present in a relative URI if
// it is the same as the base URI scheme. This is considered to be a
// loophole in prior specifications of partial URI [RFC1630]. Its use
// should be avoided.
//
// http:g = http:g ; for validating parsers
// | http://a/b/c/g ; for backwards compatibility
// public void test23 () throws HTMLParserException
// {
// assertEquals ("test23 failed", "http://a/../g", (new HTMLLinkProcessor ()).extract ("../../../g", baseURI));
// }
// public void test24 () throws HTMLParserException
// {
// assertEquals ("test24 failed", "http://a/../../g", (new HTMLLinkProcessor ()).extract ("../../../../g", baseURI));
// }
public void test23() throws ParserException
{
assertEquals(
"test23 failed",
"http://a/g",
(new LinkProcessor()).extract("../../../g", baseURI));
}
public void test24() throws ParserException
{
assertEquals(
"test24 failed",
"http://a/g",
(new LinkProcessor()).extract("../../../../g", baseURI));
}
public void test25() throws ParserException
{
assertEquals(
"test25 failed",
"http://a/./g",
(new LinkProcessor()).extract("/./g", baseURI));
}
public void test26() throws ParserException
{
assertEquals(
"test26 failed",
"http://a/../g",
(new LinkProcessor()).extract("/../g", baseURI));
}
public void test27() throws ParserException
{
assertEquals(
"test27 failed",
"http://a/b/c/g.",
(new LinkProcessor()).extract("g.", baseURI));
}
public void test28() throws ParserException
{
assertEquals(
"test28 failed",
"http://a/b/c/.g",
(new LinkProcessor()).extract(".g", baseURI));
}
public void test29() throws ParserException
{
assertEquals(
"test29 failed",
"http://a/b/c/g..",
(new LinkProcessor()).extract("g..", baseURI));
}
public void test30() throws ParserException
{
assertEquals(
"test30 failed",
"http://a/b/c/..g",
(new LinkProcessor()).extract("..g", baseURI));
}
public void test31() throws ParserException
{
assertEquals(
"test31 failed",
"http://a/b/g",
(new LinkProcessor()).extract("./../g", baseURI));
}
public void test32() throws ParserException
{
assertEquals(
"test32 failed",
"http://a/b/c/g/",
(new LinkProcessor()).extract("./g/.", baseURI));
}
public void test33() throws ParserException
{
assertEquals(
"test33 failed",
"http://a/b/c/g/h",
(new LinkProcessor()).extract("g/./h", baseURI));
}
public void test34() throws ParserException
{
assertEquals(
"test34 failed",
"http://a/b/c/h",
(new LinkProcessor()).extract("g/../h", baseURI));
}
public void test35() throws ParserException
{
assertEquals(
"test35 failed",
"http://a/b/c/g;x=1/y",
(new LinkProcessor()).extract("g;x=1/./y", baseURI));
}
public void test36() throws ParserException
{
assertEquals(
"test36 failed",
"http://a/b/c/y",
(new LinkProcessor()).extract("g;x=1/../y", baseURI));
}
public void test37() throws ParserException
{
assertEquals(
"test37 failed",
"http://a/b/c/g?y/./x",
(new LinkProcessor()).extract("g?y/./x", baseURI));
}
public void test38() throws ParserException
{
assertEquals(
"test38 failed",
"http://a/b/c/g?y/../x",
(new LinkProcessor()).extract("g?y/../x", baseURI));
}
public void test39() throws ParserException
{
assertEquals(
"test39 failed",
"http://a/b/c/g#s/./x",
(new LinkProcessor()).extract("g#s/./x", baseURI));
}
public void test40() throws ParserException
{
assertEquals(
"test40 failed",
"http://a/b/c/g#s/../x",
(new LinkProcessor()).extract("g#s/../x", baseURI));
}
// public void test41 () throws HTMLParserException
// {
// assertEquals ("test41 failed", "http:g", (new HTMLLinkProcessor ()).extract ("http:g", baseURI));
// }
public void test41() throws ParserException
{
assertEquals(
"test41 failed",
"http://a/b/c/g",
(new LinkProcessor()).extract("http:g", baseURI));
}
}