*/
public void testCommaListCharset() throws ParserException
{
URL url;
URLConnection connection;
Parser parser;
String idiots = "http://users.aol.com/geinster/rej.htm";
try
{
url = new URL(idiots);
connection = url.openConnection();
// this little subclass just gets around normal JDK 1.4 processing
// that filters out bogus character sets
parser = new Parser()
{
/**
 * Extract the character set name from a content type string.
 * <p>
 * The text following the first occurrence of <code>CHARSET_STRING</code>
 * must begin (after trimming) with an equals sign; the value after it is
 * trimmed, cut at the first semicolon if there is one, and then stripped
 * of one pair of enclosing double quotes followed by one pair of
 * enclosing single quotes. The resulting text is returned exactly as
 * found, with no further checking in this override.
 * @param content The content type string to examine, may be
 * <code>null</code>.
 * @return The extracted character set text, or
 * <code>DEFAULT_CHARSET</code> if <code>content</code> is
 * <code>null</code>, does not contain <code>CHARSET_STRING</code>, or
 * has no equals sign directly after it.
 */
protected String getCharset(String content)
{
    if (null == content)
        return (DEFAULT_CHARSET);

    int start = content.indexOf(CHARSET_STRING);
    if (-1 == start)
        return (DEFAULT_CHARSET);

    // whatever follows the charset keyword must start with '='
    String remainder =
        content.substring(start + CHARSET_STRING.length()).trim();
    if (!remainder.startsWith("="))
        return (DEFAULT_CHARSET);

    // keep only the text up to the first semicolon, if any
    String value = remainder.substring(1).trim();
    int semicolon = value.indexOf(";");
    if (-1 != semicolon)
        value = value.substring(0, semicolon);

    // remove enclosing quotes from around the charset string:
    // a double quoted pair first, then a single quoted pair
    String[] delimiters = { "\"", "'" };
    for (int i = 0; i < delimiters.length; i++)
    {
        boolean enclosed =
            (1 < value.length())
            && value.startsWith(delimiters[i])
            && value.endsWith(delimiters[i]);
        if (enclosed)
            value = value.substring(1, value.length() - 1);
    }

    // the value is handed back as-is, short circuiting the
    // findCharset() processing mentioned by the original author
    return (value);
}
};
parser.setConnection(connection);
// must be the default
assertTrue(
"Wrong encoding",
parser.getEncoding().equals("ISO-8859-1"));
for (NodeIterator e = parser.elements(); e.hasMoreNodes();)
e.nextNode();
assertTrue(
"Wrong encoding",
parser.getEncoding().equals("windows-1252"));
}
catch (Exception e)
{
fail(e.getMessage());
}