// Copyright (c) 2003-2014, Jodd Team (jodd.org). All Rights Reserved.
package jodd.lagarto;
import jodd.io.FileUtil;
import jodd.lagarto.dom.Element;
import jodd.lagarto.dom.LagartoDOMBuilder;
import jodd.jerry.Jerry;
import jodd.jerry.JerryFunction;
import jodd.lagarto.dom.Document;
import jodd.util.StringUtil;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import static org.junit.Assert.*;
public class ParsingProblemsTest {
protected String testDataRoot;
@Before
public void setUp() throws Exception {
if (testDataRoot != null) {
return;
}
URL data = LagartoParserTest.class.getResource("data");
testDataRoot = data.getFile();
}
@Test
public void testInvalidTag() {
String html = "<html>text1<=>text2</html>";
LagartoParser lagartoParser = new LagartoParser(html, false);
final StringBuilder sb = new StringBuilder();
try {
lagartoParser.parse(new EmptyTagVisitor() {
@Override
public void tag(Tag tag) {
sb.append(tag.getName()).append(' ');
}
@Override
public void text(CharSequence text) {
sb.append(text).append(' ');
}
@Override
public void error(String message) {
System.out.println(message);
}
});
} catch (LagartoException lex) {
lex.printStackTrace();
fail();
}
assertEquals("html text1 <=>text2 html ", sb.toString());
}
@Test
public void testNonQuotedAttributeValue() {
String html = "<a href=123>xxx</a>";
LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder();
lagartoDOMBuilder.getConfig().setCalculatePosition(true);
Document document = lagartoDOMBuilder.parse(html);
assertEquals("<a href=\"123\">xxx</a>", document.getHtml());
assertTrue(document.check());
html = "<a href=../org/w3c/dom/'http://www.w3.org/TR/2001/REC-xmlschema-1-20010502/#element-list'>xxx</a>";
lagartoDOMBuilder = new LagartoDOMBuilder();
lagartoDOMBuilder.getConfig().setCalculatePosition(true);
document = lagartoDOMBuilder.parse(html);
assertTrue(document.check());
assertEquals("<a href=\"../org/w3c/dom/'http://www.w3.org/TR/2001/REC-xmlschema-1-20010502/#element-list'\">xxx</a>", document.getHtml());
}
@Test
public void testIssue23_0() throws IOException {
File file = new File(testDataRoot, "index-4-v0.html");
LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder();
lagartoDOMBuilder.getConfig().setCalculatePosition(true);
lagartoDOMBuilder.getConfig().setCollectErrors(true);
Document doc = lagartoDOMBuilder.parse(FileUtil.readString(file));
assertTrue(doc.check());
assertEquals(1, doc.getErrors().size());
}
@Test
public void testIssue23_1() throws IOException {
File file = new File(testDataRoot, "index-4-v1.html");
LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder();
lagartoDOMBuilder.getConfig().setCalculatePosition(true);
lagartoDOMBuilder.getConfig().setCollectErrors(true);
Document doc = lagartoDOMBuilder.parse(FileUtil.readString(file));
assertTrue(doc.check());
assertEquals(1, doc.getErrors().size());
}
@Test
public void testIssue23() throws IOException {
File file = new File(testDataRoot, "index-4.html");
LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder();
lagartoDOMBuilder.getConfig().setCalculatePosition(true);
lagartoDOMBuilder.getConfig().setCollectErrors(true);
Document document = lagartoDOMBuilder.parse(FileUtil.readString(file));
assertTrue(document.check());
// (1564 open DTs + 1564 open DDs) 1 open P
assertEquals(19, document.getErrors().size());
Jerry doc = Jerry.jerry(FileUtil.readString(file));
assertEquals(16, doc.$("td.NavBarCell1").size());
assertEquals(2, doc.$("table td.NavBarCell1Rev").size());
assertEquals(1, doc.$("dl").size());
assertEquals(1564, doc.$("dd").size());
assertEquals(1564, doc.$("dt").size());
assertEquals(3144, doc.$("dt a").size());
// http://docs.oracle.com/javase/6/docs/api/index-files/index-4.html
file = new File(testDataRoot, "index-4-eng.html");
doc = Jerry.jerry(FileUtil.readString(file));
assertEquals(16, doc.$("td.NavBarCell1").size());
assertEquals(2, doc.$("table td.NavBarCell1Rev").size());
final StringBuilder sb = new StringBuilder();
doc.$("td.NavBarCell1").each(new JerryFunction() {
public boolean onNode(Jerry $this, int index) {
sb.append("---\n");
sb.append($this.text().trim());
sb.append('\n');
return true;
}
});
String s = sb.toString();
s = StringUtil.remove(s, ' ');
s = StringUtil.remove(s, '\r');
s = StringUtil.remove(s, '\u00A0');
s = StringUtil.remove(s, " ");
assertEquals(
"---\n" +
"Overview\n" +
"Package\n" +
"Class\n" +
"Use\n" +
"Tree\n" +
"Deprecated\n" +
"Index\n" +
"Help\n" +
"---\n" +
"Overview\n" +
"---\n" +
"Package\n" +
"---\n" +
"Class\n" +
"---\n" +
"Use\n" +
"---\n" +
"Tree\n" +
"---\n" +
"Deprecated\n" +
"---\n" +
"Help\n" +
"---\n" +
"Overview\n" +
"Package\n" +
"Class\n" +
"Use\n" +
"Tree\n" +
"Deprecated\n" +
"Index\n" +
"Help\n" +
"---\n" +
"Overview\n" +
"---\n" +
"Package\n" +
"---\n" +
"Class\n" +
"---\n" +
"Use\n" +
"---\n" +
"Tree\n" +
"---\n" +
"Deprecated\n" +
"---\n" +
"Help\n",
s);
}
@Test
public void testNamespaces() throws IOException {
File file = new File(testDataRoot, "namespace.xml");
LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder();
lagartoDOMBuilder.enableXmlMode();
lagartoDOMBuilder.getConfig().setCalculatePosition(true);
Document doc = lagartoDOMBuilder.parse(FileUtil.readString(file));
assertTrue(doc.check());
Element cfgTestElement = (Element) doc.getChild(1);
assertEquals("cfg:test", cfgTestElement.getNodeName());
Element cfgNode = (Element) cfgTestElement.getChild(0);
assertEquals("cfg:node", cfgNode.getNodeName());
Jerry.JerryParser jerryParser = new Jerry.JerryParser();
((LagartoDOMBuilder) jerryParser.getDOMBuilder()).enableXmlMode();
Jerry jerry = jerryParser.parse(FileUtil.readString(file));
final StringBuilder result = new StringBuilder();
jerry.$("cfg\\:test").each(new JerryFunction() {
public boolean onNode(Jerry $this, int index) {
result.append($this.$("cfg\\:node").text());
return true;
}
});
assertEquals("This is a text", result.toString());
}
@Test
public void testPreserveCC() throws IOException {
File file = new File(testDataRoot, "preserve-cc.html");
String expectedResult = FileUtil.readString(file);
Jerry.JerryParser jerryParser = new Jerry.JerryParser();
((LagartoDOMBuilder) jerryParser.getDOMBuilder()).enableHtmlMode();
((LagartoDOMBuilder) jerryParser.getDOMBuilder()).getConfig().setEnableConditionalComments(false);
Jerry jerry = jerryParser.parse(expectedResult);
String result = jerry.html();
assertEquals(expectedResult, result);
}
@Test
public void testKelkoo() throws Exception {
File file = new File(testDataRoot, "kelkoo.html");
Jerry jerry;
try {
jerry = Jerry.jerry().parse(FileUtil.readString(file));
} catch (Exception ex) {
fail(ex.toString());
throw ex;
}
Element script = (Element) jerry.$("script").get(0);
assertEquals("script", script.getNodeName());
assertEquals(6, script.getAttributesCount());
assertEquals("src", script.getAttribute(0).getName());
assertEquals("data-config", script.getAttribute(1).getName());
assertEquals("ext\\u00e9rieur|barbecue,", script.getAttribute(2).getName());
assertEquals("planchaaccessoires\":\"http:\\", script.getAttribute(3).getName());
assertEquals("www.kelkoo.fr\"}'", script.getAttribute(4).getName());
assertEquals("data-adsense-append", script.getAttribute(5).getName());
}
}