package org.jsoup.nodes;
import org.jsoup.Jsoup;
import org.junit.Test;
import static org.jsoup.nodes.Document.OutputSettings;
import static org.jsoup.nodes.Entities.EscapeMode.*;
import static org.junit.Assert.*;
import java.nio.charset.Charset;
public class EntitiesTest {
@Test public void escape() {
String text = "Hello &<> Å å π 新 there ¾ © »";
String escapedAscii = Entities.escape(text, new OutputSettings().charset("ascii").escapeMode(base));
String escapedAsciiFull = Entities.escape(text, new OutputSettings().charset("ascii").escapeMode(extended));
String escapedAsciiXhtml = Entities.escape(text, new OutputSettings().charset("ascii").escapeMode(xhtml));
String escapedUtfFull = Entities.escape(text, new OutputSettings().charset("UTF-8").escapeMode(base));
String escapedUtfMin = Entities.escape(text, new OutputSettings().charset("UTF-8").escapeMode(xhtml));
assertEquals("Hello &<> Å å π 新 there ¾ © »", escapedAscii);
assertEquals("Hello &<> Å å π 新 there ¾ © »", escapedAsciiFull);
assertEquals("Hello &<> Å å π 新 there ¾ © »", escapedAsciiXhtml);
assertEquals("Hello &<> Å å π 新 there ¾ © »", escapedUtfFull);
assertEquals("Hello &<> Å å π 新 there ¾ © »", escapedUtfMin);
// odd that it's defined as aring in base but angst in full
// round trip
assertEquals(text, Entities.unescape(escapedAscii));
assertEquals(text, Entities.unescape(escapedAsciiFull));
assertEquals(text, Entities.unescape(escapedAsciiXhtml));
assertEquals(text, Entities.unescape(escapedUtfFull));
assertEquals(text, Entities.unescape(escapedUtfMin));
}
@Test public void escapeSupplementaryCharacter(){
String text = new String(Character.toChars(135361));
String escapedAscii = Entities.escape(text, new OutputSettings().charset("ascii").escapeMode(base));
assertEquals("𡃁", escapedAscii);
String escapedUtf = Entities.escape(text, new OutputSettings().charset("UTF-8").escapeMode(base));
assertEquals(text, escapedUtf);
}
@Test public void unescape() {
String text = "Hello &<> ® Å &angst π π 新 there &! ¾ © ©";
assertEquals("Hello &<> ® Å &angst π π 新 there &! ¾ © ©", Entities.unescape(text));
assertEquals("&0987654321; &unknown", Entities.unescape("&0987654321; &unknown"));
}
@Test public void strictUnescape() { // for attributes, enforce strict unescaping (must look like &#xxx; , not just &#xxx)
String text = "Hello &= &";
assertEquals("Hello &= &", Entities.unescape(text, true));
assertEquals("Hello &= &", Entities.unescape(text));
assertEquals("Hello &= &", Entities.unescape(text, false));
}
@Test public void caseSensitive() {
String unescaped = "Ü ü & &";
assertEquals("Ü ü & &",
Entities.escape(unescaped, new OutputSettings().charset("ascii").escapeMode(extended)));
String escaped = "Ü ü & &";
assertEquals("Ü ü & &", Entities.unescape(escaped));
}
@Test public void quoteReplacements() {
String escaped = "\ $";
String unescaped = "\\ $";
assertEquals(unescaped, Entities.unescape(escaped));
}
@Test public void letterDigitEntities() {
String html = "<p>¹²³¼½¾</p>";
Document doc = Jsoup.parse(html);
doc.outputSettings().charset("ascii");
Element p = doc.select("p").first();
assertEquals("¹²³¼½¾", p.html());
assertEquals("¹²³¼½¾", p.text());
doc.outputSettings().charset("UTF-8");
assertEquals("¹²³¼½¾", p.html());
}
@Test public void noSpuriousDecodes() {
String string = "http://www.foo.com?a=1&num_rooms=1&children=0&int=VA&b=2";
assertEquals(string, Entities.unescape(string));
}
}