/*
* Copyright (C) 2004 TiongHiang Lee
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Email: thlee@onemindsoft.org
*/
package org.onemind.swingweb.util;
import java.text.StringCharacterIterator;
import java.util.HashMap;
import java.util.Map;
import org.onemind.commons.java.datastructure.BiMap;
/**
 * Static helpers for escaping and unescaping a fixed set of HTML/XML character
 * entities, and for cutting the body fragment out of an HTML document string.
 *
 * Only the named entities listed in the static initializer are known; numeric
 * character references are not supported. The lookup tables are filled once in
 * the static initializer and only read afterwards.
 */
public class HtmlUtils
{
    // Raw collection types are used on purpose: the rest of this file is written
    // without generics, and the tables only ever hold String -> String pairs.

    /** Maps an HTML entity such as {@code &lt;} to the character it stands for. */
    private static final Map HTML_ESCAPE_TO_CHAR = new HashMap();

    /** Inverse of {@link #HTML_ESCAPE_TO_CHAR}: single-character string to its HTML entity. */
    private static final Map HTML_CHAR_TO_ESCAPE = new HashMap();

    /** Maps one of the five predefined XML entities to the character it stands for. */
    private static final Map XML_ESCAPE_TO_CHAR = new HashMap();

    /** Inverse of {@link #XML_ESCAPE_TO_CHAR}: single-character string to its XML entity. */
    private static final Map XML_CHAR_TO_ESCAPE = new HashMap();

    static
    {
        // {entity, character}. Non-ASCII characters are spelled as Unicode escapes
        // so the table does not depend on the encoding used to compile this file.
        String[][] htmlEscapes = {
                {"&lt;", "<"}, {"&gt;", ">"}, {"&amp;", "&"}, {"&quot;", "\""},
                {"&agrave;", "\u00e0"}, {"&Agrave;", "\u00c0"},
                {"&acirc;", "\u00e2"}, {"&Acirc;", "\u00c2"},
                {"&auml;", "\u00e4"}, {"&Auml;", "\u00c4"},
                {"&aring;", "\u00e5"}, {"&Aring;", "\u00c5"},
                {"&aelig;", "\u00e6"}, {"&AElig;", "\u00c6"},
                {"&ccedil;", "\u00e7"}, {"&Ccedil;", "\u00c7"},
                {"&eacute;", "\u00e9"}, {"&Eacute;", "\u00c9"},
                {"&egrave;", "\u00e8"}, {"&Egrave;", "\u00c8"},
                {"&ecirc;", "\u00ea"}, {"&Ecirc;", "\u00ca"},
                {"&euml;", "\u00eb"}, {"&Euml;", "\u00cb"},
                {"&iuml;", "\u00ef"}, {"&Iuml;", "\u00cf"},
                {"&ocirc;", "\u00f4"}, {"&Ocirc;", "\u00d4"},
                {"&ouml;", "\u00f6"}, {"&Ouml;", "\u00d6"},
                {"&oslash;", "\u00f8"}, {"&Oslash;", "\u00d8"},
                {"&szlig;", "\u00df"},
                {"&ugrave;", "\u00f9"}, {"&Ugrave;", "\u00d9"},
                {"&ucirc;", "\u00fb"}, {"&Ucirc;", "\u00db"},
                {"&uuml;", "\u00fc"}, {"&Uuml;", "\u00dc"},
                // NOTE(review): the table pairs the nbsp entity with an ordinary
                // space, so escapeHTML turns every space into the nbsp entity.
                // Kept as found; confirm this is what callers expect.
                {"&nbsp;", " "},
                // reg is U+00AE, copy is U+00A9, euro is U+20AC. The earlier table
                // had reg/copy swapped and used U+20A0 (ECU sign) for euro.
                {"&reg;", "\u00ae"}, {"&copy;", "\u00a9"}, {"&euro;", "\u20ac"}};
        for (int i = 0; i < htmlEscapes.length; i++)
        {
            String[] pair = htmlEscapes[i];
            register(HTML_ESCAPE_TO_CHAR, HTML_CHAR_TO_ESCAPE, pair[0], pair[1]);
        }

        // {character, entity}: the five entities predefined by XML.
        String[][] xmlEscapes = {{"\"", "&quot;"}, {"'", "&apos;"}, {"&", "&amp;"}, {"<", "&lt;"},
                {">", "&gt;"}};
        for (int i = 0; i < xmlEscapes.length; i++)
        {
            String[] pair = xmlEscapes[i];
            register(XML_ESCAPE_TO_CHAR, XML_CHAR_TO_ESCAPE, pair[1], pair[0]);
        }
    }

    /**
     * Records one entity/character pair in both lookup directions.
     *
     * @param escapeToChar table keyed by entity
     * @param charToEscape table keyed by character
     * @param escape the entity text, including the leading ampersand and trailing semicolon
     * @param character the single-character string the entity stands for
     */
    private static void register(Map escapeToChar, Map charToEscape, String escape, String character)
    {
        escapeToChar.put(escape, character);
        charToEscape.put(character, escape);
    }

    /**
     * Extracts the inner fragment of an HTML string.
     *
     * If the literal tag {@code <body>} occurs, the text after its first
     * occurrence up to the next {@code </body>} is returned. Otherwise the same
     * is done with {@code <html>} and {@code </html>}. When the closing tag is
     * missing, everything after the opening tag is returned. When neither
     * opening tag occurs, the input is returned unchanged. Matching is exact
     * and case-sensitive, so tags carrying attributes are not recognized.
     *
     * @param html the HTML text
     * @return the extracted fragment, or {@code html} itself if no tag matched
     * @throws NullPointerException if {@code html} is null
     */
    public static String stripHtml(String html)
    {
        String inner = between(html, "<body>", "</body>");
        if (inner == null)
        {
            // The html tags are consulted only when no body tag exists at all.
            inner = between(html, "<html>", "</html>");
        }
        return inner != null ? inner : html;
    }

    /**
     * Returns the text following the first {@code open} up to the next
     * {@code close} (or to the end when {@code close} is absent), or null when
     * {@code open} does not occur.
     */
    private static String between(String text, String open, String close)
    {
        int from = text.indexOf(open);
        if (from == -1)
        {
            return null;
        }
        String rest = text.substring(from + open.length());
        int to = rest.indexOf(close);
        return to == -1 ? rest : rest.substring(0, to);
    }

    /**
     * Replaces the characters {@code " ' & < >} with their XML entities.
     *
     * @param s the text to escape
     * @return the escaped text
     * @throws NullPointerException if {@code s} is null
     */
    public static final String escapeXML(String s)
    {
        return escape(s, XML_CHAR_TO_ESCAPE);
    }

    /**
     * Replaces the five predefined XML entities with their characters.
     *
     * @param s the text to unescape
     * @return the unescaped text
     * @throws IllegalArgumentException if an ampersand is not followed by a
     *         semicolon, or the sequence between them is not a known XML entity
     * @throws NullPointerException if {@code s} is null
     */
    public static final String unescapeXML(String s)
    {
        return unescape(s, XML_ESCAPE_TO_CHAR);
    }

    /**
     * Replaces every character that has an entry in the HTML entity table with
     * its named entity; all other characters are copied unchanged.
     *
     * @param s the text to escape
     * @return the escaped text
     * @throws NullPointerException if {@code s} is null
     */
    public static final String escapeHTML(String s)
    {
        return escape(s, HTML_CHAR_TO_ESCAPE);
    }

    /**
     * Replaces the named HTML entities known to this class with their characters.
     *
     * @param s the text to unescape
     * @return the unescaped text
     * @throws IllegalArgumentException if an ampersand is not followed by a
     *         semicolon, or the sequence between them is not a known entity
     * @throws NullPointerException if {@code s} is null
     */
    public static final String unescapeHTML(String s)
    {
        return unescape(s, HTML_ESCAPE_TO_CHAR);
    }

    /**
     * Copies {@code s}, substituting each character found in
     * {@code charToEscape} with the mapped entity.
     */
    private static String escape(String s, Map charToEscape)
    {
        StringBuffer sb = new StringBuffer(s.length());
        for (int i = 0; i < s.length(); i++)
        {
            char c = s.charAt(i);
            String escaped = (String) charToEscape.get(String.valueOf(c));
            if (escaped != null)
            {
                sb.append(escaped);
            } else
            {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Copies {@code s}, substituting every ampersand-to-semicolon sequence with
     * the character mapped in {@code escapeToChar}.
     *
     * @throws IllegalArgumentException if an ampersand has no following
     *         semicolon, or the sequence is not a key of {@code escapeToChar}
     */
    private static String unescape(String s, Map escapeToChar)
    {
        StringBuffer sb = new StringBuffer(s.length());
        int start = 0;
        int amp = s.indexOf('&', start);
        while (amp != -1)
        {
            // Literal text between the previous entity and this one.
            sb.append(s.substring(start, amp));
            int semi = s.indexOf(';', amp);
            if (semi == -1)
            {
                throw new IllegalArgumentException("not a valid html");
            }
            String escaped = s.substring(amp, semi + 1);
            String c = (String) escapeToChar.get(escaped);
            if (c == null)
            {
                throw new IllegalArgumentException("Unknown escape sequence " + escaped);
            }
            sb.append(c);
            start = semi + 1;
            amp = s.indexOf('&', start);
        }
        // Always append the tail. The previous guard compared start against
        // length - 1 and so discarded a single trailing character.
        sb.append(s.substring(start));
        return sb.toString();
    }
}