/*
* This file is part of NixNote
* Copyright 2009 Randy Baumgarte
*
* This file may be licensed under the terms of the
* GNU General Public License Version 2 (the ``GPL'').
*
* Software distributed under the License is distributed
* on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
* express or implied. See the GPL for the specific language
* governing rights and limitations.
*
* You should have received a copy of the GPL along with this
* program. If not, go to http://www.gnu.org/licenses/gpl.html
* or write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
*/
package cx.fbn.nevernote.xml;
import java.util.ArrayList;
import java.util.List;
import com.trolltech.qt.xml.QDomDocument;
import com.trolltech.qt.xml.QDomElement;
import com.trolltech.qt.xml.QDomNode;
import com.trolltech.qt.xml.QDomNodeList;
import com.trolltech.qt.xml.QDomText;
import cx.fbn.nevernote.Global;
public class XMLCleanup {
private String content;
private QDomDocument doc;
private final List<String> resources;
public XMLCleanup() {
resources = new ArrayList<String>();
}
public void setValue(String text) {
content = text;
}
public String getValue() {
return content;
}
// Validate the contents of the note. Change unsupported things
public void validate() {
doc = new QDomDocument();
int br = content.lastIndexOf("</en-note>");
content = new String(content.substring(0,br));
String newContent;
int k = content.indexOf("<en-note");
newContent = new String(content.substring(k));
// Fix the background color
newContent = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+"<!DOCTYPE en-note SYSTEM \"http://xml.evernote.com/pub/enml2.dtd\">\n"
+newContent
+"</en-note>";
QDomDocument.Result result = doc.setContent(newContent);
if (!result.success) {
System.out.println("DOM error in XMLValidator.validate()");
System.out.println(newContent);
System.out.println("Location : Line-"+result.errorLine +" Column-" + result.errorColumn);
System.out.println("Exiting");
content = null;
return;
}
QDomNodeList noteAnchors = doc.elementsByTagName("en-note");
int noteCount = noteAnchors.length();
for (int i=noteCount-1; i>=0; i--) {
if (noteAnchors.at(i).toElement().hasAttribute("style")) {
String style = noteAnchors.at(i).toElement().attribute("style");
int startColor = style.indexOf("background-color:");
if (startColor > -1) {
String color = style.substring(startColor+17);
color = color.substring(0,color.indexOf(";"));
noteAnchors.at(i).toElement().setAttribute("bgcolor", color);
}
}
}
// Scan through tags node by node
scanTags();
// Scan again making sure we didn't miss any <a> tags. Sometimes we do
QDomNodeList anchorList = doc.elementsByTagName("a");
int anchorCount = anchorList.length();
for (int i=anchorCount-1; i>=0; i--) {
QDomNode link = anchorList.at(i);
link = fixLinkNode(link);
}
// Remove invalid elements & attributes
// Modify en-media tags
QDomNodeList anchors;
for (String key : Global.invalidAttributes.keySet()) {
anchors = doc.elementsByTagName(key);
int enMediaCount = anchors.length();
for (int i=enMediaCount-1; i>=0; i--) {
QDomElement element = anchors.at(i).toElement();
ArrayList<String> names = Global.invalidAttributes.get(element.nodeName().toLowerCase());
if (names != null) {
for (int j=0; j<names.size(); j++) {
element.removeAttribute(names.get(j));
}
}
}
}
List<String> elements = Global.invalidElements;
for (int j=0; j<elements.size(); j++) {
anchors = doc.elementsByTagName(elements.get(j));
int enMediaCount = anchors.length();
for (int i=enMediaCount-1; i>=0; i--) {
QDomElement element = anchors.at(i).toElement();
element.setTagName("span");
}
}
// Replace the XML carrage returns that the toString() creates.
content = doc.toString().replace( "
", "" );
}
// Start looking through the tree.
private void scanTags() {
if (doc.hasChildNodes())
parseNodes(doc.childNodes());
return;
}
private void parseNodes(QDomNodeList nodes) {
for (int i=0; i<nodes.size(); i++) {
QDomNode node = nodes.at(i);
if (node.hasChildNodes())
parseNodes(node.childNodes());
fixNode(node);
}
}
// Fix the contents of the node back to ENML.
private void fixNode(QDomNode node) {
QDomElement scanChecked = node.toElement();
if (scanChecked.hasAttribute("checked")) {
System.out.println(scanChecked.attribute("checked"));
if (!scanChecked.attribute("checked").equalsIgnoreCase("true"))
scanChecked.setAttribute("checked", "false");
}
if (node.nodeName().equalsIgnoreCase("#comment") || node.nodeName().equalsIgnoreCase("script")) {
node.parentNode().removeChild(node);
}
if (node.nodeName().equalsIgnoreCase("input")) {
QDomElement e = node.toElement();
e.setTagName("en-todo");
String value = e.attribute("value");
if (value.trim().equals(""))
value = "false";
e.removeAttribute("value");
e.removeAttribute("unchecked");
e.setAttribute("checked", value);
e.removeAttribute("onclick");
e.removeAttribute("onmouseover");
e.removeAttribute("type");
}
if (node.nodeName().equalsIgnoreCase("a")) {
node = fixLinkNode(node);
}
// Restore image resources
if (node.nodeName().equalsIgnoreCase("img")) {
QDomElement e = node.toElement();
String enType = e.attribute("en-tag");
// Check if we have an en-crypt tag. Change it from an img to en-crypt
if (enType.equalsIgnoreCase("en-crypt")) {
String encrypted = e.attribute("alt");
QDomText crypt = doc.createTextNode(encrypted);
e.appendChild(crypt);
e.removeAttribute("v:shapes");
e.removeAttribute("en-tag");
e.removeAttribute("contenteditable");
e.removeAttribute("alt");
e.removeAttribute("src");
e.removeAttribute("id");
e.removeAttribute("onclick");
e.removeAttribute("onmouseover");
e.setTagName("en-crypt");
node.removeChild(e);
return;
}
// Check if we have a LaTeX image. Remove the parent link tag
if (enType.equalsIgnoreCase("en-latex")) {
enType = "en-media";
QDomNode parent = e.parentNode();
parent.removeChild(e);
parent.parentNode().replaceChild(e, parent);
}
// If we've gotten this far, we have an en-media tag
e.setTagName(enType);
resources.add(e.attribute("guid"));
e.removeAttribute("guid");
e.removeAttribute("src");
e.removeAttribute("en-new");
e.removeAttribute("en-tag");
}
// Tags like <ul><ul><li>1</li></ul></ul> are technically valid, but Evernote
// expects that a <ul> tag only has a <li>, so we will need to change them
// to this: <ul><li><ul><li>1</li></ul></li></ul>
if (node.nodeName().equalsIgnoreCase("ul")) {
QDomNode firstChild = node.firstChild();
QDomElement childElement = firstChild.toElement();
if (childElement.nodeName().equalsIgnoreCase("ul")) {
QDomElement newElement = doc.createElement("li");
node.insertBefore(newElement, firstChild);
node.removeChild(firstChild);
newElement.appendChild(firstChild);
}
}
if (node.nodeName().equalsIgnoreCase("en-hilight")) {
QDomElement e = node.toElement();
QDomText newText = doc.createTextNode(e.text());
e.parentNode().replaceChild(newText,e);
}
if (node.nodeName().equalsIgnoreCase("span")) {
QDomElement e = node.toElement();
if (e.attribute("class").equalsIgnoreCase("en-hilight") || e.attribute("class").equalsIgnoreCase("en-spell")) {
QDomText newText = doc.createTextNode(e.text());
e.parentNode().replaceChild(newText,e);
}
if (e.attribute("pdfnavigationtable").equalsIgnoreCase("true")) {
node.parentNode().removeChild(node);
}
}
// Fix up encryption tag
if (node.nodeName().equalsIgnoreCase("en-crypt-temp")) {
QDomElement e = node.toElement();
e.setTagName("en-crypt");
String crypt = e.attribute("value");
e.removeAttribute("value");
QDomText cryptValue = doc.createTextNode(crypt);
e.appendChild(cryptValue);
}
}
private QDomNode fixLinkNode(QDomNode node) {
QDomElement e = node.toElement();
String enTag = e.attribute("en-tag");
if (enTag.equalsIgnoreCase("en-media")) {
e.setTagName("en-media");
e.removeAttribute("en-type");
e.removeAttribute("en-tag");
e.removeAttribute("en-new");
resources.add(e.attribute("guid"));
e.removeAttribute("href");
e.removeAttribute("guid");
e.setNodeValue("");
e.removeChild(e.firstChildElement());
}
return e;
}
// Return old resources we've found
public List<String> getResources() {
return resources;
}
}