// Package cx.fbn.nevernote.evernote
//
// Source Code of cx.fbn.nevernote.evernote.EnmlConverter$TidyListener

/*
* This file is part of NixNote
* Copyright 2009 Randy Baumgarte
*
* This file may be licensed under the terms of the
* GNU General Public License Version 2 (the ``GPL'').
*
* Software distributed under the License is distributed
* on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
* express or implied. See the GPL for the specific language
* governing rights and limitations.
*
* You should have received a copy of the GPL along with this
* program. If not, go to http://www.gnu.org/licenses/gpl.html
* or write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
*/
package cx.fbn.nevernote.evernote;

//**********************************************
//**********************************************
//* This is used to turn HTML into ENML compliant
//* data.
//**********************************************
//**********************************************

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.util.ArrayList;
import java.util.List;

import org.w3c.tidy.Tidy;
import org.w3c.tidy.TidyMessage;

import com.trolltech.qt.core.QByteArray;
import com.trolltech.qt.core.QTextCodec;

import cx.fbn.nevernote.Global;
import cx.fbn.nevernote.utilities.ApplicationLogger;
import cx.fbn.nevernote.utilities.Pair;
import cx.fbn.nevernote.xml.XMLCleanup;
import cx.fbn.nevernote.xml.XMLNoteRepair;

public class EnmlConverter {
  // Logger that receives every diagnostic message written by this converter.
  private final ApplicationLogger logger;
  // Resource list taken from XMLCleanup.getResources() during the last convert() call.
  private List<String>      resources;
  // Starts as false; convert() overwrites it with XMLNoteRepair.saveInvalidXML
  // after the final validation pass.
  public boolean saveInvalidXML;
 
  private class TidyListener implements org.w3c.tidy.TidyMessageListener {
   
    ApplicationLogger logger;
    public boolean errorFound;
   
    public TidyListener(ApplicationLogger logger) {
      this.logger = logger;
      errorFound = false;
    }
    @Override
    public void messageReceived(TidyMessage msg) {
      if (msg.getLevel() == TidyMessage.Level.ERROR) {
        logger.log(logger.LOW, "******* JTIDY ERORR *******");
        logger.log(logger.LOW, "Error Code: " +msg.getErrorCode());
        logger.log(logger.LOW, "Column: " +msg.getColumn());
        logger.log(logger.LOW, "Column: " +msg.getColumn());
        logger.log(logger.LOW, "Line: " +msg.getLine());
        logger.log(logger.LOW, "Message: " +msg.getMessage());
        logger.log(logger.LOW, "***************************");
        errorFound = true;
      } else
        logger.log(logger.EXTREME, "JTidy Results: "+msg.getMessage());
    }
   
  }
 
  public EnmlConverter(ApplicationLogger l) {
    logger = l;
//    conn = c;
    saveInvalidXML = false;
    resources = new ArrayList<String>();
  }

  public List<String> getResources() {
    return resources;
  }
  public String convert(String noteGuid, String content) {
    logger.log(logger.HIGH, "Entering DBRunner.convertToEnml");
    logger.log(logger.EXTREME, "Note Text:" +content);
   
    // Replace the en-note tags with body tags in case we came from
    // someplace other than the editor (for example, if we are merging notes).
    content = content.replace("<en-note>", "<body>");
    content = content.replace("</en-note>", "</body>");
    // Start removing stuff we don't need or want
    int br = content.lastIndexOf("</body>");
    if (br > 0)
      content = new String(content.substring(0,br));
    String newContent;
    int k = content.indexOf("<body");
    if (k>-1)
      newContent = new String(content.substring(k));
    else
      newContent = "<body>"+content;

   
    // Check that we have a vaild header.  Normally we should not
    // but sometimes it seems that we can.  I don't see how, but it is
    // easy enough to check.
    if (!newContent.startsWith("<?xml"))
      newContent = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        +"<!DOCTYPE en-note SYSTEM \"http://xml.evernote.com/pub/enml2.dtd\">\n"
        +newContent
        +"</body>";
   

    // Fix the more common XML problems that Webkit creates, but are not considered
    // valid XML.
    newContent = fixStupidXMLProblems(newContent);
   
   
    // Change the contents to have enml instead of body tags or
    // we'll fail validation later.
    newContent = newContent.replace("<body", "<en-note");
    newContent = newContent.replace("</body>", "</en-note>");
   
    // First pass through the data.  The goal of this pass is to
    // validate that we have a good XML document and to repair
    // any problems found.
   
    XMLNoteRepair repair = new XMLNoteRepair();
//    logger.log(logger.HIGH, "Checking XML Structure");
//    newContent = repair.parse(newContent, false);
//    logger.log(logger.HIGH, "Check complete");
 
        logger.log(logger.HIGH, "Fixing encryption tags");
        newContent = fixEncryptionTags(newContent);
   
    Tidy tidy = new Tidy();
    TidyListener tidyListener = new TidyListener(logger);
    tidy.setMessageListener(tidyListener);
    tidy.getStderr().close()// the listener will capture messages
    tidy.setXmlTags(true);
    tidy.setXHTML(true);
   
    QTextCodec codec;
    codec = QTextCodec.codecForName("UTF-8");
        QByteArray unicode =  codec.fromUnicode(newContent);
       
//    byte html[] = newContent.getBytes();
//    ByteArrayInputStream is = new ByteArrayInputStream(html);
        logger.log(logger.HIGH, "Starting JTidy check");
        logger.log(logger.EXTREME, "Start of JTidy Input");
        logger.log(logger.EXTREME, newContent);
        logger.log(logger.EXTREME, "End Of JTidy Input");
    ByteArrayInputStream is = new ByteArrayInputStream(unicode.toByteArray());
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        tidy.setInputEncoding("UTF-8");
    tidy.parse(is, os);
    String tidyContent = os.toString();
    if (tidyListener.errorFound) {
      logger.log(logger.LOW, "Note Contents Begin");
      logger.log(logger.LOW, content);
      logger.log(logger.LOW, "Note Contents End");
      tidyContent = null;
    } else {
      if (newContent.trim().equals(""))
        tidyContent = null;
    }

    // If the repair above returned null, then the XML is foobar.
    // We are done here.
    if (tidyContent != null) {
      newContent = tidyContent;
    } else {
      // Houston, we've had a problem.  Fall back to old method
      logger.log(logger.HIGH, "Error converting to JTidy.  Falling back to old method");
      String repairedContent = repair.parse(newContent, false);
      if (repairedContent == null) {
        logger.log(logger.EXTREME, "Null returned from repair.parse()");
        logger.log(logger.LOW, "Parse error when converting to ENML. Aborting save");
        return null;
      }
      newContent = repairedContent;
      logger.log(logger.EXTREME, "Start of repaired content");
      logger.log(logger.EXTREME, repairedContent);
      logger.log(logger.EXTREME, "End of repaired content");
    }
   
    // Second pass through the data.  The goal of this pass is to
    // remove any things we added in NixNote that do not match
    // the ENML schema
    XMLCleanup v = new XMLCleanup();
    v.setValue(newContent);
    logger.log(logger.HIGH, "Beginning ENML Cleanup");
    v.validate();
    logger.log(logger.HIGH, "Cleanup complete.");
   
 
     
    // Final pass through the data.  In this one we
    // remove any invalid attributes and to save the
    // new resources.
    logger.log(logger.EXTREME, "Rebuilt ENML:");
    logger.log(logger.EXTREME, v.getValue())
    logger.log(logger.EXTREME, "End Of Rebuilt ENML:");
    resources = v.getResources();

   
    // The XML has the dtd to validate set against Evernote's web
    // address.  We change it to a local one because otherwise it would
    // fail if the user doesn't have internet connectivity.  The local copy
    // also contains the 3 other PUBLIC definitions at the beginning of the dtd.
    newContent = v.getValue();
    File dtdFile = Global.getFileManager().getXMLDirFile("enml2.dtd");
    String dtd = dtdFile.toURI().toString();
    newContent = newContent.replace("<!DOCTYPE en-note SYSTEM \'http://xml.evernote.com/pub/enml2.dtd'>",
        "<!DOCTYPE en-note SYSTEM \"" +dtd +"\">");
   
    logger.log(logger.HIGH, "Validating ENML");
    String repairedContent = repair.parse(newContent, true);
    if (repairedContent == null)
      logger.log(logger.EXTREME, "Null returned from repair.parse()");
    else
      newContent = repairedContent;
    logger.log(logger.HIGH, "Validation complete");
    saveInvalidXML = repair.saveInvalidXML;
   
    // Restore the correct XML header.
    newContent = newContent.replace("<!DOCTYPE en-note SYSTEM \"" +dtd +"\">",
        "<!DOCTYPE en-note SYSTEM 'http://xml.evernote.com/pub/enml2.dtd'>");
   
   
    logger.log(logger.EXTREME, "Leaving ENMLConverter.convert()");
    return newContent;
  }

 
  private String fixEncryptionTags(String content) {
    // Fix the problem that the document body isn't properly closed
    String newContent = new String(content);
    logger.log(logger.MEDIUM, "Inside EnmlConverter.fixEncryptionTags");
    logger.log(logger.EXTREME, content);
   
    // Fix the problem that the img tag isn't properly closed
    int endPos, startPos, endData,slotStart, slotEnd;
    logger.log(logger.MEDIUM, "Checking table encryption tags");
    String eTag = "<table class=\"en-crypt-temp\"";
    for (int i=newContent.indexOf(eTag); i>0; i = newContent.indexOf(eTag,i+1)) {
      slotStart = newContent.indexOf("slot", i+1)+6;
      slotEnd = newContent.indexOf("\"",slotStart);
      String slot = newContent.substring(slotStart, slotEnd);
      startPos = newContent.indexOf("<td>", i+1)+4;
      endData = newContent.indexOf("</td>",startPos);
      String text = newContent.substring(startPos,endData);
      endPos = newContent.indexOf("</table>",i+1)+8;
      // Encrypt the text
      Pair<String,String> pair = Global.passwordSafe.get(slot);
      String password = pair.getFirst();
      String hint = pair.getSecond();
      EnCrypt crypt = new EnCrypt();
      String encrypted = crypt.encrypt(text, password, 64);

      // replace the table with an en-crypt tag.
      newContent = newContent.substring(0,i-1) +
        "<en-crypt-temp cipher=\"RC2\" length=\"64\" hint=\""+
        hint +"\" value=\""+
        encrypted +
        "\" />" +
        newContent.substring(endPos);
    }
   
    return newContent;
  }
 
  // Fix XML problems that Qt can't deal with
  public String fixStupidXMLProblems(String content) {
    logger.log(logger.HIGH, "Entering DBRunner.fixStupidXMLProblems");

    // Fix the problem that the document body isn't properly closed
    String newContent = new String(content);
    logger.log(logger.MEDIUM, "Inside fixStupidXMLProblems.  Old content:");
    logger.log(logger.EXTREME, content);
   
    // Fix the problem that the img tag isn't properly closed
    int endPos;
    logger.log(logger.MEDIUM, "Checking img tags");
    for (int i=newContent.indexOf("<img"); i>0; i = newContent.indexOf("<img",i+1)) {
      endPos = newContent.indexOf(">",i+1);
      String end = newContent.substring(endPos+1);
      newContent = newContent.subSequence(0,endPos) +"/>"+end;
    }
   
    // Fix the problem that the input tag isn't properly closed
    logger.log(logger.MEDIUM, "Checking input tags");
    for (int i=newContent.indexOf("<input"); i>0; i = newContent.indexOf("<input",i+1)) {
      endPos = newContent.indexOf(">",i+1);
      String end = newContent.substring(endPos+1);
      newContent = newContent.subSequence(0,endPos) +"/>"+end;
    }
   
   
    // Fix the problem that the <br> tag isn't properly closed
    logger.log(logger.MEDIUM, "Checking br tags");
    for (int i=newContent.indexOf("<br"); i>0; i = newContent.indexOf("<br",i+1)) {
      endPos = newContent.indexOf(">",i+1);
      String end = newContent.substring(endPos+1);
      newContent = newContent.subSequence(0,endPos) +"/>"+end;
    }
     
    // Fix the problem that the <hr> tag isn't properly closed
    logger.log(logger.MEDIUM, "Checking hr tags");
    for (int i=newContent.indexOf("<hr"); i>0; i = newContent.indexOf("<hr",i+1)) {
      endPos = newContent.indexOf(">",i+1);
      String end = newContent.substring(endPos+1);
      newContent = newContent.subSequence(0,endPos) +"/>"+end;
    }
   
    // Fix the problem that the <meta> tag isn't properly closed
    logger.log(logger.MEDIUM, "Checking meta tags");
    for (int i=newContent.indexOf("<meta"); i>0; i = newContent.indexOf("<meta",i+1)) {
      endPos = newContent.indexOf(">",i+1);
      String end = newContent.substring(endPos+1);
      newContent = newContent.subSequence(0,endPos) +"/>"+end;
    }
   
    logger.log(logger.MEDIUM, "Leaving fixStupidXMLProblems");
    logger.log(logger.HIGH, "Leaving DBRunner.fixStupidXMLProblems");
    return newContent.toString();
  }


  // Fix XML that Evernote thinks is invalid
  public String fixEnXMLCrap(String note) {
    logger.log(logger.EXTREME, "Entering EnmlConverter.fixEnXMLCrap");
    if (note == null)
      return null;
   
    int pos;
    StringBuffer buffer = new StringBuffer(note);
   
    logger.log(logger.EXTREME, "Converting <b/>");
    // change all <b/> to <b></b> because Evernote hates them if they happen in <span>
    pos = buffer.indexOf("<b/>");
    for (; pos>-1; ) {
      buffer.replace(pos, pos+4, "<b></b>");
      pos = buffer.indexOf("<b/>",pos);
    }
    // change all <br/> to <br></br> because Evernote hates them if they happen in <span>
    logger.log(logger.EXTREME, "converting <br/>");
    pos = buffer.indexOf("<br/>");
    for (; pos>-1; ) {
      buffer.replace(pos, pos+5, "<br></br>");
      pos = buffer.indexOf("<br/>",pos);
    }
   
    // change all <span> elements in lists because Evernote hates them if they happen
    int endPos = 0;
    int spanPos;
    pos = buffer.indexOf("<li>");
    spanPos = buffer.indexOf("<span>");
    // Get rid of empty spans in <li> elements
    pos = buffer.indexOf("<li>");
    spanPos = buffer.indexOf("<span/>");
    for (; pos>-1 && spanPos >-1;) {
      endPos = buffer.indexOf("</li>",pos);
      if (spanPos > pos && spanPos < endPos) {
        buffer.replace(spanPos,spanPos+7,"");
      }
      pos=buffer.indexOf("<li>",pos+1);
      spanPos = buffer.indexOf("<span/>",spanPos);
    }
   
    logger.log(logger.EXTREME, "Leaving EnmlConverter.fixEnXMLCrap");
    return buffer.toString();
  }
 
   // Fix stupid en-media problems
  public String fixEnMediaCrap(String note) {
    if (note == null)
      return null;
   
    StringBuffer buffer = new StringBuffer(note);
    // get rid of any </en-media> tags since they shouldn't exist.
    int pos = buffer.indexOf("</en-media>");
    for (; pos>-1; ) {
      buffer.replace(pos, pos+11, "");
      pos = buffer.indexOf("</en-media>",pos);
    }
   
   
    // Make sure we have a proper /> ending the en-media tag
    pos = buffer.indexOf("<en-media");
    for (; pos>-1; ) {
      pos=buffer.indexOf(">", pos);
      if (!buffer.substring(pos-1,pos).equals("/"))
      buffer.replace(pos, pos+1, " />");
      pos = buffer.indexOf("<en-media",pos);
    }
   
    return buffer.toString();
  }
}
// TOP
//
// Related Classes of cx.fbn.nevernote.evernote.EnmlConverter$TidyListener
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.