Package com.dappit.Dapper.parser

Source Code of com.dappit.Dapper.parser.DomDocumentBuilder

package com.dappit.Dapper.parser;

import java.util.Iterator;
import java.util.List;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.CDATASection;
import org.w3c.dom.Comment;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.EntityReference;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

/**
 * @author Ohad Serfaty
 *
 * A class for building DOM documents from Mozilla's content sink instructions.
 * Supported operations are: OpenNode <tag name>, CloseNode <tag name>, AddText
 * <content>, AddLeaf <tag name>, WriteAttributeKey <key> (paired with the next
 * op: WriteAttributeValue <value>), CloseLeaf, AddComment, AddEntity.
 * Unsupported (for the time being): AddInstruction, AddTitle.
 * Note that this class is reusable; you can use reset() to clear the content
 * of the DOM.
 */
// Builds org.w3c.dom documents from a pool of parser instructions (see buildDocument).
public class DomDocumentBuilder
  // Shared across all instances; buildDocument creates it on first use when it is still null.
  private static DocumentBuilder documentBuilder = null;

  // The value (char) -1. In this file it is only referenced from the commented-out
  // body of fixText, so it is currently unused by live code.
  private static char charMinusOne = (char) -1;

  public static String getCDATASection(String domArgument)
    if (!domArgument.contains("CDATA"))
      return null;
    Pattern pat = Pattern.compile("(.*)\\<\\!(\\s*)\\[CDATA(.*)\\]\\]\\>(.*)", Pattern.DOTALL + Pattern.MULTILINE);
    Matcher mat = pat.matcher(domArgument);
    if (mat.find())
      String group3 =;
      if (group3.startsWith("["))
        group3 = group3.replaceFirst("\\[", "");
      String result = + group3 +;
      return result;
    return null;

  /**
   * Finalize and build the DOM document.
   * @return the built document
   */
  // Walks the parallel operations/arguments lists of instructionsPool, one
  // (operation, argument) pair per iteration, and returns the Document created
  // at the start of the call.
  // NOTE(review): as written here the method has no braces, no `break` between
  // switch cases, and no statement that attaches the nodes it creates to the
  // tree; these look like they were lost before this text was captured.
  // Restore them from the upstream source before relying on this listing.
  public Document buildDocument(InstructionsPool instructionsPool)
    throws ParserConfigurationException
    // Create the shared static DocumentBuilder the first time it is needed.
    if( documentBuilder == null )
      documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();

    // System.out.println("building document...");
    Document resultDocument = documentBuilder.newDocument();
    Element currentElement = null;
    List<Integer> operations = instructionsPool.operations;
    List<String> arguments = instructionsPool.arguments;
    boolean isInLeaf = false;
    boolean closeHtml = true;

    for (int i=0; i<operations.size(); i++)
      int domOperation = operations.get(i);
      String domArgument = arguments.get(i);
      //System.out.println("Operation :" + ParserInstruction.getOperationString(domOperation)+" Arg:~" + domArgument+"~");
      // New elements are created in the namespace of the current element (null when there is none).
      String currentNamespaceURI = currentElement == null ? null : currentElement.getNamespaceURI();
      switch (domOperation)
      // Open node :
      case ParserInstruction.OpenNode:
        closeHtml = true;
        // Element names are lower-cased before the element is created.
        Element childNode = createElementNS(resultDocument, currentNamespaceURI, domArgument.toLowerCase());
        if (currentElement == null)
          currentElement = childNode;
          // NOTE(review): the two assignments below sit under the non-"html" check;
          // presumably an `else` originally separated them, so that closeHtml is
          // cleared only for a nested "html" tag - confirm.
          if (!domArgument.equalsIgnoreCase("html"))
            currentElement = childNode;
            closeHtml = false;
      // Close node :
      case ParserInstruction.CloseNode:
        if (currentElement == null)
          System.err.println("Error : Close Node where no OpenNode was called. trying to fix...");
          // this.dump();
        else if (closeHtml)
          // Step back up to the parent. NOTE(review): the two assignments to
          // currentElement below presumably were the arms of an if/else (null when
          // the parent is the Document itself, the parent element otherwise).
          Node parentNode = currentElement.getParentNode();
          if( parentNode == resultDocument )
            currentElement = null;
            currentElement = (Element) parentNode;

      case ParserInstruction.AddText:
      case ParserInstruction.AddContent:
        // System.out.println(currentElement.getNodeName() +" : Adding
        // text :" + domArgument);
        // check : try and resolve this with a <newline> from mozilla
        // instead :
        boolean script = false;
        boolean style = false;

        // NOTE(review): currentElement is dereferenced here without the null check
        // that the CloseNode case performs.
        if (currentElement.getNodeName().equalsIgnoreCase("script"))
          script = true;
        else if (currentElement.getNodeName().equalsIgnoreCase("style"))
          style = true;
          domArgument = DomDocumentBuilder.fixText(domArgument);

        // System.out.println("Body content :" + domArgument);
//         System.out.println("AddText "+domArgument.length());
        // Empty arguments produce no node.
        if (domArgument.length() >= 1)
          // Outside script/style the argument becomes a plain text node; the lines
          // after it trim the argument and look for a CDATA section in it.
          // NOTE(review): the branch structure (which lines belong to an `else`) is
          // not recoverable from this listing, and neither textNode nor
          // cdataSection is attached to currentElement here.
          if (!script && !style)
            Text textNode = resultDocument.createTextNode(domArgument);
            domArgument = domArgument.trim();
            String cdata = getCDATASection(domArgument);

            if (cdata != null)
              if (script)
                cdata = DomDocumentBuilder.fixText(cdata);
                cdata = DomDocumentBuilder.fixText(domArgument);
              CDATASection cdataSection = resultDocument.createCDATASection(cdata);
              domArgument = DomDocumentBuilder.fixText(domArgument);
              Text textNode = resultDocument.createTextNode(domArgument);
      case ParserInstruction.AddLeaf:
        // A leaf becomes the current element until the matching CloseLeaf;
        // isInLeaf records that state.
        Element leafElement = createElementNS(resultDocument, currentNamespaceURI, domArgument);
        currentElement = leafElement;
        isInLeaf = true;
      case ParserInstruction.WriteAttributeKey:
        // add an attribute with the next lookahead operation :
        String key = domArgument;

        // NOTE(review): both reads below use index i, the same index the key came
        // from; the comments imply i should have been advanced first - confirm.
        domOperation = operations.get(i); // Fetch the next operation , must be WriteAttributeValue
        String value = arguments.get(i); // Fetch the attribute's value.

        // An "xmlns" attribute renames the current element into the namespace given
        // by the value; "_moz-userdefined" is skipped; any other attribute is set
        // with a lower-cased name.
        if( key.trim().equalsIgnoreCase("xmlns") )
          currentElement = (Element) resultDocument.renameNode( currentElement, value, currentElement.getNodeName() );
        else if( !domArgument.trim().equalsIgnoreCase("_moz-userdefined") )
          trySetAttribute( currentElement, domArgument.toLowerCase(), DomDocumentBuilder.fixText(value) );
      case ParserInstruction.CloseLeaf:
        // Only meaningful after an AddLeaf: return to the leaf's parent.
        if (isInLeaf)
          currentElement = (Element) currentElement.getParentNode();
          isInLeaf = false;
      case ParserInstruction.AddEntity:
        EntityReference entity = resultDocument.createEntityReference(domArgument);
        // a bugfix for a c++ problem in the mozilla parser:
        // NOTE(review): charAt(0) throws StringIndexOutOfBoundsException for an empty argument.
        if (!Character.isDigit(domArgument.charAt(0)))
          entity.appendChild(resultDocument.createTextNode("&" + domArgument + ";"));
      case ParserInstruction.AddComment:
        Comment comment = resultDocument.createComment(domArgument);
      case ParserInstruction.SetTitle:
        Element titleNode = createElementNS(resultDocument, currentNamespaceURI, "title");
        NodeList headElements = resultDocument.getElementsByTagName("head");
        // Add the title with the new text :
        // NOTE(review): no body is visible for the condition below.
        if (headElements.getLength() > 0)
    return resultDocument;

  public static String fixText(String text)
    return text;
//    StringBuilder fixedText = new StringBuilder(text.length()+6);
//    char[] charArray = text.toCharArray();
//    boolean textChanged = false;
//    for (int i = 0; i < charArray.length; i++)
//    {
//      char ch = charArray[i];
//      if (ch == '&')
//      {
//        char ch2 = charArray.length >= i + 2 ? charArray[i + 1] : charMinusOne;
//        char ch3 = charArray.length >= i + 3 ? charArray[i + 2] : charMinusOne;
//        char ch4 = charArray.length >= i + 4 ? charArray[i + 3] : charMinusOne;
//        char ch5 = charArray.length >= i + 5 ? charArray[i + 4] : charMinusOne;
//        char ch6 = charArray.length >= i + 6 ? charArray[i + 5] : charMinusOne;
//        // char ch7 = charArray.length > i+7 ? charArray[i+6] :
//        // charMinusOne;
//        // System.out.println("ch3:" + ch3 +" ch7:" + ch6);
//        if (ch2 == '#')
//        {
//          if (ch3 == '1')
//          {
//            if ((ch4 == '0' && ch5 == ';'))
//              i = i + 4;
//          }
//          else if (ch3 == '9' && ch4 == ';')
//          {
//            i = i + 3;
//          }
//        }
//        else if (ch2 == 'l' && ch3 == 't' && ch4 == ';')
//        {
//          fixedText.append('>');
//          i = i + 2;
//        }
//        else if (ch2 == 'g' && ch3 == 't' && ch4 == ';')
//        {
//          fixedText.append('<');
//          i = i + 3;
//        }
//        else if (ch2 == 'a' && ch3 == 'm' && ch4 == 'p' && ch5 == ';')
//        {
//          fixedText.append('&');
//          i = i + 4;
//        }
//        else if (ch2 == 'q' && ch3 == 'u' && ch4 == 'o' && ch5 == 't' && ch6 == ';')
//        {
//          fixedText.append('"');
//          i = i + 5;
//        }
//        textChanged = true;
//      }
//      else if (ch == '\n')
//        fixedText.append(ch);
//      else if (ch == 32)
//      {
//        fixedText.append(' ');
//        textChanged = true;
//      }
//      else if (ch < 32 && ch > 0 && ch != 9)
//        textChanged = true;
//      else
//        fixedText.append(ch);
//    }
//    if (!textChanged)
//      return text;
//    return fixedText.toString();
//  Old version of the fixText function :
//  private static final String String32 = new String(new byte[]{ 32 });
//  private static final String String0 = new String(new byte[]{ 0 });
//  private static final String String1 = new String(new byte[]{ 0x1 });
//  private static final String String14 = new String(new byte[]{ 0x14 });
//  private static final String String1d = new String(new byte[]{ 0x1d });
//  private static final String String0xf = new String(new byte[]{ 0xf });
//  private static final String String0x1A = new String(new byte[]{ 0x1A });
//  private static final String String0x12 = new String(new byte[]{ 0x12 });
//  private static final String String0x8 = new String(new byte[]{ 0x8 });
//  private static final String String0x1f = new String(new byte[]{ 0x1f });
//  private static final String String0x2 = new String(new byte[]{ 0x2 });
//  private static final String String0x7 = new String(new byte[]{ 0x7 });
//  private static final String String0x18 = new String(new byte[]{ 0x18 });
//  private static final String String0x19 = new String(new byte[]{ 0x19 });
//  private static final String String0x1B = new String(new byte[]{ 0x1B });
//  private static final String String0x1C = new String(new byte[]{ 0x1C });
//  private static final String String0x11 = new String(new byte[]{ 0x11 });
//  private static final String String0x10 = new String(new byte[]{ 0x10 });
//  private static final String String0x13 = new String(new byte[]{ 0x13 });
//  private static String fixTextOld(String text)
//  {
//    String fixedText = new String(text);
//    fixedText = fixedText.replaceAll("&#10;", "");
//    fixedText = fixedText.replaceAll("&#9;", "");
//    fixedText = fixedText.replaceAll("&#160;", " ");
//    fixedText = fixedText.replaceAll("&#194;", "\"");
//    fixedText = fixedText.replaceAll("&quot;", "\"");
//    fixedText = fixedText.replaceAll("&lt;", "<");
//    fixedText = fixedText.replaceAll("&gt;", ">");
//    fixedText = fixedText.replaceAll("&amp;", "&");
//    fixedText = fixedText.replaceAll(String32, " ");
//    fixedText = fixedText.replaceAll("["+String0+String1+String14+String1d+String0xf + String0xf+
//        String0x1A + String0x12 + String0x8 + String0x1f+ String0x2+String0x7+String0x18+String0x19+String0x1B+String0x1C+
//        String0x11+String0x10+String0x13+"]" , "");
//    return fixedText;
//  }

  /**
   * Try setting an attribute on a given Node, but don't throw any exceptions
   * if the operation fails because of an invalid character.
   */
  private static void trySetAttribute( Element element, String name, String value )
      element.setAttribute( name, value );
    catch( DOMException e )
      System.err.println( "Failed to set attribute: " + name + " => " + value + "; continuing anyway" );

  /**
   * Wrapper around createElementNS that correctly handles a qualifiedName
   * containing a prefix (<tt>&lt;foo:bar&gt;</tt>) with no namespaceURI.
   */
  private static Element createElementNS( Document doc, String namespaceURI, String qualifiedName )
    if( namespaceURI == null )
      return doc.createElement( qualifiedName );
      return doc.createElementNS( namespaceURI, qualifiedName );

Related Classes of com.dappit.Dapper.parser.DomDocumentBuilder

Copyright © 2018 All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact