Source Code of org.openoffice.xmerge.converter.xml.OfficeDocument

/************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 * 
 * Copyright 2008 by Sun Microsystems, Inc.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * $RCSfile: OfficeDocument.java,v $
 * $Revision: 1.18 $
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org.  If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/
package org.openoffice.xmerge.converter.xml;


import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.BufferedReader;
import java.io.StringReader;
import java.io.InputStreamReader;
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.Iterator;
import java.util.Map;
import java.util.HashMap;


import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;


import org.w3c.dom.Node;
import org.w3c.dom.Element;
import org.w3c.dom.Document;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.DocumentType;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.w3c.dom.NamedNodeMap;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;


import org.openoffice.xmerge.util.Resources;
import org.openoffice.xmerge.util.Debug;


/**
 *  An implementation of <code>Document</code> for
 *  StarOffice documents.
 */
public abstract class OfficeDocument
    implements org.openoffice.xmerge.Document,
               OfficeConstants {


    /** Factory for <code>DocumentBuilder</code> objects. */
    private static DocumentBuilderFactory factory =
       DocumentBuilderFactory.newInstance();


    /** DOM <code>Document</code> of content.xml. */
    private Document contentDoc = null;


   /** DOM <code>Document</code> of meta.xml. */
    private Document metaDoc = null;


   /** DOM <code>Document</code> of settings.xml. */
    private Document settingsDoc = null;


    /** DOM <code>Document</code> of styles.xml. */
    private Document styleDoc = null;
    
    /** DOM <code>Document</code> of META-INF/manifest.xml. */
    private Document manifestDoc = null;


    private String documentName = null;
    private String fileName = null;


    /** Resources object. */
    private Resources res = null;


    /**
     *  <code>OfficeZip</code> object to store zip contents from
     *  read <code>InputStream</code>.  Note that this member
     *  will still be null if it was initialized using a template
     *  file instead of reading from a StarOffice zipped
     *  XML file.
     */
    private OfficeZip zip = null;
       
    /** Collection to keep track of the embedded objects in the document. */
    private Map embeddedObjects = null;


    /**
     *  Creates a document with the given name by delegating to
     *  {@link #OfficeDocument(String, boolean, boolean)} with
     *  <code>namespaceAware</code> set to <code>true</code> and
     *  <code>validating</code> set to <code>false</code>.
     *
     *  @param  name  <code>Document</code> name.
     */
    public OfficeDocument(String name)
    {
        this(name, true, false);
    }




    /**
     *  Constructor with arguments to set <code>namespaceAware</code>
     *  and <code>validating</code> flags.
     *
     *  @param  name            <code>Document</code> name (may or may not
     *                          contain extension).
     *  @param  namespaceAware  Value for <code>namespaceAware</code> flag.
     *  @param  validating      Value for <code>validating</code> flag.
     */
    public OfficeDocument(String name, boolean namespaceAware, boolean validating) {


        res = Resources.getInstance();
        factory.setValidating(validating);
        factory.setNamespaceAware(namespaceAware);
        this.documentName = trimDocumentName(name);
        this.fileName = documentName + getFileExtension();
    }




    /**
     *  Removes the file extension from the <code>Document</code>
     *  name.
     *
     *  @param  name  Full <code>Document</code> name with extension.
     *
     *  @return  Name of <code>Document</code> without the extension.
     */
    private String trimDocumentName(String name) {
        String temp = name.toLowerCase();
        String ext = getFileExtension();


        if (temp.endsWith(ext)) {
            // strip the extension
            int nlen = name.length();
            int endIndex = nlen - ext.length();
            name = name.substring(0,endIndex);
        }


        return name;
    }




    /**
     *  Returns the DOM <code>Document</code> currently held for the
     *  content.xml part.  The constructor does not create it; the value
     *  is <code>null</code> until one of the <code>read</code> methods,
     *  <code>initContentDOM</code> or <code>setContentDOM</code> has
     *  stored a document.
     *
     *  @return  DOM <code>Document</code> object, or <code>null</code>
     *           if none has been stored yet.
     */
    public Document getContentDOM() {


        return contentDoc;
    }


 /**
     *  Returns the DOM <code>Document</code> currently held for the
     *  meta.xml part.  The constructor does not create it; the value
     *  is <code>null</code> until one of the <code>read</code> methods
     *  or <code>setMetaDOM</code> has stored a document.
     *
     *  @return  DOM <code>Document</code> object, or <code>null</code>
     *           if none has been stored yet.
     */
    public Document getMetaDOM() {


        return metaDoc;
    }




 /**
     *  Returns the DOM <code>Document</code> currently held for the
     *  settings.xml part.  The constructor does not create it; the
     *  value is <code>null</code> until one of the <code>read</code>
     *  methods, <code>initSettingsDOM</code> or
     *  <code>setSettingsDOM</code> has stored a document.
     *
     *  @return  DOM <code>Document</code> object, or <code>null</code>
     *           if none has been stored yet.
     */
    public Document getSettingsDOM() {


        return settingsDoc;
    }




    /**
     * Sets the content tree of the document.
     *
     * @param   newDom  <code>Node</code> containing the new content tree.
     *                  It is cast to <code>org.w3c.dom.Document</code>
     *                  without a prior type check, so passing any other
     *                  kind of node raises a
     *                  <code>ClassCastException</code>.
     */
    public void setContentDOM( Node newDom) {
        contentDoc = (Document)newDom;
    }


    
    /**
     * Sets the meta tree of the document.
     *
     * @param   newDom  <code>Node</code> containing the new meta tree.
     *                  It is cast to <code>org.w3c.dom.Document</code>
     *                  without a prior type check, so passing any other
     *                  kind of node raises a
     *                  <code>ClassCastException</code>.
     */
    public void setMetaDOM (Node newDom) {
        metaDoc = (Document)newDom;
    }


    
    /**
     * Sets the settings tree of the document.
     *
     * @param   newDom  <code>Node</code> containing the new settings tree.
     *                  It is cast to <code>org.w3c.dom.Document</code>
     *                  without a prior type check, so passing any other
     *                  kind of node raises a
     *                  <code>ClassCastException</code>.
     */
    public void setSettingsDOM (Node newDom) {
        settingsDoc = (Document)newDom;
    }
    


    /**
     * Sets the style tree of the document.
     *
     * @param   newDom  <code>Node</code> containing the new style tree.
     *                  It is cast to <code>org.w3c.dom.Document</code>
     *                  without a prior type check, so passing any other
     *                  kind of node raises a
     *                  <code>ClassCastException</code>.
     */
    public void setStyleDOM (Node newDom) {
        styleDoc = (Document)newDom;
    }
    


    /**
     *  Return a DOM <code>Document</code> object of the styles.xml file.
     *  Note that this may return <code>null</code> if there is no style
     *  DOM: a style DOM is not created when the constructor is called,
     *  and the zip <code>read</code> method only builds one when the
     *  package actually contains style data.  When creating a new style
     *  DOM, call the <code>initStyleDOM</code> method first.
     *
     *  @return  DOM <code>Document</code> object, or <code>null</code>
     *           if none has been stored yet.
     */
    public Document getStyleDOM() {


        return styleDoc;
    }




    /**
     *  Return the name of the <code>Document</code>.  This is the name
     *  passed to the constructor after it has been run through
     *  <code>trimDocumentName</code>, i.e. without the standard file
     *  extension.
     *
     *  @return  The name of <code>Document</code>.
     */
    public String getName() {


        return documentName;
    }




    /**
     *  Return the file name of the <code>Document</code>.  The
     *  constructor computes it as the trimmed document name followed by
     *  the value of <code>getFileExtension()</code>.
     *
     *  @return  The file name of <code>Document</code>.
     */
    public String getFileName() {


        return fileName;
    }




    /**
     *  Returns the file extension for this type of
     *  <code>Document</code>.
     *
     *  <p>Within this class the value is appended verbatim to the
     *  document name to form the file name, and is compared with the
     *  tail of a supplied name in order to strip an existing
     *  extension.  It is called from the constructor, so
     *  implementations should not rely on subclass state.</p>
     *
     *  @return  The file extension of <code>Document</code>.
     */
    protected abstract String getFileExtension();




    /**
     * Returns all the embedded objects (graphics, formulae, etc.) present in
     * this document.
     *
     * @return An <code>Iterator</code> of <code>EmbeddedObject</code> objects.
     */
    public Iterator getEmbeddedObjects() {
        
        if (embeddedObjects == null && manifestDoc != null) {            
            embeddedObjects = new HashMap();           
            
            // Need to read the manifest file and construct a list of objects                       
            NodeList nl = manifestDoc.getElementsByTagName(TAG_MANIFEST_FILE);
                      
            // Dont create the HashMap if there are no embedded objects
            int len = nl.getLength();
            for (int i = 0; i < len; i++) {
                Node n = nl.item(i);
                
                NamedNodeMap attrs = n.getAttributes();
                
                String type = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_TYPE).getNodeValue();
                String path = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_PATH).getNodeValue();
                
                
                /*
                 * According to OpenOffice.org XML File Format document (ver. 1)
                 * there are only two types of embedded object:
                 *
                 *      Objects with an XML representation.
                 *      Objects without an XML representation.
                 *
                 * The former are represented by one or more XML files.
                 * The latter are in binary form.
                 */
                if (type.startsWith("application/vnd.sun.xml"))       
                {
                    if (path.equals("/")) {
                        // Exclude the main document entries
                        continue;
                    }
                    // Take off the trailing '/'
                    String name = path.substring(0, path.length() - 1);
                    embeddedObjects.put(name, new EmbeddedXMLObject(name, type, zip));
                }
                else if (type.equals("text/xml")) {
                    // XML entries are either embedded StarOffice doc entries or main
                    // document entries
                    continue;
                }
                else { // FIX (HJ): allows empty MIME type      
                    embeddedObjects.put(path, new EmbeddedBinaryObject(path, type, zip));
                }
            }
        }
        
        return embeddedObjects.values().iterator();
    }
    
    /**
     * Returns the embedded object corresponding to the name provided.
     * The name should be stripped of any preceding path characters, such as 
     * '/', '.' or '#'.
     *
     * @param   name    The name of the embedded object to retrieve.
     *
     * @return  An <code>EmbeddedObject</code> instance representing the named 
     *          object.
     */
    public EmbeddedObject getEmbeddedObject(String name) {
        if (name == null) {
            return null;
        }
        
        if (embeddedObjects == null) {
            getEmbeddedObjects();
        }
        
        if (embeddedObjects.containsKey(name)) {
            return (EmbeddedObject)embeddedObjects.get(name);
        }
        else {
            return null;
        }
    }
    
    
    /**
     * Adds a new embedded object to the document.
     *
     * @param   embObj  An instance of <code>EmbeddedObject</code>.
     */
    public void addEmbeddedObject(EmbeddedObject embObj) {
        if (embObj == null) {
            return;
        }
        
        if (embeddedObjects == null) {
            embeddedObjects = new HashMap();
        }
        
        embeddedObjects.put(embObj.getName(), embObj);
    }
    
    
    /**
     *  Read the Office <code>Document</code> from the given
     *  <code>InputStream</code>.
     *
     *  @param  is  Office document <code>InputStream</code>.
     *
     *  @throws  IOException  If any I/O error occurs.
     */
    public void read(InputStream is) throws IOException {


        Debug.log(Debug.INFO, "reading Office file");


        DocumentBuilder builder = null;


        try {
            builder = factory.newDocumentBuilder();
        } catch (ParserConfigurationException ex) {
            throw new OfficeDocumentException(ex);
        }


        // read in Office zip file format


        zip = new OfficeZip();
        zip.read(is);


        // grab the content.xml and
        // parse it into contentDoc.


        byte contentBytes[] = zip.getContentXMLBytes();


        if (contentBytes == null) {


            throw new OfficeDocumentException("Entry content.xml not found in file");
        }


        try {


            contentDoc = parse(builder, contentBytes);


        } catch (SAXException ex) {


            throw new OfficeDocumentException(ex);
        }


        // if style.xml exists, grab the style.xml
        // parse it into styleDoc.


        byte styleBytes[] = zip.getStyleXMLBytes();


        if (styleBytes != null) {


            try {


                styleDoc = parse(builder, styleBytes);


            } catch (SAXException ex) {


                throw new OfficeDocumentException(ex);
            }
        }


  byte metaBytes[] = zip.getMetaXMLBytes();


        if (metaBytes != null) {


            try {


                metaDoc = parse(builder, metaBytes);


            } catch (SAXException ex) {


                throw new OfficeDocumentException(ex);
            }
        }


  byte settingsBytes[] = zip.getSettingsXMLBytes();


        if (settingsBytes != null) {


            try {


                settingsDoc = parse(builder, settingsBytes);


            } catch (SAXException ex) {


                throw new OfficeDocumentException(ex);
            }
        }


        
        // Read in the META-INF/manifest.xml file
        byte manifestBytes[] = zip.getManifestXMLBytes();
        
        if (manifestBytes != null) {
            
            try {
                manifestDoc = parse(builder, manifestBytes);
            } catch (SAXException ex) {
                throw new OfficeDocumentException(ex);
            }
        }


    }




    /**
     *  Read the Office <code>Document</code> from the given
     *  <code>InputStream</code>.
     *
     *  @param  is  Office document <code>InputStream</code>.
     *  @param  isZip <code>boolean</code> Identifies whether 
     *                 a file is zipped or not
     *
     *  @throws  IOException  If any I/O error occurs.
     */
    public void read(InputStream is, boolean isZip) throws IOException {


        Debug.log(Debug.INFO, "reading Office file");


        DocumentBuilder builder = null;


        try {
            builder = factory.newDocumentBuilder();
        } catch (ParserConfigurationException ex) {
            throw new OfficeDocumentException(ex);
        }
  
  if (isZip)
  {
            read(is);
  }
  else{
      try{
    //System.out.println("\nParsing Input stream, validating?: "+builder.isValidating());
    //contentDoc=  builder.parse((InputStream)is);


               Reader r = secondHack(is);
               InputSource ins = new InputSource(r);
          org.w3c.dom.Document newDoc = builder.parse(ins);
          //org.w3c.dom.Document newDoc = builder.parse((InputStream)is);
          Element rootElement=newDoc.getDocumentElement();
                
          NodeList nodeList;
          Node tmpNode;
          Node rootNode = (Node)rootElement;
                if (newDoc !=null){
        /*content*/
                   contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT);
                   rootElement=contentDoc.getDocumentElement();
                   rootNode = (Node)rootElement;
                   
                   // FIX (HJ): Include office:font-decls in content DOM
                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS);
                   if (nodeList.getLength()>0){
                       tmpNode = contentDoc.importNode(nodeList.item(0),true);
                       rootNode.appendChild(tmpNode);
                   }
                   
                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
                   if (nodeList.getLength()>0){
                tmpNode = contentDoc.importNode(nodeList.item(0),true);
                rootNode.appendChild(tmpNode);
                   }
                   
                    nodeList= newDoc.getElementsByTagName(TAG_OFFICE_BODY);
                   if (nodeList.getLength()>0){
                tmpNode = contentDoc.importNode(nodeList.item(0),true);
                rootNode.appendChild(tmpNode);
                   }
                  
       /*Styles*/
                   styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES);
                   rootElement=styleDoc.getDocumentElement();
                   rootNode = (Node)rootElement;
           
                   // FIX (HJ): Include office:font-decls in styles DOM
                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS);
                   if (nodeList.getLength()>0){
                    tmpNode = styleDoc.importNode(nodeList.item(0),true);
                      rootNode.appendChild(tmpNode);
                   }
           
                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_STYLES);
                   if (nodeList.getLength()>0){
                tmpNode = styleDoc.importNode(nodeList.item(0),true);
                rootNode.appendChild(tmpNode);
                   }


                   // FIX (HJ): Include office:automatic-styles in styles DOM
                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
                   if (nodeList.getLength()>0){
                      tmpNode = styleDoc.importNode(nodeList.item(0),true);
                      rootNode.appendChild(tmpNode);
                   }
           
                   // FIX (HJ): Include office:master-styles in styles DOM
                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES);
                   if (nodeList.getLength()>0){
                       tmpNode = styleDoc.importNode(nodeList.item(0),true);
                       rootNode.appendChild(tmpNode);
                   }


       /*Settings*/
                   settingsDoc = createDOM(TAG_OFFICE_DOCUMENT_SETTINGS);
                   rootElement=settingsDoc.getDocumentElement();
                   rootNode = (Node)rootElement;
                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_SETTINGS);
                   if (nodeList.getLength()>0){
                tmpNode = settingsDoc.importNode(nodeList.item(0),true);
                rootNode.appendChild(tmpNode);
                   } 
       /*Meta*/
                   metaDoc = createDOM(TAG_OFFICE_DOCUMENT_META);
                   rootElement=metaDoc.getDocumentElement();
                   rootNode = (Node)rootElement;
                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_META);
                   if (nodeList.getLength()>0){
                tmpNode = metaDoc.importNode(nodeList.item(0),true);
                rootNode.appendChild(tmpNode);
                   } 
                }
      }
      catch (SAXException ex) {
    throw new OfficeDocumentException(ex);
      }
  }
      
    }






    /**
     *  Parse given <code>byte</code> array into a DOM
     *  <code>Document</code> object using the
     *  <code>DocumentBuilder</code> object.
     *
     *  @param  builder  <code>DocumentBuilder</code> object for parsing.
     *  @param  bytes    <code>byte</code> array for parsing.
     *
     *  @return  Resulting DOM <code>Document</code> object.
     *
     *  @throws  SAXException  If any parsing error occurs.
     */
    static Document parse(DocumentBuilder builder, byte bytes[])
        throws SAXException, IOException {


        Document doc = null;


        ByteArrayInputStream is = new ByteArrayInputStream(bytes);


        // TODO:  replace hack with a more appropriate fix.


        Reader r = hack(is);
        InputSource ins = new InputSource(r);
        doc = builder.parse(ins);


        return doc;
    }
    
    
    /** 
     * Method to return the MIME type of the document.
     *
     * <p>Within this class the value is checked against
     * <code>SXC_MIME_TYPE</code> and <code>SXW_MIME_TYPE</code> when a
     * flat (non-zip) document is written, in order to choose the
     * <code>office:class</code> attribute of the root element.</p>
     *
     * @return  The document's MIME type.
     */
    protected abstract String getDocumentMimeType();




    /**
     *  Write out Office ZIP file format.
     *
     *  @param  os  XML <code>OutputStream</code>.
     *
     *  @throws  IOException  If any I/O error occurs.
     */
    public void write(OutputStream os) throws IOException {
        if (zip == null) {  
            zip = new OfficeZip();
      }
        
        initManifestDOM();
        
        Element domEntry;
        Element manifestRoot = manifestDoc.getDocumentElement();
                   
        // The EmbeddedObjects come first.
        Iterator embObjs = getEmbeddedObjects();
        while (embObjs.hasNext()) {
            EmbeddedObject obj = (EmbeddedObject)embObjs.next();
            obj.writeManifestData(manifestDoc);
            
            obj.write(zip);
        }
        
        // Add in the entry for the Pictures directory.  Always present.
        domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
        domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "Pictures/");
        domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "");
        manifestRoot.appendChild(domEntry);
        
  // Write content to the Zip file and then write any of the optional
        // data, if it exists.
  zip.setContentXMLBytes(docToBytes(contentDoc));
        
        domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
        domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "content.xml");
        domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
               
        manifestRoot.appendChild(domEntry);
      
  if (styleDoc != null) {
            zip.setStyleXMLBytes(docToBytes(styleDoc));
            
            domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
            domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "styles.xml");
            domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
            manifestRoot.appendChild(domEntry);
        }
        
        if (metaDoc != null) {
            zip.setMetaXMLBytes(docToBytes(metaDoc));
            
            domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
            domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "meta.xml");            
            domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
            manifestRoot.appendChild(domEntry);
        }
        
        if (settingsDoc != null) {
            zip.setSettingsXMLBytes(docToBytes(settingsDoc));
            
            domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
            domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "settings.xml");
            domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
            manifestRoot.appendChild(domEntry);
        }
        
        zip.setManifestXMLBytes(docToBytes(manifestDoc));


        zip.write(os);
    }




     /**
     *  Write out Office ZIP file format.
     *
     *  @param  os  XML <code>OutputStream</code>.
     *  @param  isZip <code>boolean</code>
     *
     *  @throws  IOException  If any I/O error occurs.
     */
    public void write(OutputStream os, boolean isZip) throws IOException {


        // Create an OfficeZip object if one does not exist.
        if (isZip){
            write(os);
  }
  else{
      try{
    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder builder= builderFactory.newDocumentBuilder();
    DOMImplementation domImpl = builder.getDOMImplementation();
    DocumentType docType =domImpl.createDocumentType("office:document","-//OpenOffice.org//DTD OfficeDocument 1.0//EN",null);
    org.w3c.dom.Document newDoc = domImpl.createDocument("http://openoffice.org/2000/office","office:document",null);
    
    
    Element rootElement=newDoc.getDocumentElement();
    rootElement.setAttribute("xmlns:office","http://openoffice.org/2000/office"); 
    rootElement.setAttribute("xmlns:style","http://openoffice.org/2000/style" ); 
    rootElement.setAttribute("xmlns:text","http://openoffice.org/2000/text"); 
    rootElement.setAttribute("xmlns:table","http://openoffice.org/2000/table"); 
    
    rootElement.setAttribute("xmlns:draw","http://openoffice.org/2000/drawing"); 
    rootElement.setAttribute("xmlns:fo","http://www.w3.org/1999/XSL/Format" ); 
    rootElement.setAttribute("xmlns:xlink","http://www.w3.org/1999/xlink" ); 
    rootElement.setAttribute("xmlns:dc","http://purl.org/dc/elements/1.1/" ); 
    rootElement.setAttribute("xmlns:meta","http://openoffice.org/2000/meta" ); 
    rootElement.setAttribute("xmlns:number","http://openoffice.org/2000/datastyle" ); 
    rootElement.setAttribute("xmlns:svg","http://www.w3.org/2000/svg" ); 
    rootElement.setAttribute("xmlns:chart","http://openoffice.org/2000/chart" ); 
    rootElement.setAttribute("xmlns:dr3d","http://openoffice.org/2000/dr3d" ); 
    rootElement.setAttribute("xmlns:math","http://www.w3.org/1998/Math/MathML" ); 
    rootElement.setAttribute("xmlns:form","http://openoffice.org/2000/form" ); 
    rootElement.setAttribute("xmlns:script","http://openoffice.org/2000/script" ); 
    rootElement.setAttribute("xmlns:config","http://openoffice.org/2001/config" ); 
    // #i41033# OASIS format needs the "office:class" set.
    if(getDocumentMimeType() == SXC_MIME_TYPE)
        rootElement.setAttribute("office:class","spreadsheet" ); 
    else if(getDocumentMimeType() == SXW_MIME_TYPE)
        rootElement.setAttribute("office:class","text" ); 
    rootElement.setAttribute("office:version","1.0"); 
            
    
    NodeList nodeList;
    Node tmpNode;
    Node rootNode = (Node)rootElement;
    if (metaDoc !=null){
        nodeList= metaDoc.getElementsByTagName(TAG_OFFICE_META);
        if (nodeList.getLength()>0){
      tmpNode = newDoc.importNode(nodeList.item(0),true);
      rootNode.appendChild(tmpNode);
        }
    }if (styleDoc !=null){
        nodeList= styleDoc.getElementsByTagName(TAG_OFFICE_STYLES);
        if (nodeList.getLength()>0){
      tmpNode = newDoc.importNode(nodeList.item(0),true);
      rootNode.appendChild(tmpNode);
                } 
        
    }if (settingsDoc !=null){
        nodeList= settingsDoc.getElementsByTagName(TAG_OFFICE_SETTINGS);
        if (nodeList.getLength()>0){
      tmpNode = newDoc.importNode(nodeList.item(0),true);
      rootNode.appendChild(tmpNode);
        }
    }
    if (contentDoc !=null){
        nodeList= contentDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
        if (nodeList.getLength()>0){
      tmpNode = newDoc.importNode(nodeList.item(0),true);
               rootNode.appendChild(tmpNode);
        }
        
        nodeList= contentDoc.getElementsByTagName(TAG_OFFICE_BODY);  
        if (nodeList.getLength()>0){
      tmpNode = newDoc.importNode(nodeList.item(0),true);
      rootNode.appendChild(tmpNode);
        }
    }
    
    byte contentBytes[] = docToBytes(newDoc);
    //System.out.println(new String(contentBytes));
    os.write(contentBytes);
            } 
            catch(Exception exc){
    System.out.println("\nException in OfficeDocument.write():" +exc);
            }
      //byte contentBytes[] = docToBytes(contentDoc);
  }
    }




    /**
     *  <p>Write out a <code>org.w3c.dom.Document</code> object into a
     *  <code>byte</code> array.</p>
     *
     *  <p>TODO: remove dependency on com.sun.xml.tree.XmlDocument
     *  package!</p>
     *
     *  @param  Document  DOM <code>Document</code> object.
     *
     *  @return  <code>byte</code> array of DOM <code>Document</code>
     *           object.
     *
     *  @throws  IOException  If any I/O error occurs.
     */
    static byte[] docToBytes(Document doc)
        throws IOException {


        ByteArrayOutputStream baos = new ByteArrayOutputStream();


        java.lang.reflect.Constructor con;
        java.lang.reflect.Method meth;
        
        String domImpl = doc.getClass().getName();
        
        /*
         * We may have multiple XML parsers in the Classpath.
         * Depending on which one is first, the actual type of
         * doc may vary.  Need a way to find out which API is being
         * used and use an appropriate serialization method.
         */
        try {
            // First of all try for JAXP 1.0
            if (domImpl.equals("com.sun.xml.tree.XmlDocument")) {
                
                Debug.log(Debug.INFO, "Using JAXP");
                
                Class jaxpDoc = Class.forName("com.sun.xml.tree.XmlDocument");
            
                // The method is in the XMLDocument class itself, not a helper
                meth = jaxpDoc.getMethod("write", 
                            new Class[] { Class.forName("java.io.OutputStream") } );
                                     
                meth.invoke(doc, new Object [] { baos } );
            }
       else if (domImpl.equals("org.apache.crimson.tree.XmlDocument"))
      {
                Debug.log(Debug.INFO, "Using Crimson");
                
     Class crimsonDoc = Class.forName("org.apache.crimson.tree.XmlDocument");
     // The method is in the XMLDocument class itself, not a helper
                meth = crimsonDoc.getMethod("write", 
                            new Class[] { Class.forName("java.io.OutputStream") } );
                                     
                meth.invoke(doc, new Object [] { baos } );  
      }
            else if (domImpl.equals("org.apache.xerces.dom.DocumentImpl") 
            || domImpl.equals("org.apache.xerces.dom.DeferredDocumentImpl")) {
                
                Debug.log(Debug.INFO, "Using Xerces");
                
                // Try for Xerces
                Class xercesSer = 
                        Class.forName("org.apache.xml.serialize.XMLSerializer");
                
                // Get the OutputStream constructor
                // May want to use the OutputFormat parameter at some stage too
                con = xercesSer.getConstructor(new Class [] 
                        { Class.forName("java.io.OutputStream"),
                          Class.forName("org.apache.xml.serialize.OutputFormat") } );
                              
                
                // Get the serialize method
                meth = xercesSer.getMethod("serialize", 
                            new Class [] { Class.forName("org.w3c.dom.Document") } );                                           
                                           
                          
                // Get an instance
                Object serializer = con.newInstance(new Object [] { baos, null } );
                
                
                // Now call serialize to write the document
                meth.invoke(serializer, new Object [] { doc } );
            }
            else {
                // We don't have another parser  
                throw new IOException("No appropriate API (JAXP/Xerces) to serialize XML document: " + domImpl);
            }
        }
        catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe.toString());
        }
        catch (Exception e) {
            // We may get some other errors, but the bottom line is that
            // the steps being executed no longer work
            throw new IOException(e.toString());
        }


        byte bytes[] = baos.toByteArray();


        return bytes;
    }




    /**
     *  Initializes a new DOM <code>Document</code> with the content
     *  containing minimum OpenOffice XML tags.
     *
     *  @throws  IOException  If any I/O error occurs.
     */
    public final void initContentDOM() throws IOException {


        contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT);


        // this is a work-around for a bug in Office6.0 - not really 
        // needed but StarCalc 6.0 will crash without this tag.
        Element root = contentDoc.getDocumentElement();


        Element child = contentDoc.createElement(TAG_OFFICE_FONT_DECLS);
        root.appendChild(child);


        child = contentDoc.createElement(TAG_OFFICE_AUTOMATIC_STYLES);
        root.appendChild(child);


        child = contentDoc.createElement(TAG_OFFICE_BODY);
        root.appendChild(child);
    }


    /**
     *  Initializes a new DOM <code>Document</code> with the content
     *  containing minimum OpenOffice XML tags.
     *
     *  @throws  IOException  If any I/O error occurs.
     */
    public final void initSettingsDOM() throws IOException {


        settingsDoc = createSettingsDOM(TAG_OFFICE_DOCUMENT_SETTINGS);


        // this is a work-around for a bug in Office6.0 - not really 
        // needed but StarCalc 6.0 will crash without this tag.
        Element root = settingsDoc.getDocumentElement();


        Element child = settingsDoc.createElement(TAG_OFFICE_SETTINGS);
        root.appendChild(child);
    }


    /**
     *  Initializes a new DOM Document with styles
     *  containing minimum OpenOffice XML tags.
     *
     *  @throws  IOException  If any I/O error occurs.
     */
    public final void initStyleDOM() throws IOException {


        styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES);
    }


    /**
     *  <p>Creates a new DOM <code>Document</code> containing minimum
     *  OpenOffice XML tags.</p>
     *
     *  <p>This method uses the subclass
     *  <code>getOfficeClassAttribute</code> method to get the
     *  attribute for <i>office:class</i>.</p>
     *
     *  @param  rootName  root name of <code>Document</code>.
     *
     *  @throws  IOException  If any I/O error occurs.
     */
    private final Document createSettingsDOM(String rootName) throws IOException {


        Document doc = null;


        try {


            DocumentBuilder builder = factory.newDocumentBuilder();
            doc = builder.newDocument();


        } catch (ParserConfigurationException ex) {


            throw new OfficeDocumentException(ex);


        }


        Element root = (Element) doc.createElement(rootName);
        doc.appendChild(root);


        root.setAttribute("xmlns:office", "http://openoffice.org/2000/office");
        root.setAttribute("xmlns:xlink", "http://openoffice.org/1999/xlink");
        root.setAttribute("xmlns:config", "http://openoffice.org/2001/config");
        root.setAttribute("office:version", "1.0");


        return doc;
    }




    /**
     *  <p>Creates a new DOM <code>Document</code> containing minimum
     *  OpenOffice XML tags.</p>
     *
     *  <p>This method uses the subclass
     *  <code>getOfficeClassAttribute</code> method to get the
     *  attribute for <i>office:class</i>.</p>
     *
     *  @param  rootName  root name of <code>Document</code>.
     *
     *  @throws  IOException  If any I/O error occurs.
     */
    private final Document createDOM(String rootName) throws IOException {


        Document doc = null;


        try {


            DocumentBuilder builder = factory.newDocumentBuilder();
            doc = builder.newDocument();


        } catch (ParserConfigurationException ex) {


            throw new OfficeDocumentException(ex);


        }


        Element root = (Element) doc.createElement(rootName);
        doc.appendChild(root);


        root.setAttribute("xmlns:office", "http://openoffice.org/2000/office");
        root.setAttribute("xmlns:style", "http://openoffice.org/2000/style");
        root.setAttribute("xmlns:text", "http://openoffice.org/2000/text");
        root.setAttribute("xmlns:table", "http://openoffice.org/2000/table");
        root.setAttribute("xmlns:draw", "http://openoffice.org/2000/drawing");
        root.setAttribute("xmlns:fo", "http://www.w3.org/1999/XSL/Format");
        root.setAttribute("xmlns:xlink", "http://www.w3.org/1999/xlink");
        root.setAttribute("xmlns:number", "http://openoffice.org/2000/datastyle");
        root.setAttribute("xmlns:svg", "http://www.w3.org/2000/svg");
        root.setAttribute("xmlns:chart", "http://openoffice.org/2000/chart");
        root.setAttribute("xmlns:dr3d", "http://openoffice.org/2000/dr3d");
        root.setAttribute("xmlns:math", "http://www.w3.org/1998/Math/MathML");
        root.setAttribute("xmlns:form", "http://openoffice.org/2000/form");
        root.setAttribute("xmlns:script", "http://openoffice.org/2000/script");
        root.setAttribute("office:class", getOfficeClassAttribute());
        root.setAttribute("office:version", "1.0");


        return doc;
    }




    /**
     *  Return the <i>office:class</i> attribute value.
     *
     *  <p>Implemented by subclasses.  <code>createDOM</code> writes the
     *  returned string as the <i>office:class</i> attribute of the root
     *  element of every document it creates.</p>
     *
     *  @return  The attribute value.
     */
    protected abstract String getOfficeClassAttribute();




    /**
     *  <p>Hacked code to filter <!DOCTYPE> tag before
     *  sending stream to parser.</p>
     *
     *  <p>This hacked code needs to be changed later on.</p>
     *
     *  <p>Issue: using current jaxp1.0 parser, there is no way
     *  to turn off processing of dtds.  Current set of dtds
     *  have bugs, processing them will throw exceptions.</p>
     *
     *  <p>This is a simple hack that assumes the whole <!DOCTYPE>
     *  tag are all in the same line.  This is sufficient for
     *  current StarOffice 6.0 generated XML files.  Since this
     *  hack really needs to go away, I don't want to spend
     *  too much time in making it a perfect hack.</p>
     *  FIX (HJ): Removed requirement for DOCTYPE to be in one line
     *  FIX (HJ): No longer removes newlines
     *
     *  @param  is  <code>InputStream</code> to be filtered.
     *
     *  @return  Reader value without the <!DOCTYPE> tag.
     *
     *  @throws  IOException  If any I/O error occurs.
     */
    private static Reader hack(InputStream is) throws IOException {


        BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
        StringBuffer buffer = new StringBuffer(is.available());


        String str = null;


        while ((str = br.readLine()) != null) {


            int sIndex = str.indexOf("<!DOCTYPE");


            if (sIndex > -1) {


                buffer.append(str.substring(0, sIndex));


                int eIndex = str.indexOf('>', sIndex + 8 );


                if (eIndex > -1) {


                    buffer.append(str.substring(eIndex + 1, str.length()));
                    // FIX (HJ): Preserve the newline
                    buffer.append("\n");


                } else {


                    // FIX (HJ): More than one line. Search for '>' in following lines
                    boolean bOK = false;
                    while ((str = br.readLine())!=null) {
                        eIndex = str.indexOf('>');
                        if (eIndex>-1) {
                            buffer.append(str.substring(eIndex+1));
                            // FIX (HJ): Preserve the newline
                            buffer.append("\n");
                            bOK = true;
              break;
                        }
                    }


                    if (!bOK) { throw new IOException("Invalid XML"); }
                }


            } else {


                buffer.append(str);
                // FIX (HJ): Preserve the newline
                buffer.append("\n");
            }
        }


        StringReader r = new StringReader(buffer.toString());
        return r;
    }


    /**
     *  <p>Transform the InputStream to a Reader Stream.</p>
     *
     *  <p>This hacked code needs to be changed later on.</p>
     *
     *  <p>Issue: the new oasis input file stream means 
     *  that the old input stream fails. see #i33702# </p>
     *
     *  @param  is  <code>InputStream</code> to be filtered.
     *
     *  @return  Reader value of the InputStream().
     *
     *  @throws  IOException  If any I/O error occurs.
     */
    private static Reader secondHack(InputStream is) throws IOException {


        BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
   char[] charArray = new char[is.available()];


   br.read(charArray,0,is.available());
        String sStr = new String(charArray);
        StringBuffer sBuf = new StringBuffer(is.available());
   // ensure there is no trailing garbage after the end of the stream.
        int sIndex = sStr.lastIndexOf("</office:document>");
        sBuf.append(sStr.substring(0, sIndex));
        sBuf.append("</office:document>");
        StringReader r = new StringReader(sBuf.toString());
        return r;
    }
    
    
    /**
     * Method to create the initial entries in the manifest.xml file stored
     * in an SX? file.
     */
    private void initManifestDOM() throws IOException {
    
        try {
            DocumentBuilder builder = factory.newDocumentBuilder();
            DOMImplementation domImpl = builder.getDOMImplementation();


            DocumentType docType = domImpl.createDocumentType(TAG_MANIFEST_ROOT, 
                                        "-//OpenOffice.org//DTD Manifest 1.0//EN", 
                                        "Manifest.dtd");
      manifestDoc = domImpl.createDocument("manifest", TAG_MANIFEST_ROOT, docType);
        } catch (ParserConfigurationException ex) {
            throw new OfficeDocumentException(ex);
        }
        
        // Add the <manifest:manifest> entry
        Element manifestRoot = manifestDoc.getDocumentElement();
        
        manifestRoot.setAttribute("xmlns:manifest", "http://openoffice.org/2001/manifest");       
        
        Element docRoot = manifestDoc.createElement(TAG_MANIFEST_FILE);       


        docRoot.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "/");
        docRoot.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, getDocumentMimeType());
        
        manifestRoot.appendChild(docRoot);
    }
}
Source Code of org.openoffice.xmerge.converter.xml.OfficeDocument

Related Classes of org.openoffice.xmerge.converter.xml.OfficeDocument