// Source Code of appl.Portal.Utils.XML.XMLwithDOMBuilder

/*
 *  This software and supporting documentation were developed by
 *
 *    Siemens Corporate Technology
 *    Competence Center Knowledge Management and Business Transformation
 *    D-81730 Munich, Germany
 *
 *    Authors (representing a really great team ;-) )
 *            Stefan B. Augustin, Thorbj�rn Hansen, Manfred Langen
 *
 *  This software is Open Source under GNU General Public License (GPL). 
 *  Read the text of this license in LICENSE.TXT 
 *  or look at www.opensource.org/licenses/
 *
 *  Once more we emphasize, that:
 *  THIS SOFTWARE IS MADE AVAILABLE,  AS IS,  WITHOUT ANY WARRANTY
 *  REGARDING  THE  SOFTWARE,  ITS  PERFORMANCE OR
 *  FITNESS FOR ANY PARTICULAR USE, FREEDOM FROM ANY COMPUTER DISEASES OR
 *  ITS CONFORMITY TO ANY SPECIFICATION. THE ENTIRE RISK AS TO QUALITY AND
 *  PERFORMANCE OF THE SOFTWARE IS WITH THE USER.
 *
 */




// XMLwithDOMBuilder


// ************ package ****************************************************
package appl.Portal.Utils.XML;


// ************ imports ******************************************************


import java.io.IOException;
import java.text.SimpleDateFormat;
import java.io.StringWriter;
import java.util.Hashtable;
import java.net.URL;


import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.apache.xml.serialize.XMLSerializer;
import org.apache.xml.serialize.OutputFormat;


import appl.Portal.Utils.LinkSearch.*;
import KFM.Exceptions.*;
import KFM.Converter;


/**
 * Class XMLwithDOMBuilder is concerned with constructing an XML document with information about Links.
 * It implements this construction process by using the DOM API of the W3C.
 * It is a concrete subclass of XMLBuilder.<P>
 *
 * For the DOM implementation, we use Xerces from the OpenXML project of Apache.
 * It is freely available at http://www.openxml.org.
 * The product chosen should not shine through at too many places, though.
 * Where a product-specific statement is inevitable, we mark it as '//@XERCES'.
 * A good idea would be to place product specific code (right now, this is only
 * within method getXML()) into a specific subclass like XMLwithDOMBuilder_XERCES.<P>
 *
 * <P> The XML to be generated follows the <A HREF="http://portal.erlm.siemens.de/interface.html">
 * specification of the MetaSearchEngine </A>. We made some extensions to this DTD, however:</P>
 *
 * <UL>
 *   <LI> Element ResultSet now supports two (optional) attributes BackToURL and BackToText </LI>
 * </UL>
 *
 * The exact source of the Links is not (and need not be) known in this class
 * and its subclasses, as it is provided by the interface ResultSet (and ResultItem).<P>
 *
 * A small note by Matthias: <BR>
 * One small goal was to format the output so that it is comfortably readable even in a text editor.
 * This could not be achieved, because the class "OutputFormat" only supports indenting
 * but not line separation.
 *
 * @version 1.0 (2000-03-20), ready to use
 * @see XMLBuilder
 * @see ResultSet
 * @see ResultItem
*/
public class XMLwithDOMBuilder extends XMLBuilder
{
    /**
     * The fully qualified name of the class that implements interface org.w3c.dom.Document.
     * Using this indirection, we manage to keep method build() portable to any
     * other DOM-supporting product (in Java).
     */
    static protected String mDocumentImplClass = "org.apache.xerces.dom.DocumentImpl";  //@XERCES


    /**
     * The XML encoding to use.
     */
    static protected String mEncoding = "ISO-8859-1";


    /**
     * The XMLDocument constructed.
     */
    private Document mXMLDocument;


    /**
     * Construct an instance of XMLwithDOMBuilder.
     */
    public XMLwithDOMBuilder ()
    {
    }


    /**
     * Construct the XML for the set of Links that is described by several ResultSets.
     * All the Links within all the ResultSets are concatenated. The order of the ResultSets
     * within the array is maintained.
     * The result is 'memorized' somehow, so that a subsequent call of getXML()
     * can return the XML document.
     *
     * @param aResultSetArray an array of ResultSets describing all the Links.
     */
    public void build( ResultSet[] aResultSetArray )
    {
        SimpleDateFormat formatter = new SimpleDateFormat ("yyyy-MM-dd");


        ResultSet tResultSet;
        String tAttrValue;
        String[] tAttrArray;
        Hashtable[] tAttrHashtable;
        Hashtable tMoreAttr;
        int tItemCount = 0;


        Element tResultSetElem, tResultElem, tTagToIgnoreElem, tKeyElem, tCategoryElem, tSummaryElem;


        // Some characteristics of the last ResultItem visited.
        // We need this to support grouping properties like GROUP_BY_FIRST_LETTER_TITLE.
        String tLast_FirstLetterTitle   = null;
        String tLast_FirstLetterKeyword = null;
        String tLast_FirstLetterUrl     = null;
        String tLast_Keyword            = null;
        String tLast_CategoryPath       = null;


        try
        {
            // create the XML document
            //========================


            mXMLDocument = ( Document ) Class.forName( mDocumentImplClass ).newInstance();


            // create XML element 'ResultSet' as the root of the XML document
            //===============================================================


            tResultSetElem = mXMLDocument.createElement( "ResultSet" );
            mXMLDocument.appendChild( tResultSetElem );


            // iterate over all the ResultSets within aResultSetArray.
            for (int i=0; i < aResultSetArray.length; i++) {
                tResultSet = aResultSetArray[i];




                // set Attributes of XML element 'ResultSet'
                //==========================================================
                tAttrValue = mBackToURL;
                if( tAttrValue != null )
                    tResultSetElem.setAttribute( "BackToURL", tAttrValue );
                tAttrValue = mBackToText;
                if( tAttrValue != null )
                    tResultSetElem.setAttribute( "BackToText", tAttrValue );


                // set further Attributes of XML element 'ResultSet'
                // (we query the first ResultSet to obtain these attributes)
                //==========================================================
                tAttrValue = tResultSet.getOrigin();
                if( tAttrValue != null )
                    tResultSetElem.setAttribute( "SeName", tAttrValue );


                tAttrValue = tResultSet.getSearchTerm();
                if( tAttrValue != null )
                    tResultSetElem.setAttribute( "QueryString", tAttrValue );


                while( tResultSet.hasMoreItems() )
                {
                    tItemCount++;


                    // This loop iterates as long as there are resultitems available
                    // for every Iteration one Result Element is defined with the
                    // element specific attributes and a few subelements "Summary", "key" , and "Category" are added
                    // and then the Result Element itself is added to the ResultSet
                    ResultItem CurItem = tResultSet.nextItem();


                    // create next XML element 'Result' as a child of XML element 'ResultSet'
                    //=======================================================================


                    tResultElem = mXMLDocument.createElement( "Result" );
                    tResultSetElem.appendChild( tResultElem );


                    // set Attributes of XML element 'Result'
                    //=======================================


                    tMoreAttr = CurItem.getMoreAttributes();


                    String tURL = CurItem.getURL();
                    if( tURL != null ) {
                        tResultElem.setAttribute( "OriginalUrl", tURL );
                    }


                    tAttrValue = CurItem.getTitle();
                    if( tAttrValue != null ) {
                        tResultElem.setAttribute( "Title", tAttrValue );


                        // We have to examine if the group property GROUP_BY_FIRST_LETTER_TITLE has to be applied.
                        // For this, compare the first letter of the current title to tLast_FirstLetterTitle.
                        if (isGroupSet(GROUP_BY_FIRST_LETTER_TITLE) && tAttrValue.length() != 0) {
                            String tGroupValue = getGroupValueIfDifferent(tAttrValue.substring(0,1), tLast_FirstLetterTitle);
                            if (tGroupValue != null) {
                                tLast_FirstLetterTitle = tGroupValue;


                                // create new XML element 'TagToIgnore' as a child of XML element 'Result'
                                //========================================================================
                                // <TagToIgnore groupBy="firstLetterTitle" groupValue="A" src="SieMapIndex"/>
                                tTagToIgnoreElem = mXMLDocument.createElement( "TagToIgnore" );
                                tResultElem.appendChild( tTagToIgnoreElem );
                                tTagToIgnoreElem.setAttribute( "groupBy", "firstLetterTitle");
                                tTagToIgnoreElem.setAttribute( "groupValue", tGroupValue);
                                if (mCreator != null) {
                                    tTagToIgnoreElem.setAttribute( "src", mCreator);
                                }
                            }
                        }
                    }


                    tAttrValue = CurItem.getLanguage();
                    if( tAttrValue != null )
                        tResultElem.setAttribute( "DocLanguage", tAttrValue );


                    // SrcDepartment is not directly supported by ResultItem
                    tAttrValue = (String) tMoreAttr.get("SourceUnit");
                    if( tAttrValue != null )
                        tResultElem.setAttribute( "SrcDepartment", tAttrValue);


                    // HighlightedUrl is not directly supported by ResultItem
                    tAttrValue = (String) tMoreAttr.get("FeedbackModuleUrl");
                    if( tAttrValue != null )
                        tResultElem.setAttribute( "HighlightedUrl", tAttrValue );


                    if( CurItem.getHitScore()!= null )
                        tResultElem.setAttribute( "Score", CurItem.getHitScore().toString() );


                    if( CurItem.getLastModified()!= null )
                        tResultElem.setAttribute( "ModifiedDate", formatter.format(CurItem.getLastModified()) );


                    // ExpiresDate is not directly supported by ResultItem.
                    // It should only be created if mCreateExpiresDate was set to true.
                    if (mCreateExpiresDate) {
                        tAttrValue = (String) tMoreAttr.get("ExpiresDate");
                        if ( tAttrValue != null )
                            tResultElem.setAttribute( "ExpiresDate", tAttrValue );
                    }


                    if( CurItem.getSize()!= null )
                        tResultElem.setAttribute( "DocSize", CurItem.getSize().toString() );


                    // MimeType is not directly supported by ResultItem
                    tAttrValue = (String) tMoreAttr.get("MimeType");
                    if( tAttrValue != null )
                        tResultElem.setAttribute( "MimeType", tAttrValue );


                    tAttrValue = CurItem.getAuthor();
                    if( tAttrValue != null )
                        tResultElem.setAttribute( "Author", tAttrValue );


                    tAttrValue = CurItem.getDescription();
                    if( tAttrValue != null )
                    {
                        tSummaryElem = mXMLDocument.createElement( "Summary" );
                        tSummaryElem.appendChild( mXMLDocument.createTextNode( tAttrValue) );
                        tResultElem.appendChild( tSummaryElem );
                    }


                    // We have to examine if the group property GROUP_BY_CATEGORY_PATH has to be applied.
                    // For this, compare the category path of the current ResultItem (if present) to tLast_CategoryPath.
                    if (isGroupSet(GROUP_BY_CATEGORY_PATH)) {
                        String tGroupValue = (String) tMoreAttr.get("CategoryPath");
                        if (tGroupValue != null && ! tGroupValue.equals(tLast_CategoryPath)) {
                            tLast_CategoryPath = tGroupValue;


                            // create new XML element 'TagToIgnore' as a child of XML element 'Result'
                            //========================================================================
                            // <TagToIgnore groupBy="categoryPath" groupValue="SieMap : Market"
                            //                            categoryUrl="http:...." src="SieMapIndex"/>
                            tTagToIgnoreElem = mXMLDocument.createElement( "TagToIgnore" );
                            tResultElem.appendChild( tTagToIgnoreElem );
                            tTagToIgnoreElem.setAttribute( "groupBy", "categoryPath");
                            tTagToIgnoreElem.setAttribute( "groupValue", tGroupValue);
                            String tCategoryUrl = (String) tMoreAttr.get("CategoryViewerUrl");
                            if (tCategoryUrl != null)
                                tTagToIgnoreElem.setAttribute( "categoryUrl", tCategoryUrl);
                            if (mCreator != null) {
                                tTagToIgnoreElem.setAttribute( "src", mCreator);
                            }
                        }
                    }


                    // We have to examine if the group property GROUP_BY_FIRST_LETTER_KEYWORD has to be applied.
                    // For this, compare the first letter of the current keyword (if present) to tLast_FirstLetterKeyword.
                    tAttrValue = (String) tMoreAttr.get("MasterKeyword");
                    if (isGroupSet(GROUP_BY_FIRST_LETTER_KEYWORD) && tAttrValue != null && tAttrValue.length() != 0) {
                        String tGroupValue = getGroupValueIfDifferent(tAttrValue.substring(0,1), tLast_FirstLetterKeyword);
                        if (tGroupValue != null) {
                            tLast_FirstLetterKeyword = tGroupValue;


                            // create new XML element 'TagToIgnore' as a child of XML element 'Result'
                            //========================================================================
                            // <TagToIgnore groupBy="firstLetterKeyword" groupValue="A" src="SieMapIndex"/>
                            tTagToIgnoreElem = mXMLDocument.createElement( "TagToIgnore" );
                            tResultElem.appendChild( tTagToIgnoreElem );
                            tTagToIgnoreElem.setAttribute( "groupBy", "firstLetterKeyword");
                            tTagToIgnoreElem.setAttribute( "groupValue", tGroupValue);
                            if (mCreator != null) {
                                tTagToIgnoreElem.setAttribute( "src", mCreator);
                            }
                        }
                    }


                    // We have to examine if the group property GROUP_BY_KEYWORD has to be applied.
                    // For this, compare the keyword of the current ResultItem (if present) to tLast_Keyword.
                    if (isGroupSet(GROUP_BY_KEYWORD)) {
                        // Note: tAttrValue is still the MasterKeyword
                        if (tAttrValue != null && ! tAttrValue.equals(tLast_Keyword)) {
                            tLast_Keyword = tAttrValue;


                            // create new XML element 'TagToIgnore' as a child of XML element 'Result'
                            //========================================================================
                            // <TagToIgnore groupBy="keyword" groupValue="Suchmaschinen" src="SieMapIndex"/>
                            tTagToIgnoreElem = mXMLDocument.createElement( "TagToIgnore" );
                            tResultElem.appendChild( tTagToIgnoreElem );
                            tTagToIgnoreElem.setAttribute( "groupBy", "keyword");
                            tTagToIgnoreElem.setAttribute( "groupValue", tAttrValue);
                            if (mCreator != null) {
                                tTagToIgnoreElem.setAttribute( "src", mCreator);
                            }
                        }
                    }


                    tAttrHashtable = CurItem.getKeywords();
                    if( tAttrHashtable != null ) {
                        int tSize = tAttrHashtable.length;
                        String tName;
                        for (int j=0; j<tSize; j++)
                        {
                            // get the information available for the j-th keyword of the current <Result> (only name)
                            tName = (String) tAttrHashtable[j].get("name");
                            tKeyElem = mXMLDocument.createElement( "key" );
                            if (tName != null)
                                tKeyElem.setAttribute( "word", tName );
                            tResultElem.appendChild( tKeyElem );
                        }
                    }


                    tAttrHashtable = CurItem.getCategories();
                    if( tAttrHashtable != null ) {
                        int tSize = tAttrHashtable.length;
                        String tName, tLink;
                        for (int j=0; j<tSize; j++)
                        {
                            // get the information available for the j-th category of the current <Result> (name, link)
                            tName = (String) tAttrHashtable[j].get("name");
                            tLink = (String) tAttrHashtable[j].get("link");
                            tCategoryElem = mXMLDocument.createElement( "Category" );
                            if (tName != null)
                                tCategoryElem.setAttribute( "name", tName );
                            if (tLink != null)
                                tCategoryElem.setAttribute( "link", tLink );
                            tResultElem.appendChild( tCategoryElem );
                        }
                    }
                }


                // some final Attribute of XML element 'ResultSet' is the number of results
                // (we only know the value after the loop)
                tResultSetElem.setAttribute( "NumResults", String.valueOf(tItemCount) );


            } // for
        }
        catch( Exception e)
        {
            e.printStackTrace();
        }


    }


    /**
     * Return the XML document as a String.
     *
     *  @return the XML document as a String
     */
    public String getXMLasString()
    {
        StringWriter tXMLWriter = new StringWriter();


        try
        {
            // a serializer
            OutputFormat outputFormat = new OutputFormat( mXMLDocument, null, true );   //@XERCES
            outputFormat.setEncoding(mEncoding);                                        //@XERCES
            XMLSerializer ser = new XMLSerializer( tXMLWriter, outputFormat );          //@XERCES
            ser.serialize( mXMLDocument );                                              //@XERCES
            return tXMLWriter.toString();
        }
        catch( IOException e )
        {
            throw new ProgrammerException("XMLwithDOMBuilder: getXML returns exception: " + e.getMessage());
        }
    }


    /**
     * Return the XML document as a DOM Node.
     *
     * @return the XML document as a DOM Node
     */
    public Node getXMLasDOM()
    {
        return (Node) mXMLDocument;
    }


}
// Source Code of appl.Portal.Utils.XML.XMLwithDOMBuilder

// Related Classes of appl.Portal.Utils.XML.XMLwithDOMBuilder