/*
* This software and supporting documentation were developed by
*
* Siemens Corporate Technology
* Competence Center Knowledge Management and Business Transformation
* D-81730 Munich, Germany
*
* Authors (representing a really great team ;-) )
* Stefan B. Augustin, Thorbj�rn Hansen, Manfred Langen
*
* This software is Open Source under GNU General Public License (GPL).
* Read the text of this license in LICENSE.TXT
* or look at www.opensource.org/licenses/
*
* Once more we emphasize, that:
* THIS SOFTWARE IS MADE AVAILABLE, AS IS, WITHOUT ANY WARRANTY
* REGARDING THE SOFTWARE, ITS PERFORMANCE OR
* FITNESS FOR ANY PARTICULAR USE, FREEDOM FROM ANY COMPUTER DISEASES OR
* ITS CONFORMITY TO ANY SPECIFICATION. THE ENTIRE RISK AS TO QUALITY AND
* PERFORMANCE OF THE SOFTWARE IS WITH THE USER.
*
*/
// XMLwithDOMBuilder
// ************ package ****************************************************
package appl.Portal.Utils.XML;
// ************ imports ******************************************************
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.io.StringWriter;
import java.util.Hashtable;
import java.net.URL;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.apache.xml.serialize.XMLSerializer;
import org.apache.xml.serialize.OutputFormat;
import appl.Portal.Utils.LinkSearch.*;
import KFM.Exceptions.*;
import KFM.Converter;
/**
* Class XMLwithDOMBuilder is concerned with constructing an XML document with information about Links.
* It implements this construction process by using the DOM API of the W3C.
* It is a concrete subclass of XMLBuilder.<P>
*
* For the DOM implementation, we use Xerces from the OpenXML project of Apache.
* It is freely available at http://www.openxml.org.
* The product chosen should not shine through at too many places, though.
* Where a product-specific statement is inevitable, we mark it as '//@XERCES'.
* A good idea would be to place product-specific code (right now, this is only
* within method getXMLasString()) into a specific subclass like XMLwithDOMBuilder_XERCES.<P>
*
* <P> The XML to be generated follows the <A HREF="http://portal.erlm.siemens.de/interface.html">
* specification of the MetaSearchEngine </A>. We made some extensions to this DTD, however:</P>
*
* <UL>
* <LI> Element ResultSet now supports two (optional) attributes BackToURL and BackToText </LI>
* </UL>
*
* The exact source of the Links is not (and need not be) known in this class
* and its subclasses, as it is provided by the interface ResultSet (and ResultItem).<P>
*
* A small note by Matthias: <BR>
* One small goal was to format the output so that it is comfortably readable even in a text editor.
* This could not be achieved because the class "OutputFormat" only supports indenting,
* but not line separation.
*
* @version 1.0 (2000-03-20), ready to use
* @see XMLBuilder
* @see ResultSet
* @see ResultItem
*/
public class XMLwithDOMBuilder extends XMLBuilder
{
    /**
     * The fully qualified name of the class that implements interface org.w3c.dom.Document.
     * Using this indirection, we manage to keep method build() portable to any
     * other DOM-supporting product (in Java).
     */
    static protected String mDocumentImplClass = "org.apache.xerces.dom.DocumentImpl"; //@XERCES

    /**
     * The XML encoding to use when the document is serialized by getXMLasString().
     */
    static protected String mEncoding = "ISO-8859-1";

    /**
     * The XML document constructed by the most recent call of build(),
     * or null if build() was not called yet or could not create a document.
     */
    private Document mXMLDocument;

    /**
     * Construct an instance of XMLwithDOMBuilder.
     */
    public XMLwithDOMBuilder ()
    {
    }

    /**
     * Construct the XML for the set of Links that is described by several ResultSets.
     * All the Links within all the ResultSets are concatenated. The order of the ResultSets
     * within the array is maintained.
     * The resulting document is kept in this builder, so that a subsequent call of
     * getXMLasString() or getXMLasDOM() can return it.<P>
     *
     * The method works on a best-effort basis: an exception raised while building is
     * printed to standard error and not propagated. Whatever part of the document was
     * built up to that point is kept, and attribute 'NumResults' of the root element
     * always states how many 'Result' elements the document really contains.
     * If not even the document itself could be created, no document is kept at all.
     *
     * @param aResultSetArray an array of ResultSets describing all the Links.
     */
    public void build( ResultSet[] aResultSetArray )
    {
        SimpleDateFormat tDateFormat = new SimpleDateFormat( "yyyy-MM-dd" );
        Element tResultSetElem = null;
        int tItemCount = 0;

        // Some characteristics of the last group marker emitted.
        // We need this to support grouping properties like GROUP_BY_FIRST_LETTER_TITLE.
        String tLastFirstLetterTitle = null;
        String tLastFirstLetterKeyword = null;
        String tLastKeyword = null;
        String tLastCategoryPath = null;

        // Forget the document of an earlier call, so that a failure below can never
        // make the getters hand out the result of a previous build().
        mXMLDocument = null;

        try
        {
            // create the XML document
            //========================
            mXMLDocument = ( Document ) Class.forName( mDocumentImplClass ).newInstance();

            // create XML element 'ResultSet' as the root of the XML document
            //===============================================================
            tResultSetElem = mXMLDocument.createElement( "ResultSet" );
            mXMLDocument.appendChild( tResultSetElem );

            // These two attributes do not depend on any ResultSet, so set them once.
            setAttributeIfPresent( tResultSetElem, "BackToURL", mBackToURL );
            setAttributeIfPresent( tResultSetElem, "BackToText", mBackToText );

            // iterate over all the ResultSets within aResultSetArray.
            for (int i = 0; i < aResultSetArray.length; i++) {
                ResultSet tResultSet = aResultSetArray[i];

                // Every ResultSet that provides a value overwrites these attributes, so with
                // several ResultSets the last non-null value ends up in the document.
                // NOTE(review): an older comment here said the first ResultSet is queried for
                // these attributes - confirm which of the two behaviours is intended.
                setAttributeIfPresent( tResultSetElem, "SeName", tResultSet.getOrigin() );
                setAttributeIfPresent( tResultSetElem, "QueryString", tResultSet.getSearchTerm() );

                // One 'Result' element is created per ResultItem. It gets its attributes,
                // then its subelements 'TagToIgnore', 'Summary', 'key' and 'Category'.
                while( tResultSet.hasMoreItems() )
                {
                    ResultItem tItem = tResultSet.nextItem();

                    // create next XML element 'Result' as a child of XML element 'ResultSet'
                    //=======================================================================
                    Element tResultElem = mXMLDocument.createElement( "Result" );
                    tResultSetElem.appendChild( tResultElem );
                    // Count the element as soon as it is part of the document, so that the
                    // count stays correct even if filling the element fails further down.
                    tItemCount++;

                    // All other getters of ResultItem are checked for null below;
                    // treat a missing attribute table the same way (as "no extra attributes").
                    Hashtable tMoreAttr = tItem.getMoreAttributes();
                    if (tMoreAttr == null)
                        tMoreAttr = new Hashtable();

                    // set Attributes of XML element 'Result'
                    //=======================================
                    setAttributeIfPresent( tResultElem, "OriginalUrl", tItem.getURL() );

                    String tTitle = tItem.getTitle();
                    if (tTitle != null) {
                        tResultElem.setAttribute( "Title", tTitle );
                        // Group property GROUP_BY_FIRST_LETTER_TITLE: compare the first letter
                        // of the current title to the one of the last group marker.
                        if (isGroupSet( GROUP_BY_FIRST_LETTER_TITLE ) && tTitle.length() != 0) {
                            String tGroupValue =
                                getGroupValueIfDifferent( tTitle.substring( 0, 1 ), tLastFirstLetterTitle );
                            if (tGroupValue != null) {
                                tLastFirstLetterTitle = tGroupValue;
                                appendGroupTag( tResultElem, "firstLetterTitle", tGroupValue, null );
                            }
                        }
                    }

                    setAttributeIfPresent( tResultElem, "DocLanguage", tItem.getLanguage() );
                    // SrcDepartment is not directly supported by ResultItem
                    setAttributeIfPresent( tResultElem, "SrcDepartment", (String) tMoreAttr.get( "SourceUnit" ) );
                    // HighlightedUrl is not directly supported by ResultItem
                    setAttributeIfPresent( tResultElem, "HighlightedUrl", (String) tMoreAttr.get( "FeedbackModuleUrl" ) );
                    if (tItem.getHitScore() != null)
                        tResultElem.setAttribute( "Score", tItem.getHitScore().toString() );
                    if (tItem.getLastModified() != null)
                        tResultElem.setAttribute( "ModifiedDate", tDateFormat.format( tItem.getLastModified() ) );
                    // ExpiresDate is not directly supported by ResultItem.
                    // It should only be created if mCreateExpiresDate was set to true.
                    if (mCreateExpiresDate)
                        setAttributeIfPresent( tResultElem, "ExpiresDate", (String) tMoreAttr.get( "ExpiresDate" ) );
                    if (tItem.getSize() != null)
                        tResultElem.setAttribute( "DocSize", tItem.getSize().toString() );
                    // MimeType is not directly supported by ResultItem
                    setAttributeIfPresent( tResultElem, "MimeType", (String) tMoreAttr.get( "MimeType" ) );
                    setAttributeIfPresent( tResultElem, "Author", tItem.getAuthor() );

                    // subelement 'Summary' carries the description as its text
                    String tDescription = tItem.getDescription();
                    if (tDescription != null) {
                        Element tSummaryElem = mXMLDocument.createElement( "Summary" );
                        tSummaryElem.appendChild( mXMLDocument.createTextNode( tDescription ) );
                        tResultElem.appendChild( tSummaryElem );
                    }

                    // Group property GROUP_BY_CATEGORY_PATH: compare the category path of the
                    // current ResultItem (if present) to the one of the last group marker.
                    if (isGroupSet( GROUP_BY_CATEGORY_PATH )) {
                        String tCategoryPath = (String) tMoreAttr.get( "CategoryPath" );
                        if (tCategoryPath != null && ! tCategoryPath.equals( tLastCategoryPath )) {
                            tLastCategoryPath = tCategoryPath;
                            appendGroupTag( tResultElem, "categoryPath", tCategoryPath,
                                            (String) tMoreAttr.get( "CategoryViewerUrl" ) );
                        }
                    }

                    // Both keyword related group properties work on the master keyword.
                    String tMasterKeyword = (String) tMoreAttr.get( "MasterKeyword" );

                    // Group property GROUP_BY_FIRST_LETTER_KEYWORD: compare the first letter of the
                    // current keyword (if present) to the one of the last group marker.
                    if (isGroupSet( GROUP_BY_FIRST_LETTER_KEYWORD )
                        && tMasterKeyword != null && tMasterKeyword.length() != 0) {
                        String tGroupValue =
                            getGroupValueIfDifferent( tMasterKeyword.substring( 0, 1 ), tLastFirstLetterKeyword );
                        if (tGroupValue != null) {
                            tLastFirstLetterKeyword = tGroupValue;
                            appendGroupTag( tResultElem, "firstLetterKeyword", tGroupValue, null );
                        }
                    }

                    // Group property GROUP_BY_KEYWORD: compare the keyword of the current
                    // ResultItem (if present) to the one of the last group marker.
                    if (isGroupSet( GROUP_BY_KEYWORD )
                        && tMasterKeyword != null && ! tMasterKeyword.equals( tLastKeyword )) {
                        tLastKeyword = tMasterKeyword;
                        appendGroupTag( tResultElem, "keyword", tMasterKeyword, null );
                    }

                    appendKeywords( tResultElem, tItem.getKeywords() );
                    appendCategories( tResultElem, tItem.getCategories() );
                }
            } // for
        }
        catch( Exception e )
        {
            // Best effort: report the problem and keep what was built so far.
            e.printStackTrace();
        }
        finally
        {
            // The number of results is only known after the loops. Setting it here makes
            // sure the attribute exists for an empty array and after a failure, too.
            if (tResultSetElem != null)
                tResultSetElem.setAttribute( "NumResults", String.valueOf( tItemCount ) );
        }
    }

    /**
     * Set an attribute of an XML element, unless the value is null.
     *
     * @param aElement the element to modify
     * @param aName    the name of the attribute
     * @param aValue   the value of the attribute; if null, the element is left untouched
     */
    private static void setAttributeIfPresent( Element aElement, String aName, String aValue )
    {
        if (aValue != null)
            aElement.setAttribute( aName, aValue );
    }

    /**
     * Append a group marker (XML element 'TagToIgnore') as a child of a 'Result' element.
     * Attribute 'src' is only set if mCreator is not null.
     *
     * @param aResultElem  the 'Result' element that starts a new group
     * @param aGroupBy     the value of attribute 'groupBy', naming the group property
     * @param aGroupValue  the value of attribute 'groupValue'
     * @param aCategoryUrl the value of attribute 'categoryUrl'; if null, the attribute is omitted
     */
    private void appendGroupTag( Element aResultElem, String aGroupBy, String aGroupValue, String aCategoryUrl )
    {
        // e.g. <TagToIgnore groupBy="firstLetterTitle" groupValue="A" src="SieMapIndex"/>
        Element tTagToIgnoreElem = mXMLDocument.createElement( "TagToIgnore" );
        aResultElem.appendChild( tTagToIgnoreElem );
        tTagToIgnoreElem.setAttribute( "groupBy", aGroupBy );
        tTagToIgnoreElem.setAttribute( "groupValue", aGroupValue );
        setAttributeIfPresent( tTagToIgnoreElem, "categoryUrl", aCategoryUrl );
        setAttributeIfPresent( tTagToIgnoreElem, "src", mCreator );
    }

    /**
     * Append one XML element 'key' per keyword as a child of a 'Result' element.
     * Entry "name" of a keyword becomes attribute 'word', if present.
     *
     * @param aResultElem the 'Result' element to extend
     * @param aKeywords   the keywords of the result; if null, nothing is appended
     */
    private void appendKeywords( Element aResultElem, Hashtable[] aKeywords )
    {
        if (aKeywords == null)
            return;
        for (int j = 0; j < aKeywords.length; j++) {
            Element tKeyElem = mXMLDocument.createElement( "key" );
            setAttributeIfPresent( tKeyElem, "word", (String) aKeywords[j].get( "name" ) );
            aResultElem.appendChild( tKeyElem );
        }
    }

    /**
     * Append one XML element 'Category' per category as a child of a 'Result' element.
     * Entries "name" and "link" of a category become the attributes of the same name, if present.
     *
     * @param aResultElem the 'Result' element to extend
     * @param aCategories the categories of the result; if null, nothing is appended
     */
    private void appendCategories( Element aResultElem, Hashtable[] aCategories )
    {
        if (aCategories == null)
            return;
        for (int j = 0; j < aCategories.length; j++) {
            Element tCategoryElem = mXMLDocument.createElement( "Category" );
            setAttributeIfPresent( tCategoryElem, "name", (String) aCategories[j].get( "name" ) );
            setAttributeIfPresent( tCategoryElem, "link", (String) aCategories[j].get( "link" ) );
            aResultElem.appendChild( tCategoryElem );
        }
    }

    /**
     * Return the XML document as a String, indented and declared with the encoding mEncoding.
     *
     * @return the XML document as a String
     * @throws ProgrammerException if there is no document, i.e. build() was not called or
     *         could not create one, or if serializing the document fails
     */
    public String getXMLasString()
    {
        if (mXMLDocument == null)
            throw new ProgrammerException("XMLwithDOMBuilder: getXMLasString called, but build() has not produced a document");

        StringWriter tXMLWriter = new StringWriter();
        try
        {
            // a serializer
            OutputFormat tOutputFormat = new OutputFormat( mXMLDocument, null, true ); //@XERCES
            tOutputFormat.setEncoding( mEncoding ); //@XERCES
            XMLSerializer tSerializer = new XMLSerializer( tXMLWriter, tOutputFormat ); //@XERCES
            tSerializer.serialize( mXMLDocument ); //@XERCES
            return tXMLWriter.toString();
        }
        catch( IOException e )
        {
            // Report the exception itself (class and message), as its message alone may be null.
            throw new ProgrammerException("XMLwithDOMBuilder: getXML returns exception: " + e);
        }
    }

    /**
     * Return the XML document as a DOM Node.
     *
     * @return the XML document as a DOM Node, or null if there is no document
     */
    public Node getXMLasDOM()
    {
        return mXMLDocument;
    }
}