/*
* This software and supporting documentation were developed by
*
* Siemens Corporate Technology
* Competence Center Knowledge Management and Business Transformation
* D-81730 Munich, Germany
*
* Authors (representing a really great team ;-) )
* Stefan B. Augustin, Thorbjørn Hansen, Manfred Langen
*
* This software is Open Source under GNU General Public License (GPL).
* Read the text of this license in LICENSE.TXT
* or look at www.opensource.org/licenses/
*
* Once more we emphasize, that:
* THIS SOFTWARE IS MADE AVAILABLE, AS IS, WITHOUT ANY WARRANTY
* REGARDING THE SOFTWARE, ITS PERFORMANCE OR
* FITNESS FOR ANY PARTICULAR USE, FREEDOM FROM ANY COMPUTER DISEASES OR
* ITS CONFORMITY TO ANY SPECIFICATION. THE ENTIRE RISK AS TO QUALITY AND
* PERFORMANCE OF THE SOFTWARE IS WITH THE USER.
*
*/
// ************ package ******************************************************
package appl.Portal.Utils.Translator;
// ************ import ******************************************************
// Linksearch packages
import appl.Portal.Utils.LinkSearch.*;
// OROMatcher packages
import com.oroinc.text.regex.*;
// java packages
import java.io.Writer;
import java.io.StringWriter;
import java.io.IOException;
import java.util.Properties;
import java.util.Vector;
import java.util.Date;
import java.util.Hashtable;
import java.net.URL;
import java.net.MalformedURLException;
// KFM classes
import KFM.Converter;
import KFM.log.*;
// uses HTMLLoader
import KFM.HTML.HtmlLoader2;
/**
 * Result set that sends a word to the LEO online dictionary, parses the
 * returned HTML page with regular expressions and exposes the extracted
 * word pairs one at a time through {@link #hasMoreItems()} and
 * {@link #nextItem()}.
 *
 * Typical use: optionally call {@link #setDisplayLanguage(String)}, then
 * {@link #translate(String)}, then iterate while {@link #hasMoreItems()}
 * returns true.
 */
public class Translator implements GenericResultSet
{
    /** Loader used to fetch the result page; a new one is created by every translate() call. */
    private HtmlLoader2 mHtmlLoader;

    /** String that contains a regular expression.
     *
     * This regular expression parses the static wraparound of the result html page
     * of Leo ( i. e. banners, images etc. ).
     *
     * For more information about regular expressions and about how they work see:
     * $/KFM/www-docs/protected/developer/appl/Portal/MetaSearch/SearchEngineWrapper.html
     *
     * For more information about the implementation of regular expressions by OROMatcher(TM) see:
     * $/KFM/www-docs/protected/developer/external-docu/OROMatcher-1.0.7/doc/index.html
     *
     * To be matched: String that contains a result html page
     * MatchResult passed on to: mRegExpWordpair.
     */
    private static final String mRegExpFrame = "<!-- # Spalten 4-6, Ergebnisse # -->(.*)<!-- # Spalte 7, Abstandshalter # -->";

    /** String that contains a regular expression.
     *
     * This regular expression parses one wordpair ( e. g. an english word and the german translation
     * for it ) of the result html page.
     *
     * For more information about regular expressions and about how they work see:
     * $/KFM/www-docs/protected/developer/appl/Portal/MetaSearch/SearchEngineWrapper.html
     *
     * For more information about the implementation of regular expressions by OROMatcher(TM) see:
     * $/KFM/www-docs/protected/developer/external-docu/OROMatcher-1.0.7/doc/index.html
     *
     * To be matched: group(0) of mRegExpFrame
     * MatchResult passed on to: mRegExpWord
     */
    private static final String mRegExpWordpair = "<TR\\s*VALIGN=\"top\"(?:.*?)43%\">(.*?)</TD>(?:.*?)43%\">(.*?)</TD>";

    /** String that contains a regular expression.
     *
     * This regular expression parses two words ( e. g. an english word and the german translation
     * for it ) of the result html page. The pattern text is currently identical to
     * mRegExpWordpair.
     *
     * For more information about regular expressions and about how they work see:
     * $/KFM/www-docs/protected/developer/appl/Portal/MetaSearch/SearchEngineWrapper.html
     *
     * For more information about the implementation of regular expressions by OROMatcher(TM) see:
     * $/KFM/www-docs/protected/developer/external-docu/OROMatcher-1.0.7/doc/index.html
     *
     * To be matched: group(0) of mRegExpWordpair
     * MatchResult passed on to: groups( > 0 ) are passed on to Item.java, there stored in a
     * Hashtable that is referenced by strings given through mNames[].
     */
    private static final String mRegExpWord = "<TR\\s*VALIGN=\"top\"(?:.*?)43%\">(.*?)</TD>(?:.*?)43%\">(.*?)</TD>";

    /** This string array contains the keys under which the two words of a wordpair are stored. */
    private static final String[] mNames = {"en", "de" };

    /** Empty String because the value of display language is optional. */
    private String mDisplayLanguage = "";

    /** The web address of leo. */
    private static final String theUrltemplate = "http://dict.leo.org/";

    /** Http parameter which tells leo's web application to start the translation. */
    private static final String theEventString = "?relink=off&search=";

    /** Http parameter which tells leo's web application in which language the result should be displayed.
     *
     * This parameter is optional.
     */
    private static final String theDisplayLanguageString = "&lang=";

    /** This class parses HTML files.
     *
     * It is configured with the information given by the class members
     * mRegExpFrame, mRegExpWordpair, mRegExpWord, mNames.
     * Stays null until a translate() call has loaded a page successfully.
     */
    private HtmlParser mHtmlParser;

    /** Keeps track of the current index when iterating through the resultset. */
    int mCounter = 0;

    /** Log file */
    private KFMLog mLog;

    /** Creates a translator that logs to the system-wide log (KFMSystem.log). */
    public Translator ()
    {
        mLog = KFMSystem.log;
    }

    /** Creates a translator that logs to the given log.
     *
     * @param aLog the log that receives messages about failed page loads
     */
    public Translator (
        KFMLog aLog)
    {
        mLog = aLog;
    }

    /** Translates a word by sending it to leo, retrieving and parsing the result.
     *
     * The display language set via setDisplayLanguage() is sent along with the request.
     * The class HtmlParser.java is used, where the results are stored.
     * The results can be retrieved with hasMoreItems() / nextItem().
     *
     * Every call discards the results and the iteration position of a previous call,
     * so a failed request never exposes stale wordpairs.
     *
     * @param aWord the word to be translated
     * @return true if the result page was loaded and handed to the parser (this does not
     *         guarantee that any wordpair was found); false if the page could not be loaded
     */
    public boolean translate( String aWord )
    {
        // Forget whatever an earlier translation left behind.
        mHtmlParser = null;
        mCounter = 0;

        String tUrlString = buildUrl(aWord, getDisplayLanguage());

        // load URL
        mHtmlLoader = new HtmlLoader2();
        boolean tLoaded = false;
        try {
            // The second argument (0) is passed exactly as before; its meaning is defined
            // by HtmlLoader2 - presumably a timeout or retry setting, TODO confirm.
            tLoaded = mHtmlLoader.load(tUrlString, 0);
        } catch (MalformedURLException e) {
            mLog.info("Translator URL is malformed: " + tUrlString + " (" + e.getMessage() + ")");
        } catch (java.io.InterruptedIOException e) {
            mLog.info("Loading Translator URL was interrupted: " + tUrlString + " (" + e.getMessage() + ")");
        }

        if (!tLoaded) {
            mLog.info("Cannot load Translator URL");
            return false;
        }

        String tContent = mHtmlLoader.getContent();

        // give the parser the information it needs
        HtmlParser tParser = new HtmlParser();
        tParser.setRegExpFrame(mRegExpFrame);
        tParser.setRegExpItemSet(mRegExpWordpair);
        tParser.setRegExpItem(mRegExpWord);
        tParser.setNames(mNames);

        // parse document and store wordpairs;
        // they can be retrieved with nextItem()
        tParser.parse(tContent);
        mHtmlParser = tParser;
        return true;
    }

    /** Returns the next wordpair retrieved from leo as a Hashtable and advances the iteration.
     *
     * The Hashtable is the one the parser holds for the current index; main() below reads it
     * with the keys listed in mNames ("en", "de").
     *
     * Call this only after hasMoreItems() returned true. Without a preceding successful
     * translate() there is no parser and the call fails with a NullPointerException.
     *
     * @return Hashtable with one wordpair.
     */
    public Hashtable nextItem()
    {
        Hashtable tItem = mHtmlParser.getItem(mCounter);
        mCounter++;
        return tItem;
    }

    /** Sets the language in which leo should display the result.
     *
     * Takes effect for translate() calls made afterwards.
     *
     * @param aDisplayLanguage the language code appended to the request, e. g. "de"
     */
    public void setDisplayLanguage( String aDisplayLanguage)
    {
        mDisplayLanguage = aDisplayLanguage;
    }

    /** Returns the display language that is sent with translation requests.
     *
     * @return the value last passed to setDisplayLanguage(), or "" if it was never set
     */
    public String getDisplayLanguage()
    {
        return mDisplayLanguage;
    }

    /** Tests if more wordpairs are available.
     *
     * @return false if no page has been parsed yet or all wordpairs have been returned
     */
    public boolean hasMoreItems()
    {
        if (mHtmlParser == null) {
            return false;
        }
        return mCounter < mHtmlParser.getNumberOfItems();
    }

    /** Builds the url with the specific http parameters.
     *
     * See docu of 'theEventString' and 'theDisplayLanguageString'.
     *
     * @param aWord     the word to be translated; must not be null
     * @param aLanguage the display language to request; null is treated like ""
     * @return the complete request url
     */
    public String buildUrl( String aWord, String aLanguage )
    {
        // Use the language the caller asked for; never emit the literal text "null".
        String tLanguage = (aLanguage == null) ? "" : aLanguage;

        // The word to be translated may contain spaces, therefore it has to be encoded.
        // NOTE(review): encode(String) is deprecated and uses the platform default charset;
        // switching to an explicit charset would change the bytes sent to the server, so
        // confirm which encoding the server expects before changing it.
        String tEncodedWord = java.net.URLEncoder.encode( aWord );

        return theUrltemplate + theEventString + tEncodedWord
            + theDisplayLanguageString + tLanguage;
    }

    /** Gets the leo web url.
     *
     * @return the base address of leo, without any http parameters
     */
    public String getUrl()
    {
        return theUrltemplate;
    }

    /** Small command line demo: translates "Framework" and prints all wordpairs found.
     *
     * @param args not used
     */
    public static void main( String[] args )
    {
        Translator tTranslator = new Translator();

        // The display language has to be set before translate() builds the request url,
        // otherwise it is not part of the request.
        tTranslator.setDisplayLanguage("de");
        if( !tTranslator.translate( "Framework" ) )
        {
            System.out.println( "Translation request failed." );
        }

        boolean tGermanLabels = tTranslator.getDisplayLanguage().equals("de");
        while( tTranslator.hasMoreItems() )
        {
            Hashtable tIdiom = tTranslator.nextItem();
            if( tGermanLabels )
            {
                System.out.println( "Englisch: " + (String)tIdiom.get("en") );
                System.out.println( "Deutsch: " +( String ) tIdiom.get("de"));
            }
            else
            {
                System.out.println( "English: " + (String)tIdiom.get("en") );
                System.out.println( "German: " +( String ) tIdiom.get("de"));
            }
        }
        System.out.println( "Finished!" );
    }
}