/*
* This software and supporting documentation were developed by
*
* Siemens Corporate Technology
* Competence Center Knowledge Management and Business Transformation
* D-81730 Munich, Germany
*
* Authors (representing a really great team ;-) )
* Stefan B. Augustin, Thorbjörn Hansen, Manfred Langen
*
* This software is Open Source under GNU General Public License (GPL).
* Read the text of this license in LICENSE.TXT
* or look at www.opensource.org/licenses/
*
* Once more we emphasize, that:
* THIS SOFTWARE IS MADE AVAILABLE, AS IS, WITHOUT ANY WARRANTY
* REGARDING THE SOFTWARE, ITS PERFORMANCE OR
* FITNESS FOR ANY PARTICULAR USE, FREEDOM FROM ANY COMPUTER DISEASES OR
* ITS CONFORMITY TO ANY SPECIFICATION. THE ENTIRE RISK AS TO QUALITY AND
* PERFORMANCE OF THE SOFTWARE IS WITH THE USER.
*
*/
// ************ package ****************************************************
package appl.Portal.Utils.LinkSearch;
// ************ imports ******************************************************
// KFM
import KFM.Exceptions.ProgrammerException;
import KFM.File.FileUtils;
import KFM.Converter;
import KFM.log.*;
// OROMatcher package
import com.oroinc.text.regex.*;
// java package
import java.util.Hashtable;
/**
 * A single result item extracted from a piece of text with a regular expression.
 *
 * The constructor matches the regular expression against the content once and
 * stores the captured subgroups in a Hashtable, keyed by the names handed in by
 * the caller. The Hashtable is available through {@link #getItem()}.
 *
 * For more information about the implementation of regular expressions by OROMatcher(TM) see:
 * $/KFM/www-docs/protected/developer/external-docu/OROMatcher-1.0.7/doc/index.html
 */
public class Item
{
    /** Keys for the mItems Hashtable; capture group j is stored under mNames[j - 1]. */
    private String[] mNames;

    /** The text the regular expression is matched against to obtain the attributes. */
    private String mContent;

    /**
     * Helper used to get rid of HTML tags that came from the search engine.
     *
     * NOTE(review): this field is never assigned in this class. The calls made
     * through it in the constructor can only work if the Converter methods are
     * static - confirm against KFM.Converter.
     */
    private Converter mConverter;

    /** A regular expression which has the attributes of an item as subgroups.
     *
     * For more information about regular expressions and about how they work see:
     * $/KFM/www-docs/protected/developer/appl/Portal/MetaSearch/SearchEngineWrapper.html
     *
     * For more information about the implementation of regular expressions by OROMatcher(TM) see:
     * $/KFM/www-docs/protected/developer/external-docu/OROMatcher-1.0.7/doc/index.html
     *
     * The subgroups of the MatchResult (group(j) with j > 0) are passed on to a
     * Hashtable which is keyed by the strings given through mNames[].
     */
    protected String mRegExpItem;

    /** Hashtable that holds the attributes of a ResultItem.
     *
     * The keys of the Hashtable are given by mNames[].
     */
    private Hashtable mItems = new Hashtable();

    /** The MatchResult of the constructor's match, or null if nothing matched.
     *
     * See package com.oroinc.text.regex.
     */
    private MatchResult mMatcher;

    /**
     * Constructor that gets all the information it needs to look for ResultItems.
     *
     * Matches aRegExpItem against aContent. If there is a match, every subgroup
     * j >= 1 for which a name exists is cleaned with the Converter and stored
     * under aNames[j - 1]. If there is no match the item stays empty.
     *
     * @param aRegExpItem regular expression whose subgroups are the attributes.
     * @param aContent    text to search in.
     * @param aNames      keys for the subgroups; may be null or contain null
     *                    entries, in which case the affected subgroups are skipped.
     * @throws ProgrammerException if aRegExpItem is not a valid pattern (see match()).
     */
    public Item( String aRegExpItem, String aContent, String[] aNames )
    {
        mNames = aNames;
        mContent = aContent;
        mRegExpItem = aRegExpItem;

        KFMSystem.log.debug(aRegExpItem);
        KFMSystem.log.debug(mContent);
        KFMSystem.log.debug( "Matcher of Item was reached!");

        mMatcher = this.match( mRegExpItem, mContent );
        if (mMatcher == null) {
            KFMSystem.log.debug("NO ITEM MATCH");
            return;
        }

        // Never read more groups than there are names (and tolerate a missing name array).
        int tNameCount = (mNames == null) ? 0 : mNames.length;
        int tGroupCount = mMatcher.groups();

        // Start at 1: group 0 is skipped (presumably the whole match - see OROMatcher docs).
        for( int j = 1; (j < tGroupCount) && ((j - 1) < tNameCount); ++j )
        {
            String tName = mNames[j - 1];
            String tGroup = mMatcher.group(j);

            // Hashtable accepts neither null keys nor null values, so an unnamed
            // slot or a group without captured text cannot be stored.
            if (tName == null || tGroup == null) {
                continue;
            }

            String s = mConverter.removeHtmlTags( tGroup );
            s = mConverter.replaceString(" ", "", s );
            if (s != null) {
                mItems.put( tName, s );
            }
        }
    }

    /**
     * Method which matches a pattern string with a content string.
     *
     * The pattern is compiled with Perl5Compiler.SINGLELINE_MASK, so the regular
     * expression metacharacter '.' matches everything, even new lines ('\n').
     *
     * For more information about the implementation of regular expressions by OROMatcher(TM) see:
     * $/KFM/www-docs/protected/developer/external-docu/OROMatcher-1.0.7/doc/index.html
     *
     * @param patternString the regular expression to compile.
     * @param text          the text to search in; null is treated as "no match".
     * @return the first MatchResult found in text, or null if there is none.
     * @throws ProgrammerException if patternString is not a valid pattern.
     */
    public MatchResult match( String patternString, String text )
    {
        PatternMatcher matcher = new Perl5Matcher();
        PatternCompiler compiler = new Perl5Compiler();
        Pattern pattern = null;

        // When you set the Perl5Compiler.SINGLELINE_MASK option
        // the content string is treated as a single line, even if there
        // are some '\n' in it.
        try {
            pattern = compiler.compile(patternString, Perl5Compiler.SINGLELINE_MASK );
        } catch(MalformedPatternException e) {
            System.err.println("LinkSearch.Item.match: Bad pattern: `" + e.getMessage() + "'.");
            //@@@ Make this cleaner some day.
            throw new ProgrammerException("LinkSearch.match: Bad pattern: `" + e.getMessage() + "'.");
        }

        // Nothing to search in, hence nothing can match.
        if (text == null) {
            return null;
        }

        PatternMatcherInput input = new PatternMatcherInput(text);
        if (matcher.contains(input, pattern)) {
            return matcher.getMatch();
        }
        return null;
    }

    /**
     * Returns the attributes found by the constructor.
     *
     * @return the internal Hashtable (not a copy); empty if nothing matched.
     */
    public Hashtable getItem()
    {
        return mItems;
    }

    /**
     * Method to test if a pattern compiles.
     *
     * Note that the pattern is compiled here with SINGLELINE_MASK and
     * CASE_INSENSITIVE_MASK, whereas match() uses SINGLELINE_MASK only.
     *
     * @param aPatternString the regular expression to check.
     * @return true if the pattern compiled, false if it is malformed.
     */
    public static boolean compile( String aPatternString )
    {
        try
        {
            // Only the success of the compilation matters, the Pattern itself is not needed.
            new Perl5Compiler().compile( aPatternString,
                Perl5Compiler.SINGLELINE_MASK | Perl5Compiler.CASE_INSENSITIVE_MASK );
            return true;
        }
        catch( MalformedPatternException e )
        {
            System.err.println (e);
            return false;
        }
    }

    /**
     * The ideal main routine for debugging purposes:
     * just put some text to be matched by a pattern into
     * a file, type a pattern string and debug.
     *
     * Prints every group of the first match to System.out.
     */
    public static void main( String args[] )
    {
        try {
            String tFileContent = FileUtils.readWholeTextFile("C:\\users\\reindl\\Files\\yahooquery.txt");
            String tPatternString =
                "<li>(.*?)</i><p>";

            if( tFileContent == null )
            {
                return;
            }

            // Check the pattern first: constructing an Item with a bad pattern throws.
            if (!Item.compile( tPatternString )) {
                System.out.println("Did not compile.");
                return;
            }

            Item tItem = new Item(tPatternString, tFileContent, new String[2]);
            MatchResult tMR = tItem.match( tPatternString, tFileContent);
            if (tMR == null) {
                // match() returns null when the pattern is not found in the text.
                System.out.println("No match.");
                return;
            }
            for (int i = 0; i < tMR.groups(); ++i ) {
                System.out.println("group(" + i + ") :" + tMR.group(i));
            }
        }
        catch( java.io.IOException e){System.err.println( e );}
    }
}