// ------------------------------------------
// Retrieve the clean-up properties for indexing.
// NOTE(review): presumably this call populates cleanUpStrings and cleanUpChars, which are
// passed to the indexer just below -- confirm against retrieveCleanUpProps().
retrieveCleanUpProps();
SaxHTMLIndex spe = new SaxHTMLIndex(cleanUpStrings, cleanUpChars); // use clean-up props files
// Name the TagSoup classes as the SAX driver and as the JAXP SAXParserFactory implementation.
// These are JVM-wide system properties, so the values remain set after this code has run.
System.setProperty("org.xml.sax.driver", "org.ccil.cowan.tagsoup.Parser");
System.setProperty("javax.xml.parsers.SAXParserFactory", "org.ccil.cowan.tagsoup.jaxp.SAXFactoryImpl");
if (spe.init(tempDico) == 0) {
// Create the list of HTML file descriptions
ArrayList filesDescription = new ArrayList();
String indexerLanguage = getProperty("webhelpIndexerLanguage");
indexerLanguage = ((indexerLanguage == null) ? "en" : indexerLanguage);
//TODO: change this when updating webhelpindexer in order to use the new WriteJSFiles.WriteIndex method
if (getLog().isDebugEnabled())
getLog().debug("Indexer language is: " + indexerLanguage);
// Parse each HTML file
for (int f = 0; f < htmlFiles.size(); f++) {
File ftemp = (File) htmlFiles.get(f);
if (getLog().isDebugEnabled())
getLog().debug("Parsing html file: " + ftemp.getAbsolutePath());
//tempMap.put(key, value);
// The HTML file's information is added to the list of FileInfoObject entries
DocFileInfo docFileInfoTemp = new DocFileInfo(spe.runExtractData(ftemp, indexerLanguage, stemming));
ftemp = docFileInfoTemp.getFullpath();
String stemp = ftemp.toString();
int i = stemp.indexOf(targetBaseDir.getAbsolutePath());