Package com.nexwave.nquindexer

Examples of com.nexwave.nquindexer.SaxHTMLIndex


    // ------------------------------------------

    // Retrieve the clean-up properties for indexing
    retrieveCleanUpProps();

    SaxHTMLIndex spe = new SaxHTMLIndex(cleanUpStrings, cleanUpChars); // use clean-up props files

    System.setProperty("org.xml.sax.driver", "org.ccil.cowan.tagsoup.Parser");
    System.setProperty("javax.xml.parsers.SAXParserFactory", "org.ccil.cowan.tagsoup.jaxp.SAXFactoryImpl");

    if (spe.init(tempDico) == 0) {
      //create a html file description list
      ArrayList filesDescription = new ArrayList();

      String indexerLanguage = getProperty("webhelpIndexerLanguage");
      indexerLanguage = ((indexerLanguage == null) ? "en" : indexerLanguage);
      //TODO: change this when updating webhelpindexer in order to use the new WriteJSFiles.WriteIndex method
        if (getLog().isDebugEnabled())
          getLog().debug("Indexer language is: " + indexerLanguage);

      // parse each html files
      for (int f = 0; f < htmlFiles.size(); f++) {
        File ftemp = (File) htmlFiles.get(f);

        if (getLog().isDebugEnabled())
          getLog().debug("Parsing html file: " + ftemp.getAbsolutePath());

        //tempMap.put(key, value);
        //The HTML file information are added in the list of FileInfoObject
        DocFileInfo docFileInfoTemp = new DocFileInfo(spe.runExtractData(ftemp, indexerLanguage, stemming));

        ftemp = docFileInfoTemp.getFullpath();

        String stemp = ftemp.toString();
        int i = stemp.indexOf(targetBaseDir.getAbsolutePath());
View Full Code Here


    // ------------------------------------------

    // Retrieve the clean-up properties for indexing
    retrieveCleanUpProps();

    SaxHTMLIndex spe = new SaxHTMLIndex(cleanUpStrings, cleanUpChars); // use clean-up props files

    if (spe.init(tempDico) == 0) {
      //create a html file description list
      ArrayList filesDescription = new ArrayList();

      String indexerLanguage = getProperty("webhelpIndexerLanguage");
      indexerLanguage = ((indexerLanguage == null) ? "en" : indexerLanguage);
      //TODO: change this when updating webhelpindexer in order to use the new WriteJSFiles.WriteIndex method
      IndexerTask.indexerLanguage = indexerLanguage;
        if (getLog().isDebugEnabled())
          getLog().debug("Indexer language is: " + indexerLanguage);

      // parse each html files
      for (int f = 0; f < htmlFiles.size(); f++) {
        File ftemp = (File) htmlFiles.get(f);

        if (getLog().isDebugEnabled())
          getLog().debug("Parsing html file: " + ftemp.getAbsolutePath());

        //tempMap.put(key, value);
        //The HTML file information are added in the list of FileInfoObject
        DocFileInfo docFileInfoTemp = new DocFileInfo(spe.runExtractData(ftemp, indexerLanguage));

        ftemp = docFileInfoTemp.getFullpath();

        String stemp = ftemp.toString();
        int i = stemp.indexOf(targetBaseDir.getAbsolutePath());
View Full Code Here

    // ------------------------------------------

    // Retrieve the clean-up properties for indexing
    retrieveCleanUpProps();

    SaxHTMLIndex spe = new SaxHTMLIndex(cleanUpStrings, cleanUpChars); // use clean-up props files

    if (spe.init(tempDico) == 0) {
      //create a html file description list
      ArrayList filesDescription = new ArrayList();

      // parse each html files
      for (int f = 0; f < htmlFiles.size(); f++) {
        File ftemp = (File) htmlFiles.get(f);

        if (getLog().isDebugEnabled())
          getLog().debug("Parsing html file: " + ftemp.getAbsolutePath());

        //tempMap.put(key, value);
        //The HTML file information are added in the list of FileInfoObject
        String indexerLanguage = getProperty("webhelpIndexerLanguage");
        DocFileInfo docFileInfoTemp = new DocFileInfo(spe.runExtractData(ftemp, (indexerLanguage == null) ? "en"
            : indexerLanguage));

        ftemp = docFileInfoTemp.getFullpath();

        String stemp = ftemp.toString();
View Full Code Here

    // ------------------------------------------

    // Retrieve the clean-up properties for indexing
    retrieveCleanUpProps();

    SaxHTMLIndex spe = new SaxHTMLIndex(cleanUpStrings, cleanUpChars); // use clean-up props files

    if (spe.init(tempDico) == 0) {
      //create a html file description list
      ArrayList filesDescription = new ArrayList();

      // parse each html files
      for (int f = 0; f < htmlFiles.size(); f++) {
        File ftemp = (File) htmlFiles.get(f);

        if (getLog().isDebugEnabled())
          getLog().debug("Parsing html file: " + ftemp.getAbsolutePath());

        //tempMap.put(key, value);
        //The HTML file information are added in the list of FileInfoObject
        String      indexerLanguage = getProperty("webhelpIndexerLanguage");
        DocFileInfo docFileInfoTemp =
          new DocFileInfo(spe.runExtractData(ftemp,
                                             (indexerLanguage == null) ? "en" : indexerLanguage));

        ftemp = docFileInfoTemp.getFullpath();

        String stemp = ftemp.toString();
View Full Code Here

TOP

Related Classes of com.nexwave.nquindexer.SaxHTMLIndex

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.