Examples of WikiXMLParser


Examples of info.bliki.wiki.dump.WikiXMLParser

      // the following directory must exist for image references
      String imageDirectory = "c:/temp/dump/WikiDumpImages";
      System.out.println("Prepare wiki database");
      db = prepareDB(mainDirectory);
      IArticleFilter handler;
      WikiXMLParser wxp;
      if (!skipFirstPass) {
        System.out.println("First pass - write templates to database:");
        handler = new DemoTemplateArticleFilter(db);
        wxp = new WikiXMLParser(bz2Filename, handler);
        wxp.parse();
        System.out.println(' ');
      }
      System.out.println("Second pass - write HTML files to directory:");
      handler = new DemoArticleFilter(db, htmlDirectory, imageDirectory);
      wxp = new WikiXMLParser(bz2Filename, handler);
      wxp.parse();
      System.out.println(' ');
      System.out.println("Done!");
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
View Full Code Here

Examples of info.bliki.wiki.dump.WikiXMLParser

            // the following directory must exist for image references
            String imageDirectory = "c:/temp/dump/WikiDumpImages";
            System.out.println("Prepare wiki database");
            db = prepareDB(mainDirectory);
            IArticleFilter handler;
            WikiXMLParser wxp;
            if (!skipFirstPass) {
                System.out.println("First pass - write templates to database:");
                handler = new DemoTemplateArticleFilter(db);
                wxp = new WikiXMLParser(bz2Filename, handler);
                wxp.parse();
                System.out.println(' ');
            }
            System.out.println("Second pass - write HTML files to directory:");
            handler = new DemoArticleFilter(db, htmlDirectory, imageDirectory);
            wxp = new WikiXMLParser(bz2Filename, handler);
            wxp.parse();
            System.out.println(' ');
            System.out.println("Done!");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
View Full Code Here

Examples of info.bliki.wiki.dump.WikiXMLParser

  public WikipediaArticleReader(File inputFile, File outputFile, String lang) {
    JsonConverter handler = new JsonConverter();
    // encoder = new JsonRecordParser<Article>(Article.class);
    parser = new ArticleParser(lang);
    try {
      wxp = new WikiXMLParser(inputFile.getAbsolutePath(), handler);
    } catch (Exception e) {
      logger.error("creating the parser {}", e.toString());
      System.exit(-1);
    }
View Full Code Here

Examples of org.elasticsearch.river.wikipedia.support.WikiXMLParser

            } else {
                logger.warn("failed to create index [{}], disabling river...", e, indexName);
                return;
            }
        }
        WikiXMLParser parser = WikiXMLParserFactory.getSAXParser(url);
        try {
            parser.setPageCallback(new PageCallback());
        } catch (Exception e) {
            logger.error("failed to create parser", e);
            return;
        }
View Full Code Here

Examples of org.elasticsearch.river.wikipedia.support.WikiXMLParser

                logger.warn("failed to create index [{}], disabling river...", e, indexName);
                return;
            }
        }
        currentRequest = client.prepareBulk();
        WikiXMLParser parser = WikiXMLParserFactory.getSAXParser(url);
        try {
            parser.setPageCallback(new PageCallback());
        } catch (Exception e) {
            logger.error("failed to create parser", e);
            return;
        }
        thread = EsExecutors.daemonThreadFactory(settings.globalSettings(), "wikipedia_slurper").newThread(new Parser(parser));
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.