Examples of WikipediaDocnoMappingBuilder


Examples of edu.umd.cloud9.collection.wikipedia.WikipediaDocnoMappingBuilder

          "-input=" + rawCollection,
          "-output_file=" + mappingFile.toString(),
          "-wiki_language=" + collectionLang };
      LOG.info("Running WikipediaDocnoMappingBuilder with args " + Arrays.toString(arr));

      WikipediaDocnoMappingBuilder tool = new WikipediaDocnoMappingBuilder();
      tool.setConf(conf);
      tool.run(arr);

      fs.delete(new Path(indexRootPath + "/wiki-docid-tmp"), true);
    } else {
      LOG.info("Docno mapping already exists at: " + mappingFile);
    }

    // Repack the raw Wikipedia collection with block compression (skipped if part-00000 already exists)
    if (!fs.exists(new Path(seqCollection + "/part-00000"))) {
      LOG.info(seqCollection + " doesn't exist, creating...");
      String[] arr = new String[] { "-input=" + rawCollection,
          "-output=" + seqCollection,
          "-mapping_file=" + mappingFile.toString(),
          "-compression_type=block",
          "-wiki_language=" + collectionLang };
      LOG.info("Running RepackWikipedia with args " + Arrays.toString(arr));

      RepackWikipedia tool = new RepackWikipedia();
      tool.setConf(conf);
      tool.run(arr);
    } else {
      LOG.info("Repacked collection already exists at: " + seqCollection);     
    }

    conf.set(Constants.CollectionName, "Wikipedia-"+collectionLang);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.