Package edu.umd.cloud9.collection.XMLInputFormatOld

Examples of edu.umd.cloud9.collection.XMLInputFormatOld.XMLRecordReader


  public DocnoMapping getDocnoMapping() throws IOException {
    return loadDocnoMapping(indexPath, fs);
  }

  public static DocnoMapping loadDocnoMapping(String indexPath, FileSystem fs) throws IOException {
    DocnoMapping mDocMapping = null;
    // load the docid to docno mappings
    try {
      LOG.info("Loading DocnoMapping file...");
      RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

      String className = env.readDocnoMappingClass();
      LOG.info(" - Class name: " + className);
      mDocMapping = (DocnoMapping) Class.forName(className).newInstance();

      Path mappingFile = env.getDocnoMappingData();
      LOG.info(" - File name: " + mappingFile);
      mDocMapping.loadMapping(mappingFile, fs);
      LOG.info("Done!");
    } catch (Exception e) {
      throw new IOException("Error initializing DocnoMapping!");
    }
    return mDocMapping;
View Full Code Here


    if (list.getLength() > 0) {  conf.set(Constants.StemmedStopwordListQ, list.item(0).getTextContent())
  }

  static float eval(QueryEngine qe, Configuration conf, String setting){
    Qrels qrels = new Qrels(conf.get(Constants.QrelsPath));
    DocnoMapping mapping = qe.getDocnoMapping();
    float apSum = 0, p10Sum = 0;
    Map<String, Accumulator[]> results = qe.getResults();
    for (String qid : results.keySet()) {
      float ap = (float) RankedListEvaluator.computeAP(results.get(qid), mapping,
          qrels.getReldocsForQid(qid));
View Full Code Here

    public Spinn3rItemRecordReader(FileSplit split, JobConf conf) throws IOException {
      conf.set(XMLInputFormatOld.START_TAG_KEY, Spinn3rItem.XML_START_TAG);
      conf.set(XMLInputFormatOld.END_TAG_KEY, Spinn3rItem.XML_END_TAG);

      mReader = new XMLRecordReader(split, conf);

      // this is the current file
      Path p = split.getPath();

      // get its directory listing
View Full Code Here

    public PmcArticleRecordReader(FileSplit split, JobConf conf) throws IOException {
      conf.set(XMLInputFormatOld.START_TAG_KEY, PmcArticle.XML_START_TAG);
      conf.set(XMLInputFormatOld.END_TAG_KEY, PmcArticle.XML_END_TAG);

      mReader = new XMLRecordReader(split, conf);
    }
View Full Code Here

     */
    public TrecDocumentRecordReader(FileSplit split, JobConf conf) throws IOException {
      conf.set(XMLInputFormatOld.START_TAG_KEY, TrecDocument.XML_START_TAG);
      conf.set(XMLInputFormatOld.END_TAG_KEY, TrecDocument.XML_END_TAG);

      reader = new XMLRecordReader(split, conf);
    }
View Full Code Here

    public WikipediaPageRecordReader(FileSplit split, JobConf conf) throws IOException {
      conf.set(XMLInputFormatOld.START_TAG_KEY, WikipediaPage.XML_START_TAG);
      conf.set(XMLInputFormatOld.END_TAG_KEY, WikipediaPage.XML_END_TAG);
     
      language = conf.get("wiki.language", "en"); // Assume 'en' by default.
      reader = new XMLRecordReader(split, conf);
    }
View Full Code Here

     */
    public TrecWebRecordReader(FileSplit split, JobConf conf) throws IOException {
      conf.set(XMLInputFormatOld.START_TAG_KEY, TrecWebDocument.XML_START_TAG);
      conf.set(XMLInputFormatOld.END_TAG_KEY, TrecWebDocument.XML_END_TAG);

      reader = new XMLRecordReader(split, conf);
    }
View Full Code Here

     */
    public MedlineCitationRecordReader(FileSplit split, JobConf conf) throws IOException {
      conf.set(XMLInputFormatOld.START_TAG_KEY, MedlineCitation.XML_START_TAG);
      conf.set(XMLInputFormatOld.END_TAG_KEY, MedlineCitation.XML_END_TAG);

      reader = new XMLRecordReader(split, conf);
    }
View Full Code Here

    public Aquaint2DocumentRecordReader(FileSplit split, JobConf conf) throws IOException {
      conf.set(XMLInputFormatOld.START_TAG_KEY, Aquaint2Document.XML_START_TAG);
      conf.set(XMLInputFormatOld.END_TAG_KEY, Aquaint2Document.XML_END_TAG);

      reader = new XMLRecordReader(split, conf);
    }
View Full Code Here

      return -1;
    }

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    Path mappingFile = env.getDocnoMappingData();
    new ClueWarcDocnoMappingBuilder().build(new Path(collection), mappingFile, conf);

    conf.set(Constants.CollectionName, "ClueWeb:English:Segment" + segment);
    conf.set(Constants.CollectionPath, collection);
    conf.set(Constants.IndexPath, indexPath);
    conf.set(Constants.InputFormat, SequenceFileInputFormat.class.getCanonicalName());
View Full Code Here

TOP

Related Classes of edu.umd.cloud9.collection.XMLInputFormatOld.XMLRecordReader

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.