Package edu.umd.cloud9.collection

Examples of edu.umd.cloud9.collection.Indexable


  public DocnoMapping getDocnoMapping() throws IOException {
    return loadDocnoMapping(indexPath, fs);
  }

  public static DocnoMapping loadDocnoMapping(String indexPath, FileSystem fs) throws IOException {
    DocnoMapping mDocMapping = null;
    // load the docid to docno mappings
    try {
      LOG.info("Loading DocnoMapping file...");
      RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

      String className = env.readDocnoMappingClass();
      LOG.info(" - Class name: " + className);
      mDocMapping = (DocnoMapping) Class.forName(className).newInstance();

      Path mappingFile = env.getDocnoMappingData();
      LOG.info(" - File name: " + mappingFile);
      mDocMapping.loadMapping(mappingFile, fs);
      LOG.info("Done!");
    } catch (Exception e) {
      throw new IOException("Error initializing DocnoMapping!");
    }
    return mDocMapping;
View Full Code Here


    if (list.getLength() > 0) {  conf.set(Constants.StemmedStopwordListQ, list.item(0).getTextContent())
  }

  static float eval(QueryEngine qe, Configuration conf, String setting){
    Qrels qrels = new Qrels(conf.get(Constants.QrelsPath));
    DocnoMapping mapping = qe.getDocnoMapping();
    float apSum = 0, p10Sum = 0;
    Map<String, Accumulator[]> results = qe.getResults();
    for (String qid : results.keySet()) {
      float ap = (float) RankedListEvaluator.computeAP(results.get(qid), mapping,
          qrels.getReldocsForQid(qid));
View Full Code Here

      int docno = 0;
      try {
        if (req.getParameterValues(DOCNO) != null)
          docno = Integer.parseInt(req.getParameterValues(DOCNO)[0]);

        Indexable doc = mForwardIndex.getDocument(docno);

        if (doc != null) {
          sLogger.info("fetched: " + doc.getDocid() + " = docno " + docno);
          res.setContentType(doc.getDisplayContentType());

          PrintWriter out = res.getWriter();
          out.print(doc.getDisplayContent());
          out.close();
        } else {
          throw new Exception();
        }
      } catch (Exception e) {
View Full Code Here

        if (req.getParameterValues("docno") != null)
          docno = Integer.parseInt(req.getParameterValues("docno")[0]);
        else if (req.getParameterValues("docid") != null)
          docno = sForwardIndex.getDocno(req.getParameterValues("docid")[0]);

        Indexable doc = null;
        int i = 0;
        for(i = 0; i < lastDocs.length; i++)
          if(docno <= lastDocs[i]) {
            doc = docForwardIndex[i].getDocument(docno);
            break;
          }

        if (doc != null) {
          LOG.info("fetched: " + doc.getDocid() + " = docno " + docno);
          res.setContentType(doc.getDisplayContentType());

          PrintWriter out = res.getWriter();
          out.print(doc.getContent().replaceAll("<\\s*/\\s*[bB][oO][dD][Yy]\\s*>", "<br><br><a href=\"/fetch_docno?docno=" + docno +
                "\"> Fetch anchor text for docno: " + docno + "</a></body>"));
          out.close();
        } else {
          throw new Exception();
        }
View Full Code Here

      try {
       
        if (req.getParameterValues("docid") != null)
          docid = req.getParameterValues("docid")[0];

        Indexable doc = sForwardIndex.getDocument(docid);
       
        if (doc != null) {
          LOG.info("fetched: " + doc.getDocid());
          res.setContentType(doc.getDisplayContentType());

          PrintWriter out = res.getWriter();
          out.print(doc.getContent().replace("<body>", "<body><a href=\"/fetch_content?docid=" + docid +
              "\"> Fetch content for docid: " + docid + "</a><br><br>"));
          out.close();
        } else {
          throw new Exception();
        }
View Full Code Here

      try {
       
        if (req.getParameterValues("docno") != null)
          docno = Integer.parseInt(req.getParameterValues("docno")[0]);

        Indexable doc = sForwardIndex.getDocument(docno);
 
        if (doc != null) {
          LOG.info("fetched: " + doc.getDocid() + " = docno " + docno);
          res.setContentType(doc.getDisplayContentType());
         
          PrintWriter out = res.getWriter();
          out.print(doc.getContent().replace("<body>", "<body><a href=\"/fetch_content?docno=" + docno +
              "\"> Fetch content for docno: " + docno + "</a><br><br>"));
          out.close();
        } else {
          throw new Exception();
        }
View Full Code Here

      int docno = 0;
      try {
        if (req.getParameterValues(DOCNO) != null)
          docno = Integer.parseInt(req.getParameterValues(DOCNO)[0]);

        Indexable doc = mForwardIndex.getDocument(docno);

        if (doc != null) {
          sLogger.info("fetched: " + doc.getDocid() + " = docno " + docno);
          res.setContentType(doc.getDisplayContentType());

          PrintWriter out = res.getWriter();
          out.print(doc.getDisplayContent());
          out.close();
        } else {
          throw new Exception();
        }
      } catch (Exception e) {
View Full Code Here

      return -1;
    }

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    Path mappingFile = env.getDocnoMappingData();
    new ClueWarcDocnoMappingBuilder().build(new Path(collection), mappingFile, conf);

    conf.set(Constants.CollectionName, "ClueWeb:English:Segment" + segment);
    conf.set(Constants.CollectionPath, collection);
    conf.set(Constants.IndexPath, indexPath);
    conf.set(Constants.InputFormat, SequenceFileInputFormat.class.getCanonicalName());
View Full Code Here

    if (!fs.exists(p)) {
      sLogger.info("docno-mapping.dat doesn't exist, creating...");
      String[] arr = new String[] { collection, mappingDir.toString(), mappingFile.toString(),
          new Integer(numMappers).toString() };
      NumberTextDocuments tool = new NumberTextDocuments();
      tool.setConf(conf);
      tool.run(arr);

      fs.delete(mappingDir, true);
    }

    // Now we're ready to start the preprocessing pipeline... set
View Full Code Here

    // (sequentially-number integer). If it doesn't exist create it.
    Path mappingFile = env.getDocnoMappingData();
    if (!fs.exists(mappingFile)) {
      sLogger.info(mappingFile + " doesn't exist, creating...");
      String[] arr = new String[] { collection, indexPath + "/medline-docid-tmp",  mappingFile.toString(), new Integer(numMappers).toString() };
      NumberMedlineCitations tool = new NumberMedlineCitations();
      tool.setConf(conf);
      tool.run(arr);

      fs.delete(new Path(indexPath + "/medline-docid-tmp"), true);
    }

    // Now we're ready to start the preprocessing pipeline... set
View Full Code Here

TOP

Related Classes of edu.umd.cloud9.collection.Indexable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.