Package: edu.umd.cloud9.collection.wikipedia

Usage examples of edu.umd.cloud9.collection.wikipedia.WikipediaDocnoMapping


          reader.close();
        } catch (IOException e1) {
        }
      }

      mDocMapping = new WikipediaDocnoMapping();
      FileSystem fs;
      try {
        fs = FileSystem.get(job);
        String indexPath = job.get("Ivory.IndexPath");
        RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
View Full Code Here


          reader.close();
        } catch (IOException e1) {
        }
      }

      mDocMapping = new WikipediaDocnoMapping();
      FileSystem fs;
      try {
        fs = FileSystem.get(job);
        String indexPath = job.get("Ivory.IndexPath");
        RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
View Full Code Here

        sLogger.info("Loaded " + samplesMap.size() + " samples");
      } else {
        sLogger.info("No sample file read.");
      }

      mDocMapping = new WikipediaDocnoMapping();
      FileSystem fs;
      try {
        fs = FileSystem.get(job);
        String indexPath = job.get("Ivory.IndexPath");
        RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
View Full Code Here

          reader.close();
        } catch (IOException e1) {
        }
      }

      mDocMapping = new WikipediaDocnoMapping();
      FileSystem fs;
      try {
        fs = FileSystem.get(job);
        String indexPath = job.get("Ivory.IndexPath");
        RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
View Full Code Here

          reader.close();
        } catch (IOException e1) {
        }
      }

      mDocMapping = new WikipediaDocnoMapping();
      FileSystem fs;
      try {
        fs = FileSystem.get(job);
        String indexPath = job.get("Ivory.IndexPath");
        RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
View Full Code Here

        sLogger.info("Loaded " + samplesMap.size() + " samples");
      } else {
        sLogger.info("No sample file read.");
      }

      mDocMapping = new WikipediaDocnoMapping();
      FileSystem fs;
      try {
        fs = FileSystem.get(job);
        String indexPath = job.get("Ivory.IndexPath");
        RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
View Full Code Here

          reader.close();
        } catch (IOException e1) {
        }
      }

      mDocMapping = new WikipediaDocnoMapping();
      FileSystem fs;
      try {
        fs = FileSystem.get(job);
        String indexPath = job.get("Ivory.IndexPath");
        RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
View Full Code Here

        if (fileStats[i].getPath().getName().startsWith("_")) {
          continue;
        }

        LOG.info("processing " + fileStats[i].getPath());
        FSLineReader reader = new FSLineReader(fileStats[i].getPath(), fs);

        Text line = new Text();
        while (reader.readLine(line) > 0) {
          String[] arr = line.toString().split("\\t+", 2);

          int docno = Integer.parseInt(arr[0]);
          int len = Integer.parseInt(arr[1]);

          // Note that because of speculative execution there may be
          // multiple copies of doclength data. Therefore, we can't
          // just count number of doclengths read. Instead, keep track
          // of largest docno encountered.
          if (docno < docnoOffset) {
            throw new RuntimeException(
                "Error: docno " + docno + " < docnoOffset " + docnoOffset + "!");
          }

          doclengths[docno - docnoOffset] = len;

          if (docno > maxDocno) {
            maxDocno = docno;
          }
          if (docno < minDocno) {
            minDocno = docno;
          }
        }
        reader.close();
        context.getCounter(DocLengths.Files).increment(1);
      }

      LOG.info("min docno: " + minDocno);
      LOG.info("max docno: " + maxDocno);
View Full Code Here

   *     FileSystem object
   * @return
   *     mapping from term ids to df values
   */
  public static HMapIFW readTransDfTable(Path path, FileSystem fs) {
    HMapIFW transDfTable = new HMapIFW();
    try {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());

      IntWritable key = (IntWritable) reader.getKeyClass().newInstance();
      FloatWritable value = (FloatWritable) reader.getValueClass().newInstance();

      while (reader.next(key, value)) {
        transDfTable.put(key.get(), value.get());
        //        logger.info(key.get()+"-->"+value.get());
        key = (IntWritable) reader.getKeyClass().newInstance();
        value = (FloatWritable) reader.getValueClass().newInstance();
      }
      reader.close();
View Full Code Here

   *     ttable E-->F (i.e., Pr(f|e))
   * @return
   *     mapping from E-terms to their computed df values
   */
  public static HMapIFW translateDFTable(Vocab eVocabSrc, Vocab fVocabTrg, TTable_monolithic_IFAs e2f_probs, FrequencySortedDictionary dict, DfTableArray dfTable){
    HMapIFW transDfTable = new HMapIFW();
    for(int e=1;e<eVocabSrc.size();e++){
      int[] fS = e2f_probs.get(e).getTranslations(0.0f);
      float df=0;
      for(int f : fS){
        float probEF = e2f_probs.get(e, f);
        String fTerm = fVocabTrg.get(f);
        int id = dict.getId(fTerm);
        if(id != -1){
          float df_f = dfTable.getDf(id);       
          df += (probEF*df_f);
        }else{
          logger.debug(fTerm+" not in dict");
        }
      }
      transDfTable.put(e, df);
    }
    return transDfTable;
  }
View Full Code Here

TOP

Related Classes of edu.umd.cloud9.collection.wikipedia.WikipediaDocnoMapping

Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.