Package edu.umd.cloud9.io

Examples of edu.umd.cloud9.io.FSLineReader


      }

      if(localFiles != null && localFiles.length > 0){
        samplesMap = new HMapIIW();
        try {
          FSLineReader reader = new FSLineReader(conf.get("Ivory.SampleFile"), FileSystem.get(conf));
          Text t = new Text();
          while(reader.readLine(t)!=0){
            int docno = Integer.parseInt(t.toString());
            sLogger.info(docno + " --> sample");
            samplesMap.put(docno, 1);
          }
          reader.close();
        } catch (IOException e1) {
        }
        sLogger.info(samplesMap.size()+" sampled");
      }else{
        sLogger.info("samples file does not exist");
View Full Code Here


      }

      if(localFiles != null && localFiles.length > 0){
        samplesMap = new HMapIIW();
        try {
          FSLineReader reader = new FSLineReader(localFiles[0], FileSystem.getLocal(job));
          Text t = new Text();
          while(reader.readLine(t)!=0){
            int docno = Integer.parseInt(t.toString());
            sLogger.info(docno + " --> sample");
            samplesMap.put(docno, 1);
          }
          reader.close();
        } catch (IOException e1) {
        }
        sLogger.info(samplesMap.size()+" sampled");
      }else{
        sLogger.info("samples file not specified in local cache");
View Full Code Here

      }

      if(localFiles != null && localFiles.length > 0){
        samplesMap = new HMapIIW();
        try {
          FSLineReader reader = new FSLineReader(localFiles[0], FileSystem.getLocal(job));
          Text t = new Text();
          while(reader.readLine(t)!=0){
            int docno = Integer.parseInt(t.toString());
            sLogger.info(docno + " --> sample");
            samplesMap.put(docno, 1);
          }
          reader.close();
        } catch (IOException e1) {
        }
        sLogger.info(samplesMap.size()+" sampled");
      }else{
        sLogger.info("samples file not specified in option SampleDocnosFile");
View Full Code Here

     
      sampleDocnosFile = job.get("SampleDocnosFile");
      if(sampleDocnosFile!=null){
        samplesMap = new HMapIIW();
        try {
          FSLineReader reader = new FSLineReader(sampleDocnosFile);
          Text t = new Text();
          while(reader.readLine(t)!=0){
            int docno = Integer.parseInt(t.toString());
            samplesMap.put(docno, 1);
          }
          reader.close();
        } catch (IOException e1) {
        }
      }
     
      mDocMapping = new WikipediaDocnoMapping();
View Full Code Here

      title2Docno = SequenceFileUtils.readFileIntoMap(new Path(job.get("TitleDocnoFile")));
      sampleDocnosFile = job.get("SampleDocnosFile");
      if(sampleDocnosFile!=null){
        samplesMap = new HMapIIW();
        try {
          FSLineReader reader = new FSLineReader(sampleDocnosFile);
          Text t = new Text();
          while(reader.readLine(t)!=0){
            int docno = Integer.parseInt(t.toString());
            samplesMap.put(docno, 1);
          }
          reader.close();
        } catch (IOException e1) {
        }
        sLogger.info("Loaded "+samplesMap.size()+" samples");
      }else{
        sLogger.info("No sample file read.");
View Full Code Here

     
      sampleDocnosFile = job.get("SampleDocnosFile");
      if(sampleDocnosFile!=null){
        samplesMap = new HMapIIW();
        try {
          FSLineReader reader = new FSLineReader(sampleDocnosFile);
          Text t = new Text();
          while(reader.readLine(t)!=0){
            String[] docnos = t.toString().split("\t");
            germandocnos.add(Integer.parseInt(docnos[0]));
            englishdocnos.add(Integer.parseInt(docnos[1]));
          }
          reader.close();
        } catch (IOException e1) {
        }
      }
     
      mDocMapping = new WikipediaDocnoMapping();
View Full Code Here

   *     FileSystem object
   * @return
   *     mapping from term ids to df values
   */
  public static HMapIFW readTransDfTable(Path path, FileSystem fs) {
    HMapIFW transDfTable = new HMapIFW();
    try {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());

      IntWritable key = (IntWritable) reader.getKeyClass().newInstance();
      FloatWritable value = (FloatWritable) reader.getValueClass().newInstance();

      while (reader.next(key, value)) {
        transDfTable.put(key.get(), value.get());
        //        logger.info(key.get()+"-->"+value.get());
        key = (IntWritable) reader.getKeyClass().newInstance();
        value = (FloatWritable) reader.getValueClass().newInstance();
      }
      reader.close();
View Full Code Here

   *     ttable E-->F (i.e., Pr(f|e))
   * @return
   *     mapping from E-terms to their computed df values
   */
  public static HMapIFW translateDFTable(Vocab eVocabSrc, Vocab fVocabTrg, TTable_monolithic_IFAs e2f_probs, FrequencySortedDictionary dict, DfTableArray dfTable){
    HMapIFW transDfTable = new HMapIFW();
    for(int e=1;e<eVocabSrc.size();e++){
      int[] fS = e2f_probs.get(e).getTranslations(0.0f);
      float df=0;
      for(int f : fS){
        float probEF = e2f_probs.get(e, f);
        String fTerm = fVocabTrg.get(f);
        int id = dict.getId(fTerm);
        if(id != -1){
          float df_f = dfTable.getDf(id);       
          df += (probEF*df_f);
        }else{
          logger.debug(fTerm+" not in dict");
        }
      }
      transDfTable.put(e, df);
    }
    return transDfTable;
  }
View Full Code Here

   *     mapping from F-terms to their df values
   * @return
   *     mapping from E-terms to their computed df values
   */
  public static HMapIFW translateDFTable(Vocab eVocabSrc, Vocab fVocabTrg, TTable_monolithic_IFAs e2f_probs, HMapSIW dfs){
    HMapIFW transDfTable = new HMapIFW();
    for(int e=1;e<eVocabSrc.size();e++){
      int[] fS = null;
      try {
        fS = e2f_probs.get(e).getTranslations(0.0f);
      } catch (Exception e1) {
        e1.printStackTrace();
      }
      float df=0;
      for(int f : fS){
        float probEF = e2f_probs.get(e, f);
        String fTerm = fVocabTrg.get(f);
        if(!dfs.containsKey(fTerm)){  //only if word is in the collection, can it contribute to the df values.
          continue;
        }     
        float df_f = dfs.get(fTerm);
        df+=(probEF*df_f);
      }
      transDfTable.put(e, df);
    }
    return transDfTable;
  }
View Full Code Here

        if (e1.getDocno() < mBlockStart)
          continue;
        if (e1.getDocno() >= mBlockEnd)
          break;

        HMapIFW map = new HMapIFW();

        sLogger.debug(key + ": " + e1);

        PostingsReader reader2 = postings.getPostingsReader();

        while (reader2.nextPosting(e2)) {

          sLogger.debug(key + ": " + e1 + ", " + e2);

          if (e1.getDocno() == e2.getDocno())
            continue;

          // compute partial score of similarity for a pair of
          // documents
          float weight = mModel.computeScore(e1.getScore(), e2.getScore(),
              mDocLengthTable.getDocLength(e1.getDocno()), mDocLengthTable
                  .getDocLength(e2.getDocno()));

          map.put(e2.getDocno(), weight);
        }
        output.collect(new IntWritable(e1.getDocno()), map);
      }
    }
View Full Code Here

TOP

Related Classes of edu.umd.cloud9.io.FSLineReader

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.