Package edu.umd.cloud9.io.map

Examples of edu.umd.cloud9.io.map.String2IntOpenHashMapWritable


    assertEquals(22, m.getInt("there"));
  }

  @Test
  public void testAccent() throws IOException {
    String2IntOpenHashMapWritable map1 = new String2IntOpenHashMapWritable();

    // '\u00E0': [LATIN SMALL LETTER A WITH GRAVE]
    // '\u00E6': [LATIN SMALL LETTER AE]
    // '\u00E7': [LATIN SMALL LETTER C WITH CEDILLA]
    // '\u00FC': [LATIN SMALL LETTER U WITH DIAERESIS]

    map1.put("\u00E0", 1);
    map1.put("\u00E6", 2);
    map1.put("\u00E7", 3);
    map1.put("\u00FC", 4);

    assertEquals(1, map1.getInt("\u00E0"));
    assertEquals(2, map1.getInt("\u00E6"));
    assertEquals(3, map1.getInt("\u00E7"));
    assertEquals(4, map1.getInt("\u00FC"));

    map1.increment("\u00E0");
    map1.increment("\u00E6");
    map1.increment("\u00E7");
    map1.increment("\u00FC");

    assertEquals(2, map1.getInt("\u00E0"));
    assertEquals(3, map1.getInt("\u00E6"));
    assertEquals(4, map1.getInt("\u00E7"));
    assertEquals(5, map1.getInt("\u00FC"));

    map1.put("\u00E0", 10);
    map1.remove("\u00E6");
    map1.remove("\u00E7");
    map1.put("\u00E7", 2);
    map1.increment("\u00FC");

    assertEquals(10, map1.getInt("\u00E0"));
    assertEquals(2, map1.getInt("\u00E7"));
    assertEquals(6, map1.getInt("\u00FC"));

    assertEquals(3, map1.size());

    // Test serialization
    String2IntOpenHashMapWritable map2 = String2IntOpenHashMapWritable.create(map1.serialize());
    assertEquals(10, map2.getInt("\u00E0"));
    assertEquals(2, map2.getInt("\u00E7"));
    assertEquals(6, map2.getInt("\u00FC"));
  }
View Full Code Here


    assertEquals(6, map2.getInt("\u00FC"));
  }

  @Test
  public void testJp() throws IOException {
    String2IntOpenHashMapWritable map1 = new String2IntOpenHashMapWritable();
    BufferedReader in = new BufferedReader(new InputStreamReader(
        new FileInputStream("etc/jp-sample.txt"), "UTF8"));

    List<String> list = Lists.newArrayList();
    int cnt = 0;
    String line;
    while ((line = in.readLine()) != null) {
      list.add(line);
      map1.put(line, cnt++);
    }

    for (int i = 0; i < list.size(); i++) {
      assertEquals(i, map1.getInt(list.get(i)));
    }
    assertEquals(5, map1.size());

    for (int i = 0; i < list.size(); i++) {
      map1.increment(list.get(i));
    }
    assertEquals(5, map1.size());

    for (int i = 0; i < list.size(); i++) {
      assertEquals(i + 1, map1.getInt(list.get(i)));
    }
    assertEquals(5, map1.size());

    // Test serialization
    String2IntOpenHashMapWritable map2 = String2IntOpenHashMapWritable.create(map1.serialize());
    for (int i = 0; i < list.size(); i++) {
      assertEquals(i + 1, map2.getInt(list.get(i)));
    }
    assertEquals(5, map2.size());

    for (int i = 0; i < list.size(); i++) {
      map2.remove(list.get(i));
    }
    assertEquals(5, map1.size());
    assertEquals(0, map2.size());

    in.close();
  }
View Full Code Here

      return null;
    }
    PriorityQueue<PairOfFloatInt> eS = f2eProbs.get(f).getTranslationsWithProbs(lexProbThreshold);

    if (!eS.isEmpty()) {
      PairOfFloatInt entry = eS.poll();
      int e = entry.getRightElement();
      String eTerm = eVocab_f2e.get(e);
      return eTerm;
    }
    return token;
  }
View Full Code Here

    float sumProbEF = 0;
    int numTrans = 0;
    //tf(e) = sum_f{tf(f)*prob(e|f)}
    while (numTrans < numTransPerToken && !eS.isEmpty()) {
      PairOfFloatInt entry = eS.poll();
      float probEF = entry.getLeftElement();
      int e = entry.getRightElement();
      String eTerm = eVocab_f2e.get(e);

      //      LOG.info("Pr("+eTerm+"|"+token+")="+probEF);

      if (probEF > 0 && e > 0 && !docLangTokenizer.isStopWord(eTerm) && (translateOnly == null || !translateOnly.equals("indri") || indriPuncPattern.matcher(eTerm).matches()) && (pairsInSCFG == null || pairsInSCFG.contains(new PairOfStrings(token,eTerm)))) {     
View Full Code Here

            curIndex = prevIndex;    // revert curIndex value since we're skipping this one
            skipTerm = true;
            continue;
          }
          logger.debug("Processing: "+srcTerm+" with index: "+curIndex);     
          topTrans.add(new PairOfFloatString(prob, trgTerm));
          sumOfProbs += prob;
          logger.debug("Added to queue: "+trgTerm+" with prob: "+prob+" (sum: "+sumOfProbs+")");     
        }else if(!earlyTerminate && !skipTerm && !delims.contains(srcTerm)){  //continue adding translation term,prob pairs (except if early termination is ON)
          topTrans.add(new PairOfFloatString(prob, trgTerm));
          sumOfProbs += prob;
          logger.debug("Added to queue: "+trgTerm+" with prob: "+prob+" (sum: "+sumOfProbs+")");     

          // keep top numTrans translations
          if(topTrans.size() > numTrans){
            PairOfFloatString pair = topTrans.pollFirst();
            float removedProb = pair.getLeftElement();
            sumOfProbs -= removedProb;
            logger.debug("Removed from queue: "+pair.getRightElement()+" (sum: "+sumOfProbs+")");     
          }
        }else{
          logger.debug("Skipped line: "+line);
        }
      }
View Full Code Here

          continue;
        }
        prob = ttable.get(srcIndex, trgIndex);
        logger.debug("Found: " + trgTerm + " with " + prob);

        topTrans.add(new PairOfFloatString(prob, trgTerm));
        // keep top numTrans translations
        if (topTrans.size() > numTrans) {
          float removedProb = topTrans.pollFirst().getLeftElement();
          sumOfProbs -= removedProb;
        }
View Full Code Here

    List<Integer> sortedIndices = new ArrayList<Integer>();
    HMapIF index2ProbMap = new HMapIF();

    float sumOfProbs = 0.0f;    //only extract the top K<15 if the mass prob. exceeds MAX_probThreshold
    while(!topTrans.isEmpty() && sumOfProbs < cumProbThreshold){
      PairOfFloatString e = topTrans.pollLast();
      String term = e.getRightElement();
      float pr = e.getLeftElement()/cumProb;    // normalize
      logger.debug(term+"-->"+pr);
      int trgIndex = trgVocab.addOrGet(term);
      sumOfProbs += e.getLeftElement();         // keep track of unnormalized cumulative prob for determining cutoff
      sortedIndices.add(trgIndex);
      index2ProbMap.put(trgIndex, pr);
    }

    // to enable faster access with binary search, we sort entries by vocabulary index.
View Full Code Here

          int e2 = eVocabTrg.get(eTerm);        

          float prob2 = f2e_Probs.get(f2, e2);
          float prob = prob1*prob2;
          sumOfProbs += prob;
          topTrans.add(new PairOfFloatString(prob, fTerm));
        }
        logger.info("Adding "+eTerm);
        addToTable(e1, topTrans, sumOfProbs, table, fVocabTrg, 1.0f, stats);     
      }
      logger.info(stats);
View Full Code Here

            curIndex = prevIndex;    // revert curIndex value since we're skipping this one
            skipTerm = true;
            continue;
          }
          logger.debug("Processing: "+srcTerm+" with index: "+curIndex);     
          topTrans.add(new PairOfFloatString(prob, trgTerm));
          sumOfProbs += prob;
          logger.debug("Added to queue: "+trgTerm+" with prob: "+prob+" (sum: "+sumOfProbs+")");     
        }else if(!earlyTerminate && !skipTerm && !delims.contains(srcTerm)){  //continue adding translation term,prob pairs (except if early termination is ON)
          topTrans.add(new PairOfFloatString(prob, trgTerm));
          sumOfProbs += prob;
          logger.debug("Added to queue: "+trgTerm+" with prob: "+prob+" (sum: "+sumOfProbs+")");     

          // keep top numTrans translations
          if(topTrans.size() > numTrans){
            PairOfFloatString pair = topTrans.pollFirst();
            float removedProb = pair.getLeftElement();
            sumOfProbs -= removedProb;
            logger.debug("Removed from queue: "+pair.getRightElement()+" (sum: "+sumOfProbs+")");     
          }
        }else{
          logger.debug("Skipped line: "+line);
        }
        //        // line processed: check if early terminate
View Full Code Here

      logger.debug("Processing: "+srcTerm+" with index: "+curIndex+" ("+srcIndex+")");
      for(int trgIndex : translations){
        trgTerm = trgVocab.get(trgIndex);
        prob = ttable.get(srcIndex, trgIndex);

        topTrans.add(new PairOfFloatString(prob, trgTerm));
        // keep top numTrans translations
        if(topTrans.size() > numTrans){
          float removedProb = topTrans.pollFirst().getLeftElement();
          sumOfProbs -= removedProb;
        }
View Full Code Here

TOP

Related Classes of edu.umd.cloud9.io.map.String2IntOpenHashMapWritable

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.