Package edu.umd.cloud9.util

Examples of edu.umd.cloud9.util.StackOfInts


    // now, read tokens in first sentence and keep track of sequences of uppercased tokens in buffer
    HashSet<String> upperCaseMap1 = getUppercaseParts(tokens1);
    HashSet<String> upperCaseMap2 = getUppercaseParts(tokens2);
    float cntUpperRatio1 = getRatio(upperCaseMap1, upperCaseMap2);
    float cntUpperRatio2 = getRatio(upperCaseMap2, upperCaseMap1);
    PairOfFloats result = new PairOfFloats(cntUpperRatio1, cntUpperRatio2);
    return result;
  }
View Full Code Here


                String term = m2.group(1);
                if ( !term.equals("NULL") ) {
                  float prob = Float.parseFloat(m2.group(2));
                  int engIndex = trgVocab.addOrGet(term);
                  logger.debug("Added: "+term+" with index: "+engIndex+" and prob:"+prob);
                  indexProbPairs.add(new PairOfIntFloat(engIndex, prob));
                  sumOfProbs += prob;
                }
              }
            }
            // if number of translations not set, we never cut-off, so all cases are long tails
View Full Code Here

                String term = m2.group(1);
                if (!term.equals("NULL")) {
                  float prob = Float.parseFloat(m2.group(2));
                  int engIndex = trgVocab.addOrGet(term);
                  logger.debug("Added: "+term+" with index: "+engIndex+" and prob:"+prob);
                  indexProbPairs.add(new PairOfIntFloat(engIndex, prob));
                  sumOfProbs+=prob;
                }
              }
            }
            if(sumOfProbs > probThreshold){
View Full Code Here

    @Override
    public void reduce(Text key, Iterable<PairOfIntLong> values, Context context)
        throws IOException, InterruptedException {
      String term = key.toString();
      Iterator<PairOfIntLong> iter = values.iterator();
      PairOfIntLong p = iter.next();
      int df = p.getLeftElement();
      long cf = p.getRightElement();
      WritableUtils.writeVInt(dfByTermOut, df);
      WritableUtils.writeVLong(cfByTermOut, cf);
      if (iter.hasNext()) {
        throw new RuntimeException("More than one record for term: " + term);
      }
View Full Code Here

      // map from the id back to text
      // sLogger.info("termid: " + key);
      String term = mTermIdMap.getTerm(key.get());
      // sLogger.info("term: " + term);
      PairOfIntLong pair = gs.getStats(term);

      if (pair == null) {
        p.setCf(-1);
        p.setDf(-1);
      } else {
        p.setCf(pair.getRightElement());
        p.setDf(pair.getLeftElement());
      }

      output.collect(key, p);
    }
View Full Code Here

      float sumProb2 = 0;
      for (Entry<String> entry : probDist.entrySet()) {
        float pr = entry.getValue() / sumProb;
        if (pr > lexProbThreshold) {
          sumProb2 += pr;
          sortedFilteredProbDist.add(new PairOfStringFloat(entry.getKey(), pr));
        }
      }

      // re-normalize values after removal of low-prob entries
      float cumProb = 0;
      int cnt = 0;
      while (cnt < maxNumTrans && cumProb < cumProbThreshold && !sortedFilteredProbDist.isEmpty()) {
        PairOfStringFloat entry = sortedFilteredProbDist.pollLast();
        float pr = entry.getValue() / sumProb2;
        cumProb += pr;
        normProbDist.put(entry.getKey(), pr);
        cnt++;
      }

      probMap.put(sourceTerm, normProbDist);
    }
View Full Code Here

        String[] parts = rule.split("\\|\\|\\|");
        String[] lhs = parts[0].trim().split(" ");
        String[] rhs = parts[1].trim().split(" ");;
        for (String l : lhs) {
          for (String r : rhs) {
            pairsInSCFG.add(new PairOfStrings(l, r));
          }
        }
      }
    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
View Full Code Here

      int e = entry.getRightElement();
      String eTerm = eVocab_f2e.get(e);

      //      LOG.info("Pr("+eTerm+"|"+token+")="+probEF);

      if (probEF > 0 && e > 0 && !docLangTokenizer.isStopWord(eTerm) && (translateOnly == null || !translateOnly.equals("indri") || indriPuncPattern.matcher(eTerm).matches()) && (pairsInSCFG == null || pairsInSCFG.contains(new PairOfStrings(token,eTerm)))) {     
        // assuming our bilingual dictionary is learned from normally segmented text, but we want to use bigram tokenizer for CLIR purposes
        // then we need to convert the translations of each source token into a sequence of bigrams
        // we can distribute the translation probability equally to the each bigram
        if (bigramSegment) {
          String[] eTokens = docLangTokenizer.processContent(eTerm);
View Full Code Here

        quicksort(keys, counts, i+1, right);
    }
   
    public static void quicksortWithStack(short[] keys, int[] counts, int left, int right) {
        if (right <= left) return;
      StackOfInts lStack = new StackOfInts(), rStack = new StackOfInts();
     
      lStack.push(left);
      rStack.push(right);
        while(!lStack.isEmpty()){
          left = lStack.pop();
          right = rStack.pop();
          int i = partition(keys, counts, left, right);
          if (i-1 > left){
            lStack.push(left);
            rStack.push(i-1);
          }
          if(right > i+1){
            lStack.push(i+1);
            rStack.push(right);
          }
        }
    }
View Full Code Here

        }
    }
   
    public static void quicksortWithStack(Object[] keys, int[] counts, int left, int right) {
        if (right <= left) return;
      StackOfInts lStack = new StackOfInts(), rStack = new StackOfInts();
     
      lStack.push(left);
      rStack.push(right);
        while(!lStack.isEmpty()){
          left = lStack.pop();
          right = rStack.pop();
          int i = partition(keys, counts, left, right);
          if (i-1 > left){
            lStack.push(left);
            rStack.push(i-1);
          }
          if(right > i+1){
            lStack.push(i+1);
            rStack.push(right);
          }
        }
    }
View Full Code Here

TOP

Related Classes of edu.umd.cloud9.util.StackOfInts

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.