Package edu.umd.cloud9.io.map

Examples of edu.umd.cloud9.io.map.HMapIIW


      keyInt = new IntWritable();
      valText = new Text();

      sampleDocnosFile = job.get("SampleDocnosFile");
      if (sampleDocnosFile != null) {
        samplesMap = new HMapIIW();
        try {
          LineReader reader = new LineReader(FileSystem.get(job).open(new Path(sampleDocnosFile)));
          Text t = new Text();
          while (reader.readLine(t) != 0) {
            int docno = Integer.parseInt(t.toString());
View Full Code Here


    public void configure(JobConf job) {
      sLogger.setLevel(Level.DEBUG);
      title2Docno = SequenceFileUtils.readFileIntoMap(new Path(job.get("TitleDocnoFile")));
      sampleDocnosFile = job.get("SampleDocnosFile");
      if (sampleDocnosFile != null) {
        samplesMap = new HMapIIW();
        try {
          LineReader reader = new LineReader(FileSystem.get(job).open(new Path(sampleDocnosFile)));
          Text t = new Text();
          while (reader.readLine(t) != 0) {
            int docno = Integer.parseInt(t.toString());
View Full Code Here

      keyText = new Text();
      valText = new Text();

      sampleDocnosFile = job.get("SampleDocnosFile");
      if (sampleDocnosFile != null) {
        samplesMap = new HMapIIW();
        try {
          LineReader reader = new LineReader(FileSystem.get(job).open(new Path(sampleDocnosFile)));
          Text t = new Text();
          while (reader.readLine(t) != 0) {
            String[] docnos = t.toString().split("\t");
View Full Code Here

      keyInt = new IntWritable();
      valText = new Text();

      sampleDocnosFile = job.get("SampleDocnosFile");
      if (sampleDocnosFile != null) {
        samplesMap = new HMapIIW();
        try {
          LineReader reader = new LineReader(FileSystem.get(job).open(new Path(sampleDocnosFile)));
          Text t = new Text();
          while (reader.readLine(t) != 0) {
            int docno = Integer.parseInt(t.toString());
View Full Code Here

        // TODO Auto-generated catch block
        e2.printStackTrace();
      }
      sampleDocnosFile = job.get("SampleDocnosFile");
      if (sampleDocnosFile != null) {
        samplesMap = new HMapIIW();
        try {
          LineReader reader = new LineReader(FileSystem.get(job).open(new Path(sampleDocnosFile)));
          Text t = new Text();
          while (reader.readLine(t) != 0) {
            int docno = Integer.parseInt(t.toString());
View Full Code Here

      keyText = new Text();
      valText = new Text();

      sampleDocnosFile = job.get("SampleDocnosFile");
      if (sampleDocnosFile != null) {
        samplesMap = new HMapIIW();
        try {
          LineReader reader = new LineReader(FileSystem.get(job).open(new Path(sampleDocnosFile)));
          Text t = new Text();
          while (reader.readLine(t) != 0) {
            String[] docnos = t.toString().split("\t");
View Full Code Here

      // add token translations into a #combine of #weight array structures
      JsonArray tokensArr = new JsonArray();
      if (tokenWeight > 0) {
        for (String srcToken : stemmedSourceTokens) {
          HMapSFW nbestDist = translation.getDistributionOf(srcToken);

          if (defaultTokenizer.isStopWord(srcToken)){
            continue;
          }
          LOG.info("Processing "+srcToken);

          // combine translations from N-best AND bilingual dictionary
          List<PairOfFloatMap> tokenRepresentationList = new ArrayList<PairOfFloatMap>();

          // Pr{bitext}
          if (bitextWeight > 0) {
            HMapSFW bitextDist = clGenerator.getTranslations(origQuery.trim(), srcToken, pairsInGrammar, stemmed2Stemmed);
            if(bitextDist != null && !bitextDist.isEmpty()){
              tokenRepresentationList.add(new PairOfFloatMap(bitextDist, bitextWeight));
            }
          }

          // Pr{scfg}
          if (scfgWeight > 0) {
            HMapSFW scfgDist = scfgGenerator.getTranslations(origQuery.trim(), srcToken, probMap, stemmed2Stemmed);
            if (scfgDist != null && !scfgDist.isEmpty() ){
              tokenRepresentationList.add(new PairOfFloatMap(scfgDist, scfgWeight));
            }
          }

          // Pr{n-best}
          if (mtWeight > 0 && nbestDist != null && !nbestDist.isEmpty()) {
            Utils.normalize(nbestDist);
            tokenRepresentationList.add(new PairOfFloatMap(nbestDist, mtWeight));
          }

          JsonArray combinedArr;
          float scale = 1;
          if (scaling) {
            scale = scale * translation.getSourceTokenCnt().get(srcToken) / ((float)translation.getCount());
          }
          if(tokenRepresentationList.size() == 0) {
            continue;       // if empty distr., do not represent this source token in query
          } else if(tokenRepresentationList.size() == 1) {
            combinedArr = Utils.createJsonArrayFromProbabilities(Utils.scaleProbMap(lexProbThreshold, scale, tokenRepresentationList.get(0).getMap()));
          } else {
            combinedArr = Utils.createJsonArrayFromProbabilities(Utils.combineProbMaps(lexProbThreshold, scale, tokenRepresentationList));
          }

          JsonObject tokenWeightedArr = new JsonObject();         
          tokenWeightedArr.add("#weight", combinedArr);

          // optional: if this source token has occurred more than once per query, reflect this in the representation
          //  for (int i = 0; i < Math.ceil(tokenCount.get(srcToken)/(float)kBest); i++) {
          //    tokensArr.put(tokenWeightedArr);
          //  }
          tokensArr.add(tokenWeightedArr);
        }
        queryTJson.add("#combine", tokensArr);
      }

      // combine the token-based and phrase-based representations into a #combweight structure
      JsonArray queryJsonArr = new JsonArray();

      HMapSFW scaledPhrase2Weight = null;
      if (phraseWeight > 0) {
        scaledPhrase2Weight = Utils.scaleProbMap(lexProbThreshold, phraseWeight, translation.getPhraseDist());     
        for (String phrase : scaledPhrase2Weight.keySet()) {
          queryJsonArr.add(new JsonPrimitive(scaledPhrase2Weight.get(phrase)));
          queryJsonArr.add(new JsonPrimitive(phrase));
        }
      }
      if (tokenWeight > 0) {
        queryJsonArr.add(new JsonPrimitive(tokenWeight));
View Full Code Here

    }  
    return probMap;
  }

  private String getBestTranslation(String query, String token) {
    HMapSFW probDist = query2probMap.get(query).get(token);

    if(probDist == null){
      return token;
    }

    float maxProb = 0f;
    String maxProbTrans = null;
    for (edu.umd.cloud9.util.map.MapKF.Entry<String> entry : probDist.entrySet()) {
      if (entry.getValue() > maxProb) {
        maxProb = entry.getValue();
        maxProbTrans = entry.getKey();
      }
    }
View Full Code Here

    }
    return maxProbTrans;
  }

  protected HMapSFW getTranslations(String query, String token, Map<String, HMapSFW> probMap, Map<String, String> stemmed2Stemmed) {
    HMapSFW probDist = null;
    try {
      probDist = probMap.get(token);
    } catch (NullPointerException e) {
      LOG.info("Prob map not found for " + query);
      e.printStackTrace();
    }
   
    if(probDist == null){
      // borrow OOV word heuristic from MT: if no translation found, include itself as translation
      probDist = new HMapSFW();
      String targetStem = stemmed2Stemmed.get(token);
      String target = (stemmed2Stemmed == null || targetStem == null) ? token : stemmed2Stemmed.get(token);
      probDist.put(target, 1);     
      return probDist;
    }

    return probDist;
  }
View Full Code Here

    });
    Interp_AP.put(2, new String[] {
        "78", "0.3167","77", "0.2599","35", "0.0019","36", "0.0033","33", "0.3573","39", "0.1078","38", "0.0","43", "0.0679","42", "0.2039","41", "0.147","40", "1.0E-4","82", "0.3175","83", "0.1541","80", "0.082","87", "0.291","84", "0.257","85", "0.0732","67", "0.1302","66", "0.0092","69", "0.0","68", "0.6626","23", "0.1562","26", "0.125","28", "0.4415","29", "0.1302","2", "0.1573","30", "0.2587","6", "0.0582","5", "0.0","32", "0.3711","70", "0.04","9", "0.2236","71", "0.5076","72", "0.383","73", "0.0065","74", "0.0679","75", "0.0034","76", "0.1072","59", "0.0353","58", "0.0933","57", "0.378","19", "0.2664","56", "0.2486","18", "0.0453","15", "0.4233","16", "0.0514","12", "0.0459","64", "0.4827","65", "0.4462","62", "0.5034","63", "0.0139","99", "0.267","61", "0.0392","100", "0.0264","98", "0.3932","49", "0.0026","97", "0.0025","48", "2.0E-4","96", "0.03","95", "3.0E-4","94", "0.4591","45", "0.3061","93", "0.0957","44", "0.2947","92", "0.2229","47", "0.0","91", "0.6519","46", "0.2098","90", "0.4246","51", "0.0","52", "0.1495","53", "0.1198","54", "0.2776"
    });

    HMapSFW tenbestAPMap = array2Map(Nbest_AP.get(2));
    HMapSFW onebestAPMap = array2Map(Onebest_AP.get(2));
    HMapSFW grammarAPMap = array2Map(grammar_AP.get(2));
    HMapSFW tokenAPMap = array2Map(baseline_token_AP);
    HMapSFW gridAPMap = array2Map(Interp_AP.get(2));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, gridAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, tenbestAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, onebestAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, grammarAPMap));
    System.out.println(countNumberOfNegligibleTopics(tokenAPMap, gridAPMap));
View Full Code Here

TOP

Related Classes of edu.umd.cloud9.io.map.HMapIIW

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by ORACLE Inc. Contact coftware#gmail.com.