Package ivory.regression.coling2012

Source Code of ivory.regression.coling2012.EnFr_CLEF06

package ivory.regression.coling2012;

import ivory.core.eval.Qrels;
import ivory.regression.GroundTruth;
import ivory.regression.GroundTruth.Metric;
import ivory.smrf.retrieval.Accumulator;
import ivory.sqe.retrieval.Constants;
import ivory.sqe.retrieval.QueryEngine;
import ivory.sqe.retrieval.RunQueryEngine;

import java.util.Map;
import java.util.Set;

import junit.framework.JUnit4TestAdapter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.Logger;
import org.junit.Test;

import com.google.common.collect.Maps;

import edu.umd.cloud9.collection.DocnoMapping;
import edu.umd.cloud9.io.map.HMapSFW;

public class EnFr_CLEF06 {
  // Log4j logger for this test class; used to report progress while each model is verified.
  private static final Logger LOG = Logger.getLogger(EnFr_CLEF06.class);
  // Query engine instance used for every run in runRegression; assigned in the constructor.
  private QueryEngine qe;

  /**
   * Expected per-topic values for the baseline token run, stored flat as alternating
   * (topic id, score) strings. Registered in verifyAllResults under
   * "en-fr.token_0-0-0-0" with Metric.AP, and parsed into a map by array2Map in main.
   */
  private static String[] baseline_token_p005_c95_Fr_CLEF06_AP = new String[] {
    "338", "0.0336","339", "0.4941","332", "0.0","333", "0.8719","330", "0.6","331", "0.2254","336", "0.5","337", "0.6051","334", "0.4261",
    "335", "0.3699","349", "0.3297","302", "0.2168","301", "0.4115","304", "0.2737","303", "0.2342","306", "0.0442","341", "0.4332","305", "0.1813",
    "342", "0.0641","308", "0.2852","343", "0.1029","307", "0.1168","344", "0.3045","345", "0.3336","309", "0.0134","346", "0.0499","347", "0.0647",
    "348", "0.5605","340", "0.1798","318", "0.4432","319", "0.5609","316", "0.3711","317", "0.0334","314", "0.0666","315", "0.1825","312", "0.2237",
    "313", "0.2365","310", "0.0033","311", "0.0668","327", "0.225","328", "0.4386","329", "0.7745","323", "0.0775","324", "0.0","325", "0.0104",
    "326", "0.0","320", "0.0707","350", "0.4197","321", "0.4164","322", "0.1389"};

  /**
   * Expected per-topic values for the phrase run, stored flat as alternating
   * (topic id, score) strings. Registered in verifyAllResults under
   * "en-fr.phrase_10-0-0-100" with Metric.AP, and parsed into a map by array2Map in main.
   */
  private static String[] phrase_p005_c95_Fr_CLEF06_AP = new String[] {
    "338", "0.0901","339", "0.4803","332", "0.0","333", "0.864","330", "0.6","331", "0.2199","336", "0.125","337", "0.6117","334", "0.1691",
    "335", "0.5367","349", "0.3406","302", "0.247","301", "0.4428","304", "0.1994","303", "0.2011","341", "0.3399","306", "0.0473","305", "0.1864",
    "342", "0.3393","343", "0.1077","308", "0.338","344", "0.3513","307", "0.1258","345", "0.2542","346", "0.0199","309", "0.0135","347", "0.0707",
    "348", "0.5583","340", "0.1778","318", "0.5155","319", "0.4582","316", "0.3671","317", "0.0724","314", "0.0709","315", "0.3667","312", "0.2406",
    "313", "0.3657","310", "0.0048","311", "0.0736","327", "0.6314","328", "0.454","329", "0.7832","323", "0.0563","324", "0.0","325", "0.0175",
    "326", "0.7436","320", "0.0359","321", "0.416","350", "0.4443","322", "0.1622"};

  /**
   * Expected per-topic values for the 10-best run, stored flat as alternating
   * (topic id, score) strings. Registered in verifyAllResults under
   * "en-fr.10best_10-100-0-100" with Metric.AP, and parsed into a map by array2Map in main.
   */
  private static String[] Nbest_p005_c95_Fr_CLEF06_AP = new String[] {
    "338", "0.1695","339", "0.4548","332", "0.0","333", "0.8713","330", "0.6","331", "0.2192","336", "1.0","337", "0.6015","334", "0.4628",
    "335", "0.3373","349", "0.3252","302", "0.1983","301", "0.4306","304", "0.3629","303", "0.1883","341", "0.0033","306", "0.0484","305", "0.18",
    "342", "0.3465","343", "0.103","308", "0.3853","344", "0.3619","307", "0.077","345", "0.2182","346", "0.0235","309", "0.013","347", "0.059",
    "348", "0.5502","340", "0.1785","318", "0.5062","319", "0.5747","316", "0.3903","317", "0.109","314", "0.0572","315", "0.3695","312", "0.2193",
    "313", "0.3221","310", "0.0084","311", "0.0512","327", "0.485","328", "0.4048","329", "0.7542","323", "0.0225","324", "0.0","325", "0.0068",
    "326", "0.7667","320", "0.1571","321", "0.4218","350", "0.4028","322", "0.0963"};

  /**
   * Expected per-topic values for the 1-best run, stored flat as alternating
   * (topic id, score) strings. Registered in verifyAllResults under
   * "en-fr.1best_1-100-0-100" with Metric.AP, and parsed into a map by array2Map in main.
   */
  private static String[] Onebest_p005_c95_Fr_CLEF06_AP = new String[] {
    "338", "0.1263","339", "0.4548","332", "0.0","333", "0.8713","330", "0.6","331", "0.2192","336", "0.0476","337", "0.6015","334", "0.4628",
    "335", "0.3373","349", "0.3281","302", "0.1983","301", "0.4314","304", "0.361","303", "0.1883","341", "0.0029","306", "0.0484","305", "0.1801",
    "342", "0.3465","343", "0.103","308", "0.3853","344", "0.3619","307", "0.0525","345", "0.4165","346", "0.0233","309", "0.013","347", "0.059",
    "348", "0.549","340", "0.227","318", "0.5062","319", "0.5752","316", "0.3894","317", "0.1043","314", "0.0447","315", "0.3695","312", "0.2193",
    "313", "0.318","310", "0.0084","311", "0.0512","327", "0.4512","328", "0.4048","329", "0.7542","323", "0.0076","324", "0.0","325", "0.0068",
    "326", "0.7778","320", "0.2405","321", "0.4218","350", "0.4028","322", "0.0963"};

  /**
   * Expected per-topic values for the grid-best run, stored flat as alternating
   * (topic id, score) strings. Registered in verifyAllResults under
   * "en-fr.gridbest_10-30-30-100" with Metric.AP, and parsed into a map by array2Map in main.
   */
  private static String[] Gridbest_p005_c95_Fr_CLEF06_AP = new String[] {
    "338", "0.0751","339", "0.4793","332", "0.0","333", "0.8726","330", "0.6","331", "0.2212","336", "1.0","337", "0.6063","334", "0.3359","335", "0.4889",
    "349", "0.3459","302", "0.2194","301", "0.4427","304", "0.3067","303", "0.2038","341", "0.2185","306", "0.0478","305", "0.1892","342", "0.3332",
    "343", "0.1009","308", "0.356","344", "0.3147","307", "0.0879","345", "0.2893","346", "0.0396","309", "0.0137","347", "0.0707","348", "0.5578",
    "340", "0.176","318", "0.5072","319", "0.563","316", "0.3952","317", "0.1406","314", "0.0736","315", "0.3403","312", "0.2488","313", "0.3669",
    "310", "0.0064","311", "0.0624","327", "0.7365","328", "0.4378","329", "0.7579","323", "0.0493","324", "0.0","325", "0.0122","326", "0.6667",
    "320", "0.075","321", "0.4195","350", "0.4349","322", "0.1339"};

  public EnFr_CLEF06() {
    super();
    qe = new QueryEngine();
  }

  @Test
  public void runRegression() throws Exception {
    /////// baseline-token

    Configuration conf = RunQueryEngine.parseArgs(new String[] {
        "--xml", "data/en-fr.clef06/run_en-fr.token.xml",
        "--queries_path", "data/en-fr.clef06/queries.en-fr.clef06.xml" });
    FileSystem fs = FileSystem.getLocal(conf);

    //    conf.setBoolean(Constants.Quiet, true);
    qe.init(conf, fs);
    qe.runQueries(conf);

    /////// 1-best

    conf = RunQueryEngine.parseArgs(new String[] {
        "--xml", "data/en-fr.clef06/run_en-fr.1best.xml",
        "--queries_path", "data/en-fr.clef06/queries.en-fr.k1.clef06.xml" });

    qe.init(conf, fs);
    qe.runQueries(conf);

    /////// phrase

    conf = RunQueryEngine.parseArgs(new String[] {
        "--xml", "data/en-fr.clef06/run_en-fr.phrase.xml",
        "--queries_path", "data/en-fr.clef06/queries.en-fr.k10.clef06.xml" });

    qe.init(conf, fs);
    qe.runQueries(conf);

    /////// 10-best

    conf = RunQueryEngine.parseArgs(new String[] {
        "--xml", "data/en-fr.clef06/run_en-fr.10best.xml",
        "--queries_path", "data/en-fr.clef06/queries.en-fr.k10.clef06.xml" });

    qe.init(conf, fs);
    qe.runQueries(conf);

    /////// grid-best

    conf = RunQueryEngine.parseArgs(new String[] {
        "--xml", "data/en-fr.clef06/run_en-fr.gridbest.xml",
        "--queries_path", "data/en-fr.clef06/queries.en-fr.k10.clef06.xml" });

    qe.init(conf, fs);
    qe.runQueries(conf);

    verifyAllResults(qe.getModels(), qe.getAllResults(), qe.getDocnoMapping(),
        new Qrels("data/en-fr.clef06/qrels.en-fr.clef06.txt"));
  }

  public static void verifyAllResults(Set<String> models,
      Map<String, Map<String, Accumulator[]>> results, DocnoMapping mapping, Qrels qrels) {

    Map<String, GroundTruth> g = Maps.newHashMap();

    g.put("en-fr.token_0-0-0-0", new GroundTruth(Metric.AP, 50, baseline_token_p005_c95_Fr_CLEF06_AP, 0.2617f));
    g.put("en-fr.phrase_10-0-0-100", new GroundTruth(Metric.AP, 50, phrase_p005_c95_Fr_CLEF06_AP, 0.2868f));
    g.put("en-fr.1best_1-100-0-100", new GroundTruth(Metric.AP, 50, Onebest_p005_c95_Fr_CLEF06_AP, 0.2829f));
    g.put("en-fr.10best_10-100-0-100", new GroundTruth(Metric.AP, 50, Nbest_p005_c95_Fr_CLEF06_AP, 0.2979f));
    g.put("en-fr.gridbest_10-30-30-100", new GroundTruth(Metric.AP, 50, Gridbest_p005_c95_Fr_CLEF06_AP, 0.3084f));      // nbest=0.3, bitext=0.3 scfg=0.4

    for (String model : models) {
      LOG.info("Verifying results of model \"" + model + "\"");

      g.get(model).verify(results.get(model), mapping, qrels);

      LOG.info("Done!");
    }
  }

  public static junit.framework.Test suite() {
    return new JUnit4TestAdapter(EnFr_CLEF06.class);
  }

  public static void main(String[] args) {
    HMapSFW gridAPMap = array2Map(Gridbest_p005_c95_Fr_CLEF06_AP);
    HMapSFW tenbestAPMap = array2Map(Nbest_p005_c95_Fr_CLEF06_AP);
    HMapSFW onebestAPMap = array2Map(Onebest_p005_c95_Fr_CLEF06_AP);
    HMapSFW phraseAPMap = array2Map(phrase_p005_c95_Fr_CLEF06_AP);
    HMapSFW tokenAPMap = array2Map(baseline_token_p005_c95_Fr_CLEF06_AP);
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, gridAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, tenbestAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, onebestAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, phraseAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, tokenAPMap));
    System.out.println(countNumberOfNegligibleTopics(tokenAPMap, gridAPMap));
    System.out.println(countNumberOfNegligibleTopics(tokenAPMap, tenbestAPMap));
    System.out.println(countNumberOfNegligibleTopics(tokenAPMap, onebestAPMap));
    System.out.println(countNumberOfNegligibleTopics(tokenAPMap, phraseAPMap));
    System.out.println(countNumberOfNegligibleTopics(tokenAPMap, tokenAPMap));
  }

  private static int countNumberOfImprovedTopics(HMapSFW tokenAPMap, HMapSFW gridAPMap) {
    int cnt = 0;
    for (String key : tokenAPMap.keySet()) {
      if(gridAPMap.get(key) - tokenAPMap.get(key) > 0.001) {
        cnt++;
      }
    }
    return cnt;
  }

  private static int countNumberOfNegligibleTopics(HMapSFW tokenAPMap, HMapSFW gridAPMap) {
    int cnt = 0;
    for (String key : tokenAPMap.keySet()) {
      float difference = gridAPMap.get(key) - tokenAPMap.get(key);
      if ( difference > -0.001 && difference < 0.001 ) {
        cnt++;
      }
    }
    return cnt;
  }

  private static HMapSFW array2Map(String[] array) {
    HMapSFW map = new HMapSFW();
    for ( int i = 0; i < array.length; i += 2 ) {
      map.put(array[i], Float.parseFloat(array[i+1]));
    }
    return map;
  }
}
TOP

Related Classes of ivory.regression.coling2012.EnFr_CLEF06

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by ORACLE Inc. Contact coftware#gmail.com.