Package ivory.regression.coling2012

Source Code of ivory.regression.coling2012.EnAr_TREC02

package ivory.regression.coling2012;

import ivory.core.eval.Qrels;
import ivory.regression.GroundTruth;
import ivory.regression.GroundTruth.Metric;
import ivory.smrf.retrieval.Accumulator;
import ivory.sqe.retrieval.Constants;
import ivory.sqe.retrieval.QueryEngine;
import ivory.sqe.retrieval.RunQueryEngine;

import java.util.Map;
import java.util.Set;

import junit.framework.JUnit4TestAdapter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.Logger;
import org.junit.Test;

import com.google.common.collect.Maps;

import edu.umd.cloud9.collection.DocnoMapping;
import edu.umd.cloud9.io.map.HMapSFW;

public class EnAr_TREC02 {
  private static final Logger LOG = Logger.getLogger(EnAr_TREC02.class);
  private QueryEngine qe;

  private static String[] baseline_token_p005_c95_Ar_TREC02_AP = new String[] {
    "35", "0.1661","36", "0.0022","33", "0.0198","34", "0.0058","39", "0.324","37", "0.3725","38", "0.0069","43", "0.0","42", "0.4007",
    "41", "0.3234","40", "0.5038","67", "0.5584","66", "0.2805","69", "0.5403","68", "0.2987","26", "0.0037","27", "0.2965","28", "0.0021",
    "29", "0.5276","30", "0.7243","32", "0.3655","31", "0.0419","70", "0.5104","71", "0.2268","72", "0.0273","73", "0.248","74", "0.0159",
    "75", "0.1714","59", "0.7359","58", "0.1171","57", "0.0971","56", "0.3575","55", "0.6105","64", "6.0E-4","65", "0.5153","62", "0.0509",
    "63", "0.0779","60", "0.7074","61", "0.0884","49", "0.7931","48", "0.7132","45", "0.073","44", "0.1707","47", "0.3368","46", "0.2871",
    "51", "0.0186","52", "0.0617","53", "0.0038","54", "0.041","50", "0.7398"};

  private static String[] phrase_p005_c95_Ar_TREC02_AP = new String[] {
    "35", "0.1855","36", "0.0027","33", "0.0237","34", "0.0052","39", "0.3555","37", "0.3856","38", "0.0094","43", "0.0","42", "0.4135",
    "41", "0.422","40", "0.5109","67", "0.6494","66", "0.3678","69", "0.5601","68", "0.2751","26", "0.0104","27", "0.3312","28", "0.0080",
    "29", "0.5418","30", "0.7043","32", "0.8558","31", "0.0607","70", "0.5446","71", "0.2204","72", "0.0289","73", "0.2758","74", "0.0788",
    "75", "0.1316","59", "0.7402","58", "0.147","57", "0.1396","56", "0.4536","55", "0.6735","64", "6.0E-4","65", "0.5912","62", "0.0516",
    "63", "0.1069","60", "0.7384","61", "0.0841","49", "0.7517","48", "0.7093","45", "0.3406","44", "0.1845","47", "0.341","46", "0.321",
    "51", "0.064","52", "0.1015","53", "0.0043","54", "0.0484","50", "0.7407"};

  private static String[] Nbest_p005_c95_Ar_TREC02_AP = new String[] {
    "35", "0.1653","36", "0.0032","33", "0.0202","34", "0.0036","39", "0.3121","37", "0.2503","38", "0.0052","43", "0.0214","42", "0.4007",
    "41", "0.0249","40", "0.1235","67", "0.5854","66", "0.1554","69", "0.526","68", "0.1657","26", "0.0157","27", "0.2883","28", "0.0030",
    "29", "0.5233","30", "0.6088","32", "0.0635","31", "0.0743","70", "0.474","71", "0.089","72", "0.0013","73", "0.2016","74", "0.0",
    "75", "0.1259","59", "0.7296","58", "0.0994","57", "0.1145","56", "0.3448","55", "0.6379","64", "6.0E-4","65", "0.4769","62", "0.0723",
    "63", "0.0757","60", "0.7307","61", "0.0704","49", "0.7039","48", "0.6716","45", "0.5263","44", "0.1795","47", "0.3353","46", "0.3252",
    "51", "0.0826","52", "0.0832","53", "0.0022","54", "0.0345","50", "0.6239"};

  private static String[] Onebest_p005_c95_Ar_TREC02_AP = new String[] {
    "35", "0.1442","36", "0.0032","33", "0.0229","34", "0.0036","39", "0.2904","37", "0.2199","38", "0.0052","43", "0.0214","42", "0.2194",
    "41", "0.0649","40", "0.4814","67", "0.5826","66", "0.3417","69", "0.526","68", "0.1358","26", "0.0176","27", "0.2883","28", "0.0031",
    "29", "0.5233","30", "0.6088","32", "0.0484","31", "0.0743","70", "0.474","71", "0.089","72", "0.0010","73", "0.1375","74", "1.0E-4",
    "75", "0.1259","59", "0.7296","58", "0.1009","57", "0.1232","56", "0.2683","55", "0.6427","64", "6.0E-4","65", "0.5629","62", "0.0541",
    "63", "0.0757","60", "0.7306","61", "0.0704","49", "0.7039","48", "0.6716","45", "0.5263","44", "0.1795","47", "0.3341","46", "0.3357",
    "51", "0.0824","52", "0.0832","53", "0.0021","54", "0.0339","50", "0.6018"};

  private static String[] Gridbest_p005_c95_Ar_TREC02_AP = phrase_p005_c95_Ar_TREC02_AP;

  public EnAr_TREC02() {
    super();
    qe = new QueryEngine();
  }

  @Test
  public void runRegression() throws Exception {
    /////// baseline-token

    Configuration conf = RunQueryEngine.parseArgs(new String[] {
        "--xml", "data/en-ar.trec02/run_en-ar.token.xml",
        "--queries_path", "data/en-ar.trec02/queries.en-ar.trec02.xml" });
    FileSystem fs = FileSystem.getLocal(conf);

    conf.setBoolean(Constants.Quiet, true);
    qe.init(conf, fs);
    qe.runQueries(conf);

    /////// 1-best

    conf = RunQueryEngine.parseArgs(new String[] {
        "--xml", "data/en-ar.trec02/run_en-ar.1best.xml",
        "--queries_path", "data/en-ar.trec02/queries.en-ar.k1.trec02.xml" });

    qe.init(conf, fs);
    qe.runQueries(conf);

    /////// phrase

    conf = RunQueryEngine.parseArgs(new String[] {
        "--xml", "data/en-ar.trec02/run_en-ar.phrase.xml",
        "--queries_path", "data/en-ar.trec02/queries.en-ar.k10.trec02.xml" });

    qe.init(conf, fs);
    qe.runQueries(conf);

    /////// 10-best

    conf = RunQueryEngine.parseArgs(new String[] {
        "--xml", "data/en-ar.trec02/run_en-ar.10best.xml",
        "--queries_path", "data/en-ar.trec02/queries.en-ar.k10.trec02.xml" });

    qe.init(conf, fs);
    qe.runQueries(conf);

    /////// grid-best

    conf = RunQueryEngine.parseArgs(new String[] {
        "--xml", "data/en-ar.trec02/run_en-ar.gridbest.xml",
        "--queries_path", "data/en-ar.trec02/queries.en-ar.k10.trec02.xml" });

    qe.init(conf, fs);
    qe.runQueries(conf);

    verifyAllResults(qe.getModels(), qe.getAllResults(), qe.getDocnoMapping(),
        new Qrels("data/en-ar.trec02/qrels.en-ar.trec02.txt"));
  }

  public static void verifyAllResults(Set<String> models,
      Map<String, Map<String, Accumulator[]>> results, DocnoMapping mapping, Qrels qrels) {

    Map<String, GroundTruth> g = Maps.newHashMap();

    g.put("en-ar.token_0-0-0-0", new GroundTruth(Metric.AP, 50, baseline_token_p005_c95_Ar_TREC02_AP, 0.2712f));
    g.put("en-ar.phrase_10-0-0-100", new GroundTruth(Metric.AP, 50, phrase_p005_c95_Ar_TREC02_AP, 0.3058f));
    g.put("en-ar.1best_1-100-0-100", new GroundTruth(Metric.AP, 50, Onebest_p005_c95_Ar_TREC02_AP, 0.2473f));
    g.put("en-ar.10best_10-100-0-100", new GroundTruth(Metric.AP, 50, Nbest_p005_c95_Ar_TREC02_AP, 0.2431f));
    g.put("en-ar.gridbest_10-0-0-100", new GroundTruth(Metric.AP, 50, Gridbest_p005_c95_Ar_TREC02_AP, 0.3058f));   // scfg=1.0

    for (String model : models) {
      LOG.info("Verifying results of model \"" + model + "\"");

      g.get(model).verify(results.get(model), mapping, qrels);

      LOG.info("Done!");
    }
  }

  public static junit.framework.Test suite() {
    return new JUnit4TestAdapter(EnAr_TREC02.class);
  }
 
  public static void main(String[] args) {
    HMapSFW gridAPMap = array2Map(Gridbest_p005_c95_Ar_TREC02_AP);
    HMapSFW tenbestAPMap = array2Map(Nbest_p005_c95_Ar_TREC02_AP);
    HMapSFW onebestAPMap = array2Map(Onebest_p005_c95_Ar_TREC02_AP);
    HMapSFW phraseAPMap = array2Map(phrase_p005_c95_Ar_TREC02_AP);
    HMapSFW tokenAPMap = array2Map(baseline_token_p005_c95_Ar_TREC02_AP);
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, gridAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, tenbestAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, onebestAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, phraseAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, tokenAPMap));
    System.out.println(countNumberOfNegligibleTopics(tokenAPMap, gridAPMap));
    System.out.println(countNumberOfNegligibleTopics(tokenAPMap, tenbestAPMap));
    System.out.println(countNumberOfNegligibleTopics(tokenAPMap, onebestAPMap));
    System.out.println(countNumberOfNegligibleTopics(tokenAPMap, phraseAPMap));
    System.out.println(countNumberOfNegligibleTopics(tokenAPMap, tokenAPMap));
  }

  private static int countNumberOfImprovedTopics(HMapSFW tokenAPMap, HMapSFW gridAPMap) {
    int cnt = 0;
    for (String key : tokenAPMap.keySet()) {
      float difference = gridAPMap.get(key) - tokenAPMap.get(key);
      if ( difference > 0.001 ) {
        cnt++;
      }
    }
    return cnt;
  }

  private static int countNumberOfNegligibleTopics(HMapSFW tokenAPMap, HMapSFW gridAPMap) {
    int cnt = 0;
    for (String key : tokenAPMap.keySet()) {
      float difference = gridAPMap.get(key) - tokenAPMap.get(key);
      if ( difference > -0.001 && difference < 0.001 ) {
        cnt++;
      }
    }
    return cnt;
  }
 
  private static HMapSFW array2Map(String[] array) {
    HMapSFW map = new HMapSFW();
    for ( int i = 0; i < array.length; i += 2 ) {
      map.put(array[i], Float.parseFloat(array[i+1]));
    }
    return map;
  }

}
TOP

Related Classes of ivory.regression.coling2012.EnAr_TREC02

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.