Package ivory.integration.local

Source Code of ivory.integration.local.IntegrationTestBaseCACM

package ivory.integration.local;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import ivory.app.PreprocessCollection;
import ivory.core.eval.Qrels;
import ivory.core.eval.RankedListEvaluator;
import ivory.integration.IntegrationUtils;
import ivory.smrf.retrieval.Accumulator;
import ivory.smrf.retrieval.BatchQueryRunner;

import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import edu.umd.cloud9.collection.DocnoMapping;

public abstract class IntegrationTestBaseCACM {
  private static final Logger LOG = Logger.getLogger(IntegrationTestBaseCACM.class);
  private static final Path collectionPath = new Path("data/cacm/cacm-collection.xml.gz");

  public void runBuildIndex(String index, String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);

    assertTrue(fs.exists(collectionPath));

    fs.delete(new Path(index), true);

    List<String> jars = Lists.newArrayList();
    jars.add(IntegrationUtils.getJar("lib", "cloud9"));
    jars.add(IntegrationUtils.getJar("lib", "guava"));
    jars.add(IntegrationUtils.getJar("lib", "dsiutils"));
    jars.add(IntegrationUtils.getJar("lib", "fastutil"));
    jars.add(IntegrationUtils.getJar("lib", "jsap"));
    jars.add(IntegrationUtils.getJar("lib", "sux4j"));
    jars.add(IntegrationUtils.getJar("lib", "commons-collections"));
    jars.add(IntegrationUtils.getJar("lib", "kamikaze"));

    String libjars = String.format("-libjars=%s", Joiner.on(",").join(jars));

    String[] cmdArgs = new String[] { "hadoop jar", IntegrationUtils.getJar("dist", "ivory"),
        ivory.app.PreprocessTrecCollection.class.getCanonicalName(),
        IntegrationUtils.LOCAL_ARGS, libjars,
        "-" + PreprocessCollection.COLLECTION_NAME, "CACM",
        "-" + PreprocessCollection.COLLECTION_PATH, collectionPath.toString(),
        "-" + PreprocessCollection.INDEX_PATH, index };

    IntegrationUtils.exec(Joiner.on(" ").join(cmdArgs));

    cmdArgs = (String[]) ArrayUtils.addAll(new String[] {
        "hadoop jar", IntegrationUtils.getJar("dist", "ivory"),
        ivory.app.BuildIndex.class.getCanonicalName(), IntegrationUtils.LOCAL_ARGS, libjars}, args);

    IntegrationUtils.exec(Joiner.on(" ").join(cmdArgs));

    // Done with indexing, now do retrieval run.
    String[] params = new String[] {
        "data/cacm/run.cacm.xml",
        "data/cacm/queries.cacm.xml" };

    BatchQueryRunner qr = new BatchQueryRunner(params, fs, index);

    long start = System.currentTimeMillis();
    qr.runQueries();
    long end = System.currentTimeMillis();

    LOG.info("Total query time: " + (end - start) + "ms");

    verifyAllResults(qr.getModels(), qr.getAllResults(), qr.getDocnoMapping(),
        new Qrels("data/cacm/qrels.cacm.txt"));
  }

  static final ImmutableMap<String, Float> DIR_BASE_AP = new ImmutableMap.Builder<String, Float>()
      .put("1"0.0648f).put("2"0.6667f).put("3"0.0992f).put("4"0.1031f).put("5"0.0873f)
      .put("6"0.4111f).put("7"0.2920f).put("8"0.2749f).put("9"0.1370f).put("10", 0.5866f)
      .put("11", 0.4833f).put("12", 0.4430f).put("13", 0.1831f).put("14", 0.1411f).put("15", 0.1509f)
      .put("16", 0.0595f).put("17", 0.1441f).put("18", 0.0711f).put("19", 0.3650f).put("20", 0.7667f)
      .put("21", 0.3003f).put("22", 0.7008f).put("23", 0.1396f).put("24", 0.1293f).put("25", 0.2931f)
      .put("26", 0.4900f).put("27", 0.2274f).put("28", 0.8103f).put("29", 0.3927f).put("30", 0.2476f)
      .put("31", 1.0000f).put("32", 0.6746f).put("33", 0.2000f).put("36", 0.3216f).put("37", 0.1385f)
      .put("38", 0.2331f).put("39", 0.2417f).put("40", 0.2087f).put("42", 0.0708f).put("43", 0.2513f)
      .put("44", 0.1688f).put("45", 0.3740f).put("48", 0.0595f).put("49", 0.3322f).put("57", 1.0000f)
      .put("58", 0.2976f).put("59", 0.3167f).put("60", 0.2203f).put("61", 0.5400f).put("62", 0.0545f)
      .put("63", 0.5209f).put("64", 0.0010f).build();

  static final ImmutableMap<String, Float> DIR_BASE_P10 = new ImmutableMap.Builder<String, Float>()
      .put("1"0.0000f).put("2"0.2000f).put("3"0.1000f).put("4"0.1000f).put("5"0.1000f)
      .put("6"0.3000f).put("7"0.6000f).put("8"0.2000f).put("9"0.2000f).put("10", 0.9000f)
      .put("11", 0.6000f).put("12", 0.2000f).put("13", 0.2000f).put("14", 0.2000f).put("15", 0.2000f)
      .put("16", 0.2000f).put("17", 0.2000f).put("18", 0.1000f).put("19", 0.5000f).put("20", 0.3000f)
      .put("21", 0.2000f).put("22", 0.7000f).put("23", 0.1000f).put("24", 0.2000f).put("25", 0.5000f)
      .put("26", 0.8000f).put("27", 0.6000f).put("28", 0.4000f).put("29", 0.3000f).put("30", 0.2000f)
      .put("31", 0.2000f).put("32", 0.2000f).put("33", 0.1000f).put("36", 0.5000f).put("37", 0.2000f)
      .put("38", 0.3000f).put("39", 0.3000f).put("40", 0.3000f).put("42", 0.0000f).put("43", 0.6000f)
      .put("44", 0.3000f).put("45", 0.5000f).put("48", 0.1000f).put("49", 0.2000f).put("57", 0.1000f)
      .put("58", 0.6000f).put("59", 0.7000f).put("60", 0.3000f).put("61", 0.9000f).put("62", 0.0000f)
      .put("63", 0.5000f).put("64", 0.0000f).build();

  static final ImmutableMap<String, Float> BM25_BASE_AP = new ImmutableMap.Builder<String, Float>()
      .put("1"0.0986f).put("2"0.6667f).put("3"0.1969f).put("4"0.1147f).put("5"0.0908f)
      .put("6"0.3194f).put("7"0.3278f).put("8"0.2869f).put("9"0.1180f).put("10", 0.5212f)
      .put("11", 0.4984f).put("12", 0.4402f).put("13", 0.1094f).put("14", 0.1305f).put("15", 0.1381f)
      .put("16", 0.0823f).put("17", 0.2006f).put("18", 0.0833f).put("19", 0.4212f).put("20", 0.8095f)
      .put("21", 0.2543f).put("22", 0.6604f).put("23", 0.0935f).put("24", 0.0794f).put("25", 0.3275f)
      .put("26", 0.4228f).put("27", 0.2148f).put("28", 0.7782f).put("29", 0.3509f).put("30", 0.2824f)
      .put("31", 0.3750f).put("32", 0.6720f).put("33", 0.0833f).put("36", 0.3687f).put("37", 0.1432f)
      .put("38", 0.1971f).put("39", 0.2048f).put("40", 0.1994f).put("42", 0.0683f).put("43", 0.2536f)
      .put("44", 0.1513f).put("45", 0.3893f).put("48", 0.0235f).put("49", 0.4152f).put("57", 1.0000f)
      .put("58", 0.2153f).put("59", 0.3775f).put("60", 0.2056f).put("61", 0.4790f).put("62", 0.0579f)
      .put("63", 0.4338f).put("64", 0.0000f).build();

  static final ImmutableMap<String, Float> BM25_BASE_P10 = new ImmutableMap.Builder<String, Float>()
      .put("1"0.2000f).put("2"0.2000f).put("3"0.1000f).put("4"0.1000f).put("5"0.2000f)
      .put("6"0.3000f).put("7"0.6000f).put("8"0.3000f).put("9"0.2000f).put("10", 0.6000f)
      .put("11", 0.8000f).put("12", 0.2000f).put("13", 0.2000f).put("14", 0.2000f).put("15", 0.2000f)
      .put("16", 0.2000f).put("17", 0.3000f).put("18", 0.2000f).put("19", 0.5000f).put("20", 0.3000f)
      .put("21", 0.2000f).put("22", 0.7000f).put("23", 0.0000f).put("24", 0.3000f).put("25", 0.6000f)
      .put("26", 0.6000f).put("27", 0.3000f).put("28", 0.4000f).put("29", 0.3000f).put("30", 0.2000f)
      .put("31", 0.2000f).put("32", 0.2000f).put("33", 0.0000f).put("36", 0.4000f).put("37", 0.1000f)
      .put("38", 0.2000f).put("39", 0.3000f).put("40", 0.2000f).put("42", 0.0000f).put("43", 0.6000f)
      .put("44", 0.2000f).put("45", 0.6000f).put("48", 0.0000f).put("49", 0.3000f).put("57", 0.1000f)
      .put("58", 0.4000f).put("59", 0.7000f).put("60", 0.3000f).put("61", 0.7000f).put("62", 0.0000f)
      .put("63", 0.2000f).put("64", 0.0000f).build();

  private static void verifyAllResults(Set<String> models,
      Map<String, Map<String, Accumulator[]>> results, DocnoMapping mapping, Qrels qrels) {
    Map<String, Map<String, Float>> AllModelsAPScores = Maps.newHashMap();
    AllModelsAPScores.put("cacm-dir-base", DIR_BASE_AP);
    AllModelsAPScores.put("cacm-bm25-base", BM25_BASE_AP);

    Map<String, Map<String, Float>> AllModelsP10Scores = Maps.newHashMap();
    AllModelsP10Scores.put("cacm-dir-base", DIR_BASE_P10);
    AllModelsP10Scores.put("cacm-bm25-base", BM25_BASE_P10);
   
    for (String model : models) {
      LOG.info("Verifying results of model \"" + model + "\"");
      verifyResults(model, results.get(model),
          AllModelsAPScores.get(model), AllModelsP10Scores.get(model), mapping, qrels);
      LOG.info("Done!");
    }
  }

  private static void verifyResults(String model, Map<String, Accumulator[]> results,
      Map<String, Float> apScores, Map<String, Float> p10Scores, DocnoMapping mapping,
      Qrels qrels) {
    float apSum = 0, p10Sum = 0;
    for (String qid : results.keySet()) {
      float ap = (float) RankedListEvaluator.computeAP(results.get(qid), mapping,
          qrels.getReldocsForQid(qid));

      float p10 = (float) RankedListEvaluator.computePN(10, results.get(qid), mapping,
          qrels.getReldocsForQid(qid));

      apSum += ap;
      p10Sum += p10;

      LOG.info("verifying qid " + qid + " for model " + model);

      assertEquals(apScores.get(qid), ap, 10e-6);
      assertEquals(p10Scores.get(qid), p10, 10e-6);
    }

    float MAP = (float) RankedListEvaluator.roundTo4SigFigs(apSum / 52f);
    float P10Avg = (float) RankedListEvaluator.roundTo4SigFigs(p10Sum / 52f);

    if (model.equals("cacm-dir-base")) {
      assertEquals(0.3171, MAP, 10e-5);
      assertEquals(0.3135, P10Avg, 10e-5);
    } else if (model.equals("cacm-bm25-base")) {
      assertEquals(0.2968, MAP, 10e-5);
      assertEquals(0.2923, P10Avg, 10e-5);
    }
  }
}
TOP

Related Classes of ivory.integration.local.IntegrationTestBaseCACM

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.