Package ivory.integration.local

Source Code of ivory.integration.local.IntegrationTestBaseCACM

package ivory.integration.local;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import ivory.app.BuildIndex;
import ivory.app.PreprocessCollection;
import ivory.app.PreprocessTrecCollection;
import ivory.core.eval.Qrels;
import ivory.core.eval.RankedListEvaluator;
import ivory.integration.IntegrationUtils;
import ivory.smrf.retrieval.Accumulator;
import ivory.smrf.retrieval.BatchQueryRunner;

import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import edu.umd.cloud9.collection.DocnoMapping;

public abstract class IntegrationTestBaseCACM {
  private static final Logger LOG = Logger.getLogger(IntegrationTestBaseCACM.class);
  private static final Path collectionPath = new Path("data/cacm/cacm-collection.xml.gz");

  public void runBuildIndex(String index, String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);

    assertTrue(fs.exists(collectionPath));

    fs.delete(new Path(index), true);

    List<String> jars = Lists.newArrayList();
    jars.add(IntegrationUtils.getJar("lib", "cloud9"));
    jars.add(IntegrationUtils.getJar("lib", "guava-13"));
    jars.add(IntegrationUtils.getJar("lib", "guava-r09"));
    jars.add(IntegrationUtils.getJar("lib", "dsiutils"));
    jars.add(IntegrationUtils.getJar("lib", "fastutil"));
    jars.add(IntegrationUtils.getJar("lib", "jsap"));
    jars.add(IntegrationUtils.getJar("lib", "sux4j"));
    jars.add(IntegrationUtils.getJar("lib", "commons-collections"));
    jars.add(IntegrationUtils.getJar("lib", "kamikaze"));
    jars.add(IntegrationUtils.getJar("dist", "ivory"));

    String libjars = String.format("-libjars=%s", Joiner.on(",").join(jars));

    PreprocessTrecCollection.main(new String[] { libjars,
        IntegrationUtils.D_JT_LOCAL, IntegrationUtils.D_NN_LOCAL,
        "-" + PreprocessCollection.COLLECTION_NAME, "CACM",
        "-" + PreprocessCollection.COLLECTION_PATH, collectionPath.toString(),
        "-" + PreprocessCollection.INDEX_PATH, index });
    BuildIndex.main((String[]) ArrayUtils.addAll(new String[] { libjars,
        IntegrationUtils.D_JT_LOCAL, IntegrationUtils.D_NN_LOCAL }, args));

    // Done with indexing, now do retrieval run.
    String[] params = new String[] {
        "data/cacm/run.cacm.xml",
        "data/cacm/queries.cacm.xml" };

    BatchQueryRunner qr = new BatchQueryRunner(params, fs, index);

    long start = System.currentTimeMillis();
    qr.runQueries();
    long end = System.currentTimeMillis();

    LOG.info("Total query time: " + (end - start) + "ms");

    verifyAllResults(qr.getModels(), qr.getAllResults(), qr.getDocnoMapping(),
        new Qrels("data/cacm/qrels.cacm.txt"));
  }

  static final ImmutableMap<String, Float> DIR_BASE_AP = new ImmutableMap.Builder<String, Float>()
      .put("1", 0.0648f).put("2", 0.6667f).put("3", 0.0992f).put("4", 0.1031f).put("5", 0.0809f)
      .put("6", 0.4111f).put("7", 0.2920f).put("8", 0.2749f).put("9", 0.1370f).put("10", 0.4819f)
      .put("11", 0.4833f).put("12", 0.4430f).put("13", 0.1795f).put("14", 0.0827f).put("15", 0.1509f)
      .put("16", 0.0595f).put("17", 0.1369f).put("18", 0.0711f).put("19", 0.2492f).put("20", 0.7667f)
      .put("21", 0.3003f).put("22", 0.7008f).put("23", 0.1396f).put("24", 0.0953f).put("25", 0.2880f)
      .put("26", 0.4900f).put("27", 0.2274f).put("28", 0.8103f).put("29", 0.2699f).put("30", 0.2476f)
      .put("31", 1.0000f).put("32", 0.6667f).put("33", 0.2000f).put("36", 0.3216f).put("37", 0.1385f)
      .put("38", 0.2331f).put("39", 0.2417f).put("40", 0.2087f).put("42", 0.0691f).put("43", 0.2344f)
      .put("44", 0.1688f).put("45", 0.3740f).put("48", 0.0565f).put("49", 0.3322f).put("57", 1.0000f)
      .put("58", 0.2821f).put("59", 0.2942f).put("60", 0.2203f).put("61", 0.4712f).put("62", 0.0374f)
      .put("63", 0.4123f).put("64", 0.0010f).build();

  static final ImmutableMap<String, Float> DIR_BASE_P10 = new ImmutableMap.Builder<String, Float>()
      .put("1", 0.0000f).put("2", 0.2000f).put("3", 0.1000f).put("4", 0.1000f).put("5", 0.1000f)
      .put("6", 0.3000f).put("7", 0.6000f).put("8", 0.2000f).put("9", 0.2000f).put("10", 0.8000f)
      .put("11", 0.6000f).put("12", 0.2000f).put("13", 0.2000f).put("14", 0.2000f).put("15", 0.2000f)
      .put("16", 0.2000f).put("17", 0.2000f).put("18", 0.1000f).put("19", 0.4000f).put("20", 0.3000f)
      .put("21", 0.2000f).put("22", 0.7000f).put("23", 0.1000f).put("24", 0.1000f).put("25", 0.5000f)
      .put("26", 0.8000f).put("27", 0.6000f).put("28", 0.4000f).put("29", 0.3000f).put("30", 0.2000f)
      .put("31", 0.2000f).put("32", 0.2000f).put("33", 0.1000f).put("36", 0.5000f).put("37", 0.2000f)
      .put("38", 0.3000f).put("39", 0.3000f).put("40", 0.3000f).put("42", 0.0000f).put("43", 0.6000f)
      .put("44", 0.3000f).put("45", 0.5000f).put("48", 0.1000f).put("49", 0.2000f).put("57", 0.1000f)
      .put("58", 0.6000f).put("59", 0.7000f).put("60", 0.3000f).put("61", 0.8000f).put("62", 0.0000f)
      .put("63", 0.4000f).put("64", 0.0000f).build();

  static final ImmutableMap<String, Float> BM25_BASE_AP = new ImmutableMap.Builder<String, Float>()
      .put("1", 0.0986f).put("2", 0.6667f).put("3", 0.1969f).put("4", 0.1147f).put("5", 0.0890f)
      .put("6", 0.3194f).put("7", 0.3278f).put("8", 0.2869f).put("9", 0.1180f).put("10", 0.4399f)
      .put("11", 0.4984f).put("12", 0.4402f).put("13", 0.1058f).put("14", 0.0707f).put("15", 0.1381f)
      .put("16", 0.0823f).put("17", 0.1933f).put("18", 0.0833f).put("19", 0.2759f).put("20", 0.8095f)
      .put("21", 0.2543f).put("22", 0.6604f).put("23", 0.0935f).put("24", 0.0493f).put("25", 0.3234f)
      .put("26", 0.4228f).put("27", 0.2148f).put("28", 0.7782f).put("29", 0.3161f).put("30", 0.2824f)
      .put("31", 0.3750f).put("32", 0.6667f).put("33", 0.0833f).put("36", 0.3687f).put("37", 0.1432f)
      .put("38", 0.1971f).put("39", 0.2048f).put("40", 0.1994f).put("42", 0.0666f).put("43", 0.2365f)
      .put("44", 0.1513f).put("45", 0.3893f).put("48", 0.0219f).put("49", 0.4152f).put("57", 1.0000f)
      .put("58", 0.2093f).put("59", 0.3508f).put("60", 0.2056f).put("61", 0.4356f).put("62", 0.0423f)
      .put("63", 0.3762f).put("64", 0.0000f).build();

  static final ImmutableMap<String, Float> BM25_BASE_P10 = new ImmutableMap.Builder<String, Float>()
      .put("1", 0.2000f).put("2", 0.2000f).put("3", 0.1000f).put("4", 0.1000f).put("5", 0.2000f)
      .put("6", 0.3000f).put("7", 0.6000f).put("8", 0.3000f).put("9", 0.2000f).put("10", 0.6000f)
      .put("11", 0.8000f).put("12", 0.2000f).put("13", 0.2000f).put("14", 0.2000f).put("15", 0.2000f)
      .put("16", 0.2000f).put("17", 0.3000f).put("18", 0.2000f).put("19", 0.4000f).put("20", 0.3000f)
      .put("21", 0.2000f).put("22", 0.7000f).put("23", 0.0000f).put("24", 0.2000f).put("25", 0.6000f)
      .put("26", 0.6000f).put("27", 0.3000f).put("28", 0.4000f).put("29", 0.3000f).put("30", 0.2000f)
      .put("31", 0.2000f).put("32", 0.2000f).put("33", 0.0000f).put("36", 0.4000f).put("37", 0.1000f)
      .put("38", 0.2000f).put("39", 0.3000f).put("40", 0.2000f).put("42", 0.0000f).put("43", 0.6000f)
      .put("44", 0.2000f).put("45", 0.6000f).put("48", 0.0000f).put("49", 0.3000f).put("57", 0.1000f)
      .put("58", 0.4000f).put("59", 0.7000f).put("60", 0.3000f).put("61", 0.7000f).put("62", 0.0000f)
      .put("63", 0.2000f).put("64", 0.0000f).build();

  private static void verifyAllResults(Set<String> models,
      Map<String, Map<String, Accumulator[]>> results, DocnoMapping mapping, Qrels qrels) {
    Map<String, Map<String, Float>> AllModelsAPScores = Maps.newHashMap();
    AllModelsAPScores.put("cacm-dir-base", DIR_BASE_AP);
    AllModelsAPScores.put("cacm-bm25-base", BM25_BASE_AP);

    Map<String, Map<String, Float>> AllModelsP10Scores = Maps.newHashMap();
    AllModelsP10Scores.put("cacm-dir-base", DIR_BASE_P10);
    AllModelsP10Scores.put("cacm-bm25-base", BM25_BASE_P10);
   
    for (String model : models) {
      LOG.info("Verifying results of model \"" + model + "\"");
      verifyResults(model, results.get(model),
          AllModelsAPScores.get(model), AllModelsP10Scores.get(model), mapping, qrels);
      LOG.info("Done!");
    }
  }

  private static void verifyResults(String model, Map<String, Accumulator[]> results,
      Map<String, Float> apScores, Map<String, Float> p10Scores, DocnoMapping mapping,
      Qrels qrels) {
    float apSum = 0, p10Sum = 0;
    for (String qid : results.keySet()) {
      float ap = (float) RankedListEvaluator.computeAP(results.get(qid), mapping,
          qrels.getReldocsForQid(qid));

      float p10 = (float) RankedListEvaluator.computePN(10, results.get(qid), mapping,
          qrels.getReldocsForQid(qid));

      apSum += ap;
      p10Sum += p10;

      LOG.info("verifying qid " + qid + " for model " + model);
      assertEquals(apScores.get(qid), ap, 10e-6);
      assertEquals(p10Scores.get(qid), p10, 10e-6);
    }

    float MAP = (float) RankedListEvaluator.roundTo4SigFigs(apSum / 52f);
    float P10Avg = (float) RankedListEvaluator.roundTo4SigFigs(p10Sum / 52f);

    if (model.equals("cacm-dir-base")) {
      assertEquals(0.3032, MAP, 10e-5);
      assertEquals(0.3038, P10Avg, 10e-5);
    } else if (model.equals("cacm-bm25-base")) {
      assertEquals(0.2863, MAP, 10e-5);
      assertEquals(0.2885, P10Avg, 10e-5);
    }
  }
}
TOP

Related Classes of ivory.integration.local.IntegrationTestBaseCACM

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.