Package uk.ac.ucl.panda

Source Code of uk.ac.ucl.panda.Panda

/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/

package uk.ac.ucl.panda;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.Properties;
import uk.ac.ucl.panda.indexing.TrecIndex;
import uk.ac.ucl.panda.retrieval.TrecRetrieval;
import uk.ac.ucl.panda.utility.io.*;

/**
*
* @author xxms
*/
public class Panda {

  // Paths
  private final String INDEX = "../Panda/index/";
  private final String DATA  = "../Panda/Data/Documents/";
  private final static String PANDA_ETC  = System.getProperty("panda.etc", "../Panda/etc/");
  private final static String PANDA_VAR  = System.getProperty("panda.var", "../Panda/var/");
  private final static String PANDA_HOME = System.getProperty("panda.home", "../Panda/");
 
  protected String newline = System.getProperty("line.separator");

  protected String fileseparator = System.getProperty("file.separator");

  protected BufferedReader buf = null;
  /** The unkown option */
  protected String unknownOption;

  /** Specifies whether a help message is printed */
  protected boolean printHelp;

  /** Specified whether a version message is printed */
  protected boolean printVersion;

  /** Specifies whether to index a collection */
  protected boolean indexing;

  /**
   * Specifies whether to perform trec_eval like evaluation.
   */
  protected boolean evaluation;

  /**
   * Specifies batch.
   */
  protected boolean batch;

  /**
   * Specifies var and mean.
   */
  protected boolean variance;

  protected boolean plot;
 
 
  protected int algorithm ;

  protected void version() {
    System.out.println("LuceneTrec version: 1.0 alpha");
    // System.out.println("Built on ");
  }

  /**
   * Prints a help message that explains the possible options.
   */
  protected void usage() {
    System.out.println("LuceneTrec version: 1.0 alpha");
    System.out.println("usage: java LuceneTrec [flags in any order]");
    System.out.println("");
    System.out.println("  -h --help    print this message");
    System.out.println("  -V --version   print version information");
    System.out.println("  -i --index     index a collection");
    System.out.println("  -b --batch  retrieve for batch");
    System.out.println("  -e --evaluate  evaluates the results");
    System.out.println("  -v --var   get var and mean for each query");
    System.out.println("           var/results with the specified qrels file");
    System.out.println("           in the file etc/trec.qrels");
    System.out.println("");
    System.out.println("If invoked with \'-i\', then both the direct and");
    System.out
    .println("inverted files are build, unless it is specified which");
    System.out.println("of the structures to build.");
    /*
     * System.out.println("  -d --direct    creates the direct file");
     * System.out.println(
     * "  -v --inverted  creates the inverted file, from an already existing direct"
     * );System.out.println(
     * "  -j --ifile     creates the inverted file, from scratch, single pass"
     * );System.out.println(
     * "  -l --langmodel   creates additional structures for language modelling"
     * ); System.out.println("");System.out.println(
     * "If invoked with \'-r\', there are the following options.");
     * System.out
     * .println("  -c value     parameter value for term frequency normalisation."
     * );System.out.println(
     * "           If it is not specified, then the default value for each");
     * System.out.println(
     * "           weighting model is used, eg PL2 => c=1, BM25 b=> 0.75");
     * System.out.println("  -q --queryexpand applies query expansion");
     * System.out.println("  -l --langmodel   applies language modelling");
     * System.out.println("");System.out.println(
     * "If invoked with \'-e\', there is the following options.");
     * System.out.println(
     * "  -p --perquery  reports the average precision for each query separately."
     * );System.out.println(
     * "  -n --named    evaluates for the named-page finding task.");
     * System.out
     * .println("  filename.res   restrict evaluation to filename.res only."
     * ); System.out.println("");System.out.println(
     * "If invoked with one of the following options, then the contents of the "
     * );System.out.println(
     * "corresponding data structure are shown in the standard output.");
     * System
     * .out.println("  --printdocid   prints the contents of the document index"
     * );System.out.println(
     * "  --printlexicon   prints the contents of the lexicon");
     * System.out.println
     * ("  --printinverted  prints the contents of the inverted file");
     * System
     * .out.println("  --printdirect  prints the contents of the direct file"
     * );System.out.println(
     * "  --printstats   prints statistics about the indexed collection");
     */
  }

  /**
   * set config files
   */

  private static void setproperties(Properties appProp) {
    appProp.setProperty("panda.home", PANDA_HOME);
    appProp.setProperty("panda.var", PANDA_VAR);
    appProp.setProperty("panda.etc", PANDA_ETC);
  }

  /**
   * @param args
   */
  public static void main(String[] args) throws IOException, Exception {
    Properties appProp = new Properties();
    setproperties(appProp);
    try {
      Panda panda = new Panda();
      int status = panda.processOptions(args);
      panda.applyOptions(status, appProp);
    } catch (java.lang.OutOfMemoryError oome) {
      System.err.println(oome);
      oome.printStackTrace();
      System.exit(1);
    }
  }

  /**
   * Processes the command line arguments and sets the corresponding
   * properties accordingly.
   *
   * @param args
   *            the command line arguments.
   * @return int zero if the command line arguments are processed
   *         successfully, otherwise it returns an error code.
   */
  protected int processOptions(String[] args) {
    if (args.length == 0)
      return ERROR_NO_ARGUMENTS;

    int pos = 0;
    while (pos < args.length) {
      if (args[pos].equals("-h") || args[pos].equals("--help"))
        printHelp = true;

      else if (args[pos].equals("-i") || args[pos].equals("--index"))
        indexing = true;

      else if (args[pos].equals("-v") || args[pos].equals("--var"))
        variance = true;

      else if (args[pos].equals("-e") || args[pos].equals("--evaluate")) {
        evaluation = true;
        algorithm = Integer.parseInt(args[++pos]) ;
      } else if (args[pos].equals("-b") || args[pos].equals("--batch")) {
        batch = true;
      } else if (args[pos].equals("-p") || args[pos].equals("--plot")) {
        plot = true;
      }

      else {
        unknownOption = args[pos];
        return ERROR_UNKNOWN_OPTION;
      }
      pos++;
    }

    return ARGUMENTS_OK;
  }

  /**
   * Calls the required classes from Terrier.
   */
  public void run(Properties appProp) throws IOException, Exception {
    if (printVersion) {
      version();
      return;
    }
    if (printHelp) {
      usage();
      return;
    }
    long startTime = System.currentTimeMillis();
    if (batch) {
      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "IndexDir.config");
      String index = buf.readLine();

      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "Topics.config");
      String topics = buf.readLine();

      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "Qrels.config");
      String qrels = buf.readLine();

      // System.out.println(index);
      // System.out.println(topics);
      // System.out.println(qrels);

      TrecRetrieval trecsearch = new TrecRetrieval();
      trecsearch.batch(index, topics, qrels, appProp
          .getProperty("panda.var"));

    }

    else if (evaluation) {
      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "IndexDir.config");
      String index = buf.readLine();

      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "Topics.config");
      String topics = buf.readLine();

      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "Qrels.config");
      String qrels = buf.readLine();

      // System.out.println(index);
      // System.out.println(topics);
      // System.out.println(qrels);

      TrecRetrieval trecsearch = new TrecRetrieval();
      trecsearch.process(index, topics, qrels, appProp
          .getProperty("panda.var"),algorithm);

    } else if (variance) {
      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "IndexDir.config");
      String index = buf.readLine();

      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "Topics.config");
      String topics = buf.readLine();

      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "Qrels.config");
      String qrels = buf.readLine();

      // System.out.println(index);
      // System.out.println(topics);
      // System.out.println(qrels);

      TrecRetrieval trecsearch = new TrecRetrieval();
      trecsearch.process_var(index, topics, qrels, appProp
          .getProperty("panda.var"));

    } else if (variance) {
      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "IndexDir.config");
      String index = buf.readLine();

      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "Topics.config");
      String topics = buf.readLine();

      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "Qrels.config");
      String qrels = buf.readLine();

      // System.out.println(index);
      // System.out.println(topics);
      // System.out.println(qrels);

      TrecRetrieval trecsearch = new TrecRetrieval();
      trecsearch.process_var(index, topics, qrels, appProp
          .getProperty("panda.var"));

    } else if (plot) {
      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "IndexDir.config");
      String index = buf.readLine();

      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "Topics.config");
      String topics = buf.readLine();

      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "Qrels.config");
      String qrels = buf.readLine();

      // System.out.println(index);
      // System.out.println(topics);
      // System.out.println(qrels);

      TrecRetrieval trecsearch = new TrecRetrieval();
      trecsearch.process_plot(index, topics, qrels, appProp
          .getProperty("panda.var"));

    } else if (indexing) {

      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "IndexDir.config");
     
      buf = FileReader.openFileReader(appProp.getProperty("panda.etc")
          + fileseparator + "DataDir.config");
      TrecIndex trecindex = new TrecIndex();
      // System.out.println(appProp);
      // System.s(0);
      trecindex.pocess(INDEX, DATA, appProp);
    }

    long endTime = System.currentTimeMillis();
    System.err.println("Time elapsed: " + (endTime - startTime) / 1000.0d
        + " seconds.");
  }

  public void applyOptions(int status, Properties appProp)
  throws IOException, Exception {
    switch (status) {
    case ERROR_NO_ARGUMENTS:
      usage();
      break;
    case ERROR_NO_C_VALUE:
      System.err
      .println("A value for the term frequency normalisation parameter");
      System.err
      .println("is required. Please specify it with the option '-c value'");
      break;
    case ERROR_CONFLICTING_ARGUMENTS:
      System.err
      .println("There is a conclict between the specified options. For example,");
      System.err
      .println("option '-c' is used only in conjuction with option '-r'.");
      System.err
      .println("In addition, options '-v' or '-d' are used only in conjuction");
      System.err.println("with option '-i'");
      break;
    case ERROR_DIRECT_FILE_EXISTS:
      System.err
      .println("Trying to build a new direct file, while there exists a previous");

      break;
    case ERROR_DIRECT_FILE_NOT_EXISTS:
      System.err
      .println("Trying to build an inverted file, while there is no direct file.");
      break;
    case ERROR_INVERTED_FILE_EXISTS:
      System.err
      .println("Trying to build a new inverted file, while there exists a previous. Please delete the .if file in the index directory.");
      System.err.println("inverted file.");
      break;
    case ERROR_PRINT_DOCINDEX_FILE_NOT_EXISTS:
      System.err
      .println("The specified document index file does not exist.");
      break;
    case ERROR_PRINT_INVERTED_FILE_NOT_EXISTS:
      System.err.println("The specified inverted index does not exist.");
      break;
    case ERROR_PRINT_DIRECT_FILE_NOT_EXISTS:
      System.err.println("The specified direct index does not exist.");
      break;
    case ERROR_UNKNOWN_OPTION:
      System.err.println("The option '" + unknownOption
          + "' is not recognised");
      break;
    case ERROR_DIRECT_NOT_INDEXING:
      System.err
      .println("The option '-d' or '--direct' can be used only while indexing with option '-i'.");
      break;
    case ERROR_INVERTED_NOT_INDEXING:
      System.err
      .println("The option '-i' or '--inverted' can be used only while indexing with option '-i'.");
      break;
    case ERROR_EXPAND_NOT_RETRIEVE:
      System.err
      .println("The option '-q' or '--queryexpand' can be used only while retrieving with option '-r'.");
      break;
    case ERROR_LANGUAGEMODEL_NOT_RETRIEVE:
      System.err
      .println("The option '-l' or '--langmodel' can be used only while retrieving with option '-r'.");
      break;
    case ERROR_GIVEN_C_NOT_RETRIEVING:
      System.err
      .println("A value for the parameter c can be specified only while retrieving with option '-r'.");
      break;
    case ARGUMENTS_OK:
    default:
      run(appProp);
    }
  }

  protected static final int ARGUMENTS_OK = 0;
  protected static final int ERROR_NO_ARGUMENTS = 1;
  protected static final int ERROR_NO_C_VALUE = 2;
  protected static final int ERROR_CONFLICTING_ARGUMENTS = 3;
  protected static final int ERROR_DIRECT_FILE_EXISTS = 4;
  protected static final int ERROR_INVERTED_FILE_EXISTS = 5;
  protected static final int ERROR_DIRECT_FILE_NOT_EXISTS = 6;
  protected static final int ERROR_PRINT_DOCINDEX_FILE_NOT_EXISTS = 7;
  protected static final int ERROR_PRINT_LEXICON_FILE_NOT_EXISTS = 8;
  protected static final int ERROR_PRINT_INVERTED_FILE_NOT_EXISTS = 9;
  protected static final int ERROR_PRINT_STATS_FILE_NOT_EXISTS = 10;
  protected static final int ERROR_PRINT_DIRECT_FILE_NOT_EXISTS = 11;
  protected static final int ERROR_UNKNOWN_OPTION = 12;
  protected static final int ERROR_DIRECT_NOT_INDEXING = 13;
  protected static final int ERROR_INVERTED_NOT_INDEXING = 14;
  protected static final int ERROR_EXPAND_NOT_RETRIEVE = 15;
  protected static final int ERROR_GIVEN_C_NOT_RETRIEVING = 16;
  protected static final int ERROR_LANGUAGEMODEL_NOT_RETRIEVE = 17;

}
TOP

Related Classes of uk.ac.ucl.panda.Panda

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.