Package ivory.core

Examples of ivory.core.RetrievalEnvironment


      sLogger.setLevel(Level.DEBUG);

      String indexPath = conf.get("Ivory.IndexPath");
      FileSystem fs2  = FileSystem.get(conf);

      RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs2);

      String transDfFile = conf.get("TransDfFile");
      String eFile = conf.get("Ivory.E_Vocab_E2F");
      String fFile = conf.get("Ivory.F_Vocab_E2F");

      String e2fttableFile = conf.get("Ivory.TTable_E2F");
      String termsFile = env.getIndexTermsData();
      String dfByTermFile = env.getDfByTermData();

      sLogger.debug(e2fttableFile+eFile+termsFile);
      if(!fs2.exists(new Path(fFile)) || !fs2.exists(new Path(eFile)) || !fs2.exists(new Path(e2fttableFile)) || !fs2.exists(new Path(termsFile)) || !fs2.exists(new Path(dfByTermFile))){
        throw new RuntimeException("Error: Translation files do not exist!");
      }
View Full Code Here


    }
    JobConf job = new JobConf(conf, ComputeSignaturesMinhash.class);
//    job.set("mapred.job.tracker", "local");
//    job.set("fs.default.name", "file:///");
    FileSystem fs = FileSystem.get(job);
    RetrievalEnvironment env = new RetrievalEnvironment(dir, fs);
    int vocabSize = (int) env.readCollectionTermCount();

    job.setJobName("ComputeSignatures_minhash");//+"_D="+D+"_"+RetrievalEnvironment.readCollectionName(fs, dir));

    String inputPath = PwsimEnvironment.getFileNameWithPars(dir, "IntDocs");
    String outputPath = PwsimEnvironment.getFileNameWithPars(dir, "SignaturesMinhash");
View Full Code Here

      System.out.println("numBatchFiles: "+numBatchFiles);
      throw new RuntimeException("Parameters not read properly! Quitting...");
    }
    JobConf job = new JobConf(conf, ComputeSignaturesSimhash.class);
    FileSystem fs = FileSystem.get(job);
    RetrievalEnvironment env = new RetrievalEnvironment(dir, fs);
    job.setJobName("ComputeSignatures_simhash"+"_D=64_"+env.readCollectionName());

    String inputPath = PwsimEnvironment.getFileNameWithPars(dir, "TermDocs");
    String outputPath = PwsimEnvironment.getFileNameWithPars(dir, "SignaturesSimhash");
   
    int numMappers = 300;
View Full Code Here

    String targetLangDir = args[0];
    String srcLangDir = args[1];
    config.setInt("Ivory.NumMapTasks", 100);

    RetrievalEnvironment targetEnv = new RetrievalEnvironment(targetLangDir, hdfs);
    RetrievalEnvironment srcEnv = new RetrievalEnvironment(srcLangDir, hdfs);

    config.set("Ivory.CollectionName", targetEnv.readCollectionName()+"_"+srcEnv.readCollectionName());
    config.set("Ivory.IndexPath", targetLangDir);

    // collection size is the sum of the two collections' sizes
    int collSize = targetEnv.readCollectionDocumentCount()+srcEnv.readCollectionDocumentCount();
    config.setInt("Ivory.CollectionDocumentCount", collSize);

    ///////Parameters/////////////
    numOfBits = Integer.parseInt(args[2]);
    signatureType = args[3].toLowerCase();
View Full Code Here

      throw new RuntimeException("Error: Unknown signature type.");
    }
  }
 
  public static String getFileNameWithPars(String dir, String fileName) throws Exception{
    RetrievalEnvironment env = new RetrievalEnvironment(dir, FileSystem.get(new Configuration()));
    if(fileName.equals("TermDocs")){
      return env.getWeightedTermDocVectorsDirectory();
    }else if(fileName.equals("IntDocs")){
      return env.getWeightedIntDocVectorsDirectory();
    }else if(fileName.equals("SampleIntDocs")){
      String s = env.getWeightedIntDocVectorsDirectory();
      return s.substring(0, s.length()-1)+"_sample="+sampleSize;
    }else if(fileName.equals("SampleDocnos")){
      return dir + "/sample-docnos_"+sampleSize;
    }else if(fileName.equals("RandomVectors")){
      return dir + "/randomvectors_D="+numOfBits;
View Full Code Here

    config.set("Ivory.IndexPath", dir);
    config.setInt("Ivory.NumOfBits", numOfBits);
   
    String type = (signatureType.charAt(0)+"").toUpperCase()+signatureType.substring(1, signatureType.length());    //capitalize first character
    RetrievalEnvironment env = new RetrievalEnvironment(dir, fs);
    String collName = env.readCollectionName();
    config.set("Ivory.CollectionName", collName);
   
    PwsimEnvironment.setClassTypes(config);
    int batchSize = -1;
    try {
      if(batchSizeGiven){
        batchSize = Integer.parseInt(args[3]);
        if(batchSize>0){
          int numDocs = env.readCollectionDocumentCount();
          numBatchFiles = numDocs / batchSize;
          if(numDocs % batchSize > 0) numBatchFiles++;
          System.out.println("numBatchFiles: "+numBatchFiles);
          config.setInt("NumBatch", numBatchFiles);
        }
View Full Code Here

    String indexPath = args[0];

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    DfTableArray dfs = new DfTableArray(new Path(env.getDfByIntData()), fs);

    String input = null;
    BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
    System.out.print("lookup > ");
    while ((input = stdin.readLine()) != null) {
View Full Code Here

    }
  }

  protected void loadRetrievalEnv() throws ConfigurationException {
    try {
      env = new RetrievalEnvironment(indexPath, fs);
      env.initialize(true);
    } catch (IOException e) {
      throw new ConfigurationException("Failed to instantiate RetrievalEnvironment: "
          + e.getMessage());
    }
View Full Code Here

    String indexPath = "c:/Research/ivory-workspace";

    Configuration conf = new Configuration();
    FileSystem fileSys= FileSystem.getLocal(conf);

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fileSys);
   
    Path termsFilePath = new Path(env.getIndexTermsData());
   
    Path dfByTermFilePath = new Path(env.getDfByTermData());
    Path cfByTermFilePath = new Path(env.getCfByTermData());

    System.out.println("PrefixEncodedGlobalStats");
    PrefixEncodedGlobalStats globalStatsMap = new PrefixEncodedGlobalStats(termsFilePath);
    System.out.println("PrefixEncodedGlobalStats1");
    globalStatsMap.loadDFStats(dfByTermFilePath);
View Full Code Here

    if (!fs.exists(p)) {
      sLogger.info("index path doesn't exist, creating...");
      fs.mkdirs(p);
    }

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    // Look for the docno mapping, which maps from docid (String) to docno
    // (sequentially-numbered integer). If it doesn't exist, create it.
    Path mappingFile = env.getDocnoMappingData();
    if (!fs.exists(mappingFile)) {
      sLogger.info(mappingFile + " doesn't exist, creating...");
      String[] arr = new String[] { collection, indexPath + "/medline-docid-tmp",  mappingFile.toString(), new Integer(numMappers).toString() };
      NumberMedlineCitations tool = new NumberMedlineCitations();
      tool.setConf(conf);
View Full Code Here

TOP

Related Classes of ivory.core.RetrievalEnvironment

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.