Package ivory.core

Examples of ivory.core.RetrievalEnvironment
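
RetrievalEnvironment is the entry point to an Ivory index: it is constructed from an index path and a Hadoop FileSystem, resolves the locations of index data files (docno mapping, index terms, df tables), and reads collection-level statistics. The excerpts below, drawn from Ivory's source tree, show the common usage patterns.

Before the excerpts, a minimal sketch of the basic pattern. The class name RetrievalEnvironmentSketch is ours, and it assumes an index has already been built at indexPath; every RetrievalEnvironment call in it appears in the excerpts below.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;

    import ivory.core.RetrievalEnvironment;

    public class RetrievalEnvironmentSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Root directory of an existing Ivory index.
        String indexPath = args[0];
        RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

        // Collection-level statistics stored alongside the index.
        System.out.println("collection: " + env.readCollectionName());
        System.out.println("documents:  " + env.readCollectionDocumentCount());
        System.out.println("terms:      " + env.readCollectionTermCount());
      }
    }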


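This excerpt (apparently from a configure() method, given the job parameter) asks the environment where the docno mapping data lives and loads it into a WikipediaDocnoMapping: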
      mDocMapping = new WikipediaDocnoMapping();
      FileSystem fs;
      try {
        fs = FileSystem.get(job);
        String indexPath = job.get("Ivory.IndexPath");
        RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

        // The environment knows where the docno mapping data lives.
        Path mappingFile = env.getDocnoMappingData();
        mDocMapping.loadMapping(mappingFile, fs);
      } catch (IOException e) {
        e.printStackTrace();
      }

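The next excerpt instantiates a DocnoMapping subclass named in the configuration. In Hadoop standalone mode (mapred.job.tracker equal to "local") the mapping is read straight from the index via the environment; otherwise it comes from the DistributedCache: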
        docMapping =
          (DocnoMapping) Class.forName(conf.get(Constants.DocnoMappingClass)).newInstance();

        // Take a different code path if we're in standalone mode.
        if (conf.get("mapred.job.tracker").equals("local")) {
          RetrievalEnvironment env = new RetrievalEnvironment(
              context.getConfiguration().get(Constants.IndexPath), localFs);
          docMapping.loadMapping(env.getDocnoMappingData(), localFs);
        } else {
          Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);
          // Load the docid-to-docno mapping; assume it is the first cached file.
          docMapping.loadMapping(localFiles[0], localFs);
        }

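Beyond file paths, the environment also reads collection statistics. In this driver excerpt (from a random-projection signature job, to judge from the class names), readCollectionTermCount() supplies the vocabulary size K, while the number of bits D comes from the configuration: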
    D = getConf().getInt("Ivory.NumOfBits", -1);
    String indexPath = getConf().get("Ivory.IndexPath");

    JobConf job = new JobConf(getConf(), WriteRandomVectors.class);
    FileSystem fs = FileSystem.get(job);
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    K = (int) env.readCollectionTermCount();
    job.setJobName("WriteRandomVectors");

    if (D <= 0 || K <= 0) {
      throw new RuntimeException("parameters not read properly");
    }

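A related driver uses the environment only for the collection name, which goes into the job title; the input, output, and random-vector paths come from PwsimEnvironment helpers. The excerpt begins with the tail of an earlier parameter check: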
      throw new RuntimeException("Parameters not read properly! Quitting...");
    }
    JobConf job = new JobConf(getConf(), ComputeSignaturesRandom.class);
    FileSystem fs = FileSystem.get(job);

    RetrievalEnvironment re = new RetrievalEnvironment(dir, fs);
    job.setJobName("ComputeSignatures_random_D=" + D + ":" + re.readCollectionName());

    String inputPath = PwsimEnvironment.getIntDocvectorsFile(dir, fs);
    String outputPath = PwsimEnvironment.getSignaturesDir(dir, D, "random");
    String randomVectorFile = PwsimEnvironment.getRandomVectorsDir(dir, D) + "/part-00000";

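This longer excerpt sets up a cross-lingual FindParallelSentences job. The e-side environment contributes its df-by-term and index-terms files, which are pushed to the workers through the DistributedCache along with sentence detectors, tokenizers, vocabularies, translation tables, and a classifier (pwsimPairsPath and eCollectionPath are defined earlier in the full source):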
    String fCollectionPath = args[3];

    String eDir = args[4];
    String fDir = args[5];

    RetrievalEnvironment eEnv = new RetrievalEnvironment(eDir, FileSystem.get(conf));

    String vocabDir = args[6];
    String eLang = args[7];
    String fLang = args[8];
    String classifierFile = args[9];

    float classifierThreshold = Float.parseFloat(args[10]);
    int classifierId = Integer.parseInt(args[11]);

    conf.setJobName("FindParallelSentences_" + fLang + "-" + eLang + "_F1=" + classifierThreshold + "[" + classifierId + "]");

    String eSentDetect = vocabDir + "/" + eLang + "-sent.bin";
    String eTokenizer = vocabDir + "/" + eLang + "-token.bin";
    String eVocabSrc = vocabDir + "/vocab." + eLang + "-" + fLang + "." + eLang;
    String eVocabTrg = vocabDir + "/vocab." + fLang + "-" + eLang + "." + eLang;

    String fSentDetect = vocabDir + "/" + fLang + "-sent.bin";
    String fTokenizer = vocabDir + "/" + fLang + "-token.bin";
    String fVocabSrc = vocabDir + "/vocab." + fLang + "-" + eLang + "." + fLang;
    String fVocabTrg = vocabDir + "/vocab." + eLang + "-" + fLang + "." + fLang;

    String f2e_ttableFile = vocabDir + "/ttable." + fLang + "-" + eLang;
    String e2f_ttableFile = vocabDir + "/ttable." + eLang + "-" + fLang;

    int numReducers = 50;

    conf.set("eDir", eDir);
    conf.set("fDir", fDir);
    conf.set("eLang", eLang);
    conf.set("fLang", fLang);
    conf.setInt("NumReducers", numReducers);
    conf.setFloat("ClassifierThreshold", classifierThreshold);
    conf.setInt("ClassifierId", classifierId);

    sLogger.info("caching files...");

    // e-files

    sLogger.info("caching files...0,1,2,3,4");

    DistributedCache.addCacheFile(new URI(eEnv.getDfByTermData()), conf);
    DistributedCache.addCacheFile(new URI(eSentDetect), conf);
    DistributedCache.addCacheFile(new URI(eTokenizer), conf);
    DistributedCache.addCacheFile(new URI(eVocabSrc), conf);
    DistributedCache.addCacheFile(new URI(eVocabTrg), conf);

    // f-files

    sLogger.info("caching files...5,6,7,8,9");

    DistributedCache.addCacheFile(new URI(fDir+"/transDf.dat"), conf);
    DistributedCache.addCacheFile(new URI(fSentDetect), conf);
    DistributedCache.addCacheFile(new URI(fTokenizer), conf);
    DistributedCache.addCacheFile(new URI(fVocabSrc), conf);
    DistributedCache.addCacheFile(new URI(fVocabTrg), conf);

    // cross-lang files

    sLogger.info("caching files...10,11,12,13,14");

    DistributedCache.addCacheFile(new URI(f2e_ttableFile), conf);
    DistributedCache.addCacheFile(new URI(e2f_ttableFile), conf);
    DistributedCache.addCacheFile(new URI(eEnv.getIndexTermsData()), conf);
    DistributedCache.addCacheFile(new URI(classifierFile), conf);
    DistributedCache.addCacheFile(new URI(pwsimPairsPath), conf);

    FileInputFormat.addInputPaths(conf, eCollectionPath);
    FileInputFormat.addInputPaths(conf, fCollectionPath);

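The next excerpt rejects standalone mode outright; a RetrievalEnvironment on the target index then names the terms, term-ids, id-to-term, and df files whose local copies must be located in the DistributedCache: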
          // Explicitly do not support local mode.
          throw new RuntimeException("Local mode not supported!");
        }

        FileSystem remoteFS = FileSystem.get(conf);
        RetrievalEnvironment targetEnv = new RetrievalEnvironment(conf.get(Constants.TargetIndexPath), remoteFS);

        termsFile = getFilename(targetEnv.getIndexTermsData());
        termidsFile = getFilename(targetEnv.getIndexTermIdsData());
        idToTermFile = getFilename(targetEnv.getIndexTermIdMappingData());
        dfFile = getFilename(targetEnv.getDfByIntData());

        FileSystem fs = FileSystem.getLocal(conf);
        Map<String, Path> pathMapping = Maps.newHashMap();

        // We need to figure out which file in the DistributedCache is which...

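Here the environment's index-terms and df-by-int files feed a cross-lingual document-frequency translation: a dictionary and df table are built from them and handed to CLIRUtils.translateDFTable together with the vocabularies and translation table: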
      sLogger.setLevel(Level.DEBUG);

      String indexPath = conf.get(Constants.IndexPath);
      FileSystem fs2 = FileSystem.get(conf);

      RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs2);

      String transDfFile = conf.get("TransDfFile");
      String eFile = conf.get("Ivory.E_Vocab_E2F");
      String fFile = conf.get("Ivory.F_Vocab_E2F");

      String e2fttableFile = conf.get("Ivory.TTable_E2F");
      String termsFile = env.getIndexTermsData();
      String dfByIntFile = env.getDfByIntData();

      if (!fs2.exists(new Path(fFile)) || !fs2.exists(new Path(eFile))
          || !fs2.exists(new Path(e2fttableFile)) || !fs2.exists(new Path(termsFile))
          || !fs2.exists(new Path(dfByIntFile))) {
        throw new RuntimeException("Error: Translation files do not exist!");
      }

      Vocab eVocab_e2f = null, fVocab_e2f = null;
      TTable_monolithic_IFAs en2DeProbs = null;
      try {
        eVocab_e2f = HadoopAlign.loadVocab(new Path(eFile), conf);
        fVocab_e2f = HadoopAlign.loadVocab(new Path(fFile), conf);

        en2DeProbs = new TTable_monolithic_IFAs(fs2, new Path(e2fttableFile), true);
      } catch (IOException e) {
        e.printStackTrace();
      }

      DefaultFrequencySortedDictionary dict = new DefaultFrequencySortedDictionary(
          new Path(env.getIndexTermsData()), new Path(env.getIndexTermIdsData()),
          new Path(env.getIndexTermIdMappingData()), fs2);
      DfTableArray dfTable = new DfTableArray(new Path(dfByIntFile), fs2);

      HMapIFW transDfTable = CLIRUtils.translateDFTable(eVocab_e2f, fVocab_e2f, en2DeProbs, dict, dfTable);

      SequenceFile.Writer writer = SequenceFile.createWriter(fs2, conf,
          new Path(transDfFile), IntWritable.class, FloatWritable.class);

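Finally, a sampling driver falls back on readCollectionDocumentCount() to derive a sampling frequency when no precomputed docnos file is available: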
    // Use an existing sample docnos file if one is provided; otherwise derive
    // a sampling frequency from the requested sample size.
    if (sampleDocnosFile != null && fs.exists(new Path(sampleDocnosFile))) {
      job.set("Ivory.SampleFile", sampleDocnosFile);
      DistributedCache.addCacheFile(new URI(sampleDocnosFile), job);
    } else if (sampleSize != -1) {
      RetrievalEnvironment env = new RetrievalEnvironment(workDir, fs);
      int collectionSize = env.readCollectionDocumentCount();
      sampleFreq = collectionSize / (float) sampleSize;
      job.setInt("SampleFrequency", (int) sampleFreq);
    } else {
      throw new RuntimeException("Either provide a sample size with option -" + SAMPLESIZE_OPTION
          + " or an existing sample docnos file with option -" + SAMPLEDOCNOS_OPTION);
    }
