int blockSize = getConf().getInt("Ivory.BlockSize", -1);
int topN = getConf().getInt("Ivory.TopN", -1);
FileSystem fs = FileSystem.get(getConf());
RetrievalEnvironment re = new RetrievalEnvironment(indexPath, fs);
String collectionName = re.readCollectionName();
int numDocs = re.readCollectionDocumentCount();
Path docLengthPath = re.getDoclengthsData();
String scoringModel = getConf().get("Ivory.ScoringModel");
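// Log collection characteristics and job parameters for this run.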
sLogger.info("Characteristics of the collection:");
sLogger.info(" - CollectionName: " + collectionName);
sLogger.info(" - IndexPath: " + indexPath);
sLogger.info("Characteristics of the job:");
sLogger.info(" - NumMapTasks: " + mapTasks);
sLogger.info(" - NumReduceTasks: " + reduceTasks);
sLogger.info(" - DfCut: " + getConf().getInt("Ivory.DfCut", 0));
sLogger.info(" - BlockSize: " + blockSize);
sLogger.info(" - ScoringModel: " + scoringModel);
sLogger.info(" - topN: " + topN);
sLogger.info(" - OutputPath: " + outputPath);
getConf().setInt("Ivory.CollectionDocumentCount", numDocs);
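// Skip the computation entirely if output from a previous run is already present.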
if (fs.exists(new Path(outputPath))) {
sLogger.info("PCP output path already exists!");
return 0;
}
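// Split the document id space into fixed-size blocks and launch one MapReduce job per block.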
int numBlocks = (numDocs + blockSize - 1) / blockSize; // ceiling division: avoids an empty trailing block when numDocs is a multiple of blockSize
for (int i = 0; i < numBlocks; i++) {
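// Document id range covered by this block; the last block extends to numDocs.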
int start = blockSize * i;
int end = i == numBlocks - 1 ? numDocs : blockSize * (i + 1);
JobConf conf = new JobConf(getConf(), PCP.class);
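// Ship the document-length data to every task via the distributed cache.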
DistributedCache.addCacheFile(docLengthPath.toUri(), conf);
sLogger.info("block " + i + ": " + start + "-" + end);
conf.setInt("Ivory.BlockStart", start);
conf.setInt("Ivory.BlockEnd", end);
conf.setJobName("PCP:" + collectionName + "-dfCut=" + dfCut
+ (topN > 0 ? "-topN" + topN : "-all") + ":Block #" + i);
conf.setNumMapTasks(mapTasks);
conf.setNumReduceTasks(reduceTasks);
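// Each block writes to its own subdirectory under the main output path; the job reads the index's postings as input.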
String currentOutputPath = outputPath + "/block" + i;
FileInputFormat.setInputPaths(conf, new Path(re.getPostingsDirectory()));
FileOutputFormat.setOutputPath(conf, new Path(currentOutputPath));
conf.setInputFormat(SequenceFileInputFormat.class);
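// Job output: an IntWritable document id paired with an HMapIFW (int-to-float map) value.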
conf.setOutputKeyClass(IntWritable.class);
conf.setOutputValueClass(HMapIFW.class);