Package ivory.core

Examples of ivory.core.RetrievalEnvironment


    }
    LOG.info("Bigram segmentation = " + bigramSegment);

    // initialize environment to access index
    try {
      env = new RetrievalEnvironment(conf.get(Constants.IndexPath), fs);
      env.initialize(true);
    } catch (ConfigurationException e) {
      e.printStackTrace();
    }
View Full Code Here


    // initialize environment to access index
    // skip this if we only want to translate query (i.e., no retrieval)
    if (translateOnly == null) {   
      try {
        env = new RetrievalEnvironment(conf.get(Constants.IndexPath), fs);
        env.initialize(true);
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }
View Full Code Here

      mCollectionDocCount = job.getInt("Ivory.CollectionDocumentCount", -1);

      try {
        if (job.get("mapred.job.tracker").equals("local")) {
          FileSystem fs = FileSystem.getLocal(job);
          RetrievalEnvironment re = new RetrievalEnvironment(job.get("Ivory.IndexPath"),
              fs);
          Path path = re.getDoclengthsData();
          sLogger.debug("Reading doclengths: " + path);
          mDocLengthTable = new DocLengthTable2B(path, fs);
        } else {
          Path[] localFiles = DistributedCache.getLocalCacheFiles(job);
          mDocLengthTable = new DocLengthTable2B(localFiles[0], FileSystem.getLocal(job));
View Full Code Here

    String indexPath = args[0];

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    Path termsFilePath = new Path(env.getIndexTermsData());
    Path termIDsFilePath = new Path(env.getIndexTermIdsData());
    Path idToTermFilePath = new Path(env.getIndexTermIdMappingData());

    DefaultCachedFrequencySortedDictionary dictionary =
        new DefaultCachedFrequencySortedDictionary(termsFilePath, termIDsFilePath,
            idToTermFilePath, 100, fs);
View Full Code Here

    }   
  }
 
  protected void loadRetrievalEnv() throws ConfigurationException{
    try {
      env = new RetrievalEnvironment(indexPath, fs);
      env.initialize(true);
    } catch (IOException e) {
      throw new ConfigurationException("Failed to instantiate RetrievalEnvironment: "
          + e.getMessage());
    }
View Full Code Here

    String indexPath = args[0];

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    Path termsFilePath = new Path(env.getIndexTermsData());
    Path termIDsFilePath = new Path(env.getIndexTermIdsData());
    Path idToTermFilePath = new Path(env.getIndexTermIdMappingData());

    DefaultFrequencySortedDictionary dictionary =
        new DefaultFrequencySortedDictionary(termsFilePath, termIDsFilePath, idToTermFilePath, fs);

    int nTerms = dictionary.size();
View Full Code Here

  public IntDocVectorsForwardIndex(String indexPath, FileSystem fs, boolean weighted)
      throws IOException {
    this.fs = Preconditions.checkNotNull(fs);
    this.conf = fs.getConf();

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    path = (weighted ? env.getWeightedIntDocVectorsDirectory() : env.getIntDocVectorsDirectory());

    String forwardIndexPath = (weighted ? env.getWeightedIntDocVectorsForwardIndex()
        : env.getIntDocVectorsForwardIndex());
    FSDataInputStream posInput = fs.open(new Path(forwardIndexPath));

    docnoOffset = posInput.readInt();
    collectionDocumentCount = posInput.readInt();
View Full Code Here

  public TermDocVectorsForwardIndex(String indexPath, FileSystem fs) throws IOException {
    Preconditions.checkNotNull(indexPath);
    this.fs = Preconditions.checkNotNull(fs);
    conf = fs.getConf();

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    path = env.getTermDocVectorsDirectory();

    FSDataInputStream posInput = fs.open(new Path(env.getTermDocVectorsForwardIndex()));

    docnoOffset = posInput.readInt();
    collectionDocumentCount = posInput.readInt();

    positions = new long[collectionDocumentCount];
View Full Code Here

  public IntPostingsForwardIndex(String indexPath, FileSystem fs) throws IOException {
    this.fs = fs;
    this.conf = fs.getConf();
    postingsType = ivory.core.data.index.PostingsListDocSortedPositional.class.getCanonicalName();
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    postingsPath = env.getPostingsDirectory();

    FSDataInputStream posInput = fs.open(new Path(env
        .getPostingsIndexData()));

    int l = posInput.readInt();
    positions = new long[l];
    for (int i = 0; i < l; i++) {
View Full Code Here

    int blockSize = getConf().getInt("Ivory.BlockSize", -1);
    int topN = getConf().getInt("Ivory.TopN", -1);

    FileSystem fs = FileSystem.get(getConf());

    RetrievalEnvironment re = new RetrievalEnvironment(indexPath, fs);

    String collectionName = re.readCollectionName();
    int numDocs = re.readCollectionDocumentCount();
    Path docLengthPath = re.getDoclengthsData();
    String scoringModel = getConf().get("Ivory.ScoringModel");

    sLogger.info("Characteristics of the collection:");
    sLogger.info(" - CollectionName: " + collectionName);
    sLogger.info(" - IndexPath: " + indexPath);
    sLogger.info("Characteristics of the job:");
    sLogger.info(" - NumMapTasks: " + mapTasks);
    sLogger.info(" - NumReduceTasks: " + reduceTasks);
    sLogger.info(" - DfCut: " + getConf().getInt("Ivory.DfCut", 0));
    sLogger.info(" - BlockSize: " + blockSize);
    sLogger.info(" - ScoringModel: " + scoringModel);
    sLogger.info(" - topN: " + topN);
    sLogger.info(" - OutputPath: " + outputPath);

    getConf().setInt("Ivory.CollectionDocumentCount", numDocs);

    if (fs.exists(new Path(outputPath))) {
      System.out.println("PCP output path already exists!");
      return 0;
    }

    int numBlocks = numDocs / blockSize + 1;

    for (int i = 0; i < numBlocks; i++) {
      int start = blockSize * i;
      int end = i == numBlocks - 1 ? numDocs : blockSize * (i + 1);

      JobConf conf = new JobConf(getConf(), PCP.class);
      DistributedCache.addCacheFile(docLengthPath.toUri(), conf);

      sLogger.info("block " + i + ": " + start + "-" + end);

      conf.setInt("Ivory.BlockStart", start);
      conf.setInt("Ivory.BlockEnd", end);

      conf.setJobName("PCP:" + collectionName + "-dfCut=" + dfCut
          + (topN > 0 ? "-topN" + topN : "-all") + ":Block #" + i);

      conf.setNumMapTasks(mapTasks);
      conf.setNumReduceTasks(reduceTasks);

      String currentOutputPath = outputPath + "/block" + i;

      FileInputFormat.setInputPaths(conf, new Path(re.getPostingsDirectory()));
      FileOutputFormat.setOutputPath(conf, new Path(currentOutputPath));

      conf.setInputFormat(SequenceFileInputFormat.class);
      conf.setOutputKeyClass(IntWritable.class);
      conf.setOutputValueClass(HMapIFW.class);
View Full Code Here

TOP

Related Classes of ivory.core.RetrievalEnvironment

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.