Package ivory.core.data.index

Examples of ivory.core.data.index.PostingsReader


    // Now merge and test.
    Posting p = new Posting();

    PostingsListDocSortedPositional merged;
    PostingsReader mergedReader;

    merged = PostingsListDocSortedPositional.create(PostingsListDocSortedPositional.merge(
        postings1b, postings2b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(5, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(11, p.getDocno());
    assertEquals(4, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(19, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(25, p.getDocno());
    assertEquals(2, p.getTf());

    merged = PostingsListDocSortedPositional.create(PostingsListDocSortedPositional.merge(
        postings2b, postings1b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(5, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(11, p.getDocno());
    assertEquals(4, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(19, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(25, p.getDocno());
    assertEquals(2, p.getTf());
  }
View Full Code Here


        out = fs.create(new Path(outputPath + "/" + i));
      }

      try {
        PostingsList pl = env.getPostingsList(env.getTermFromId(i));
        PostingsReader reader = pl.getPostingsReader();
        Signature filter = null;

        //Decide which filter to use based on the configuration parameters
        int df = pl.getDf();
        if (df <= bloomConfig.getIdentityHashThreshold()) {
          filter = new BloomFilterHash(df * bloomConfig.getBitsPerElement(),
                                       bloomConfig.getHashCount());
        } else {
          filter = new BloomFilterIdentityHash(bloomConfig.getDocumentCount());
        }

        while (reader.nextPosting(posting)) {
          filter.add(newDocids[posting.getDocno()]);
        }

        out.writeInt(i);
        out.writeInt(df);
View Full Code Here

    assertEquals(3, postings2.getDf());
    assertEquals(8, postings2.getCf());

    Posting posting = new Posting();

    PostingsReader reader = postings2.getPostingsReader();

    int arr[] = null;

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(13, posting.getDocno());
    assertEquals(5, posting.getTf());
    assertEquals(1, arr[0]);
    assertEquals(4, arr[1]);
    assertEquals(5, arr[2]);
    assertEquals(10, arr[3]);
    assertEquals(23, arr[4]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(14, posting.getDocno());
    assertEquals(2, posting.getTf());
    assertEquals(2, arr[0]);
    assertEquals(23, arr[1]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(24, posting.getDocno());
    assertEquals(1, posting.getTf());
    assertEquals(1, arr[0]);
  }
View Full Code Here

    assertEquals(3, postings2.getDf());
    assertEquals(8, postings2.getCf());

    Posting posting = new Posting();

    PostingsReader reader = postings2.getPostingsReader();

    int arr[] = null;

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(13, posting.getDocno());
    assertEquals(5, posting.getTf());
    assertEquals(1, arr[0]);
    assertEquals(4, arr[1]);
    assertEquals(5, arr[2]);
    assertEquals(10, arr[3]);
    assertEquals(23, arr[4]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(14, posting.getDocno());
    assertEquals(2, posting.getTf());
    assertEquals(2, arr[0]);
    assertEquals(23, arr[1]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(24, posting.getDocno());
    assertEquals(1, posting.getTf());
    assertEquals(1, arr[0]);

    // Set new tf and cf.
    postings2.setDf(6);
    postings2.setCf(16);

    // Verify tf and cf.
    assertEquals(6, postings2.getDf());
    assertEquals(16, postings2.getCf());

    reader = postings2.getPostingsReader();
    arr = null;

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(13, posting.getDocno());
    assertEquals(5, posting.getTf());
    assertEquals(1, arr[0]);
    assertEquals(4, arr[1]);
    assertEquals(5, arr[2]);
    assertEquals(10, arr[3]);
    assertEquals(23, arr[4]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(14, posting.getDocno());
    assertEquals(2, posting.getTf());
    assertEquals(2, arr[0]);
    assertEquals(23, arr[1]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(24, posting.getDocno());
    assertEquals(1, posting.getTf());
    assertEquals(1, arr[0]);

    PostingsListDocSortedPositionalPForDelta postings3 =
        PostingsListDocSortedPositionalPForDelta.create(postings2.serialize());
    postings3.setCollectionDocumentCount(20);

    // Verify tf and cf.
    assertEquals(6, postings2.getDf());
    assertEquals(16, postings2.getCf());

    reader = postings2.getPostingsReader();
    arr = null;

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(13, posting.getDocno());
    assertEquals(5, posting.getTf());
    assertEquals(1, arr[0]);
    assertEquals(4, arr[1]);
    assertEquals(5, arr[2]);
    assertEquals(10, arr[3]);
    assertEquals(23, arr[4]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(14, posting.getDocno());
    assertEquals(2, posting.getTf());
    assertEquals(2, arr[0]);
    assertEquals(23, arr[1]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(24, posting.getDocno());
    assertEquals(1, posting.getTf());
    assertEquals(1, arr[0]);
  }
View Full Code Here

    // Now merge and test.
    Posting p = new Posting();

    PostingsListDocSortedPositionalPForDelta merged;
    PostingsReader mergedReader;

    merged = PostingsListDocSortedPositionalPForDelta.create(PostingsListDocSortedPositionalPForDelta.merge(
        postings1b, postings2b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(7, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(11, p.getDocno());
    assertEquals(4, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(14, p.getDocno());
    assertEquals(2, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(19, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(24, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(25, p.getDocno());
    assertEquals(2, p.getTf());

    merged = PostingsListDocSortedPositionalPForDelta.create(PostingsListDocSortedPositionalPForDelta.merge(
        postings2b, postings1b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(7, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(11, p.getDocno());
    assertEquals(4, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(14, p.getDocno());
    assertEquals(2, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(19, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(24, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(25, p.getDocno());
    assertEquals(2, p.getTf());
  }
View Full Code Here

    // Now merge and test.
    Posting p = new Posting();

    PostingsListDocSortedPositionalPForDelta merged;
    PostingsReader mergedReader;

    merged = PostingsListDocSortedPositionalPForDelta.create(PostingsListDocSortedPositionalPForDelta.merge(
        postings1b, postings2b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(2, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    merged = PostingsListDocSortedPositionalPForDelta.create(PostingsListDocSortedPositionalPForDelta.merge(
        postings2b, postings1b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(2, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());
  }
View Full Code Here

    // Now merge and test.
    Posting p = new Posting();

    PostingsListDocSortedPositionalPForDelta merged;
    PostingsReader mergedReader;

    merged = PostingsListDocSortedPositionalPForDelta.create(PostingsListDocSortedPositionalPForDelta.merge(
        postings1b, postings2b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(5, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(11, p.getDocno());
    assertEquals(4, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(19, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(25, p.getDocno());
    assertEquals(2, p.getTf());

    merged = PostingsListDocSortedPositionalPForDelta.create(PostingsListDocSortedPositionalPForDelta.merge(
        postings2b, postings1b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(5, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(11, p.getDocno());
    assertEquals(4, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(19, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(25, p.getDocno());
    assertEquals(2, p.getTf());
  }
View Full Code Here

    for(int qid: queries.keySet()) {
      for(int termid: queries.get(qid)) {
        if(!termidHistory.contains(termid)) {
          termidHistory.add(termid);
          PostingsList pl = env.getPostingsList(env.getTermFromId(termid));
          PostingsReader reader = pl.getPostingsReader();

          positions.clear();
          positionsMap.clear();
          int[] data = new int[pl.getDf()];
          int index = 0;
          while (reader.nextPosting(posting)) {
            data[index] = newDocids[posting.getDocno()];
            positionsMap.put(data[index], new TermPositions(reader.getPositions(), reader.getTf()));
            docLengths.put(data[index], env.getDocumentLength(posting.getDocno()));
            index++;
          }
          Arrays.sort(data);
View Full Code Here

        OutputCollector<IntWritable, HMapIFW> output, Reporter reporter) throws IOException {

      sLogger.debug(mCollectionDocCount);
      postings.setCollectionDocumentCount(mCollectionDocCount);

      PostingsReader reader1 = postings.getPostingsReader();

      if (reader1.getNumberOfPostings() > dfCut)
        return;

      // set per postings list
      mModel.setDF(reader1.getNumberOfPostings());

      // performing PCP
      while (reader1.nextPosting(e1)) {

        // Here's a hidden dependency: How we do the blocking depends on
        // how the postings are sorted. If the postings are sorted in
        // ascending docno, then we can break out of the loop after
        // we've gone past the block bounds (as in code below).
        // Otherwise (say, if postings are sorted by tf), we have to go
        // through all postings.
        // -- Jimmy, 2008/09/03
        //

        if (e1.getDocno() < mBlockStart)
          continue;
        if (e1.getDocno() >= mBlockEnd)
          break;

        HMapIFW map = new HMapIFW();

        sLogger.debug(key + ": " + e1);

        PostingsReader reader2 = postings.getPostingsReader();

        while (reader2.nextPosting(e2)) {

          sLogger.debug(key + ": " + e1 + ", " + e2);

          if (e1.getDocno() == e2.getDocno())
            continue;
View Full Code Here

      // The current concept.
      String concept = vocab[conceptID].getKey();

      // Get df and cf information for the concept.
      PostingsReader reader = env.getPostingsReader(new Expression(concept));
      if (reader == null) {
        continue;
      }
      PostingsList list = reader.getPostingsList();
      int df = list.getDf();
      long cf = list.getCf();
      env.clearPostingsReaderCache();

      // Construct concept evidence.
View Full Code Here

TOP

Related Classes of ivory.core.data.index.PostingsReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.