Package ivory.core.data.index

Examples of ivory.core.data.index.Posting


        PostingsListDocSortedPositional.create(postings2a.serialize());

    postings2b.setCollectionDocumentCount(30);

    // Now merge and test.
    Posting p = new Posting();

    PostingsListDocSortedPositional merged;
    PostingsReader mergedReader;

    merged = PostingsListDocSortedPositional.create(PostingsListDocSortedPositional.merge(
        postings1b, postings2b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(7, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(11, p.getDocno());
    assertEquals(4, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(14, p.getDocno());
    assertEquals(2, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(19, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(24, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(25, p.getDocno());
    assertEquals(2, p.getTf());

    merged = PostingsListDocSortedPositional.create(PostingsListDocSortedPositional.merge(
        postings2b, postings1b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(7, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(11, p.getDocno());
    assertEquals(4, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(14, p.getDocno());
    assertEquals(2, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(19, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(24, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(25, p.getDocno());
    assertEquals(2, p.getTf());
  }
View Full Code Here


        PostingsListDocSortedPositional.create(postings2a.serialize());

    postings2b.setCollectionDocumentCount(30);

    // Now merge and test.
    Posting p = new Posting();

    PostingsListDocSortedPositional merged;
    PostingsReader mergedReader;

    merged = PostingsListDocSortedPositional.create(PostingsListDocSortedPositional.merge(
        postings1b, postings2b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(2, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    merged = PostingsListDocSortedPositional.create(PostingsListDocSortedPositional.merge(
        postings2b, postings1b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(2, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());
  }
View Full Code Here

        PostingsListDocSortedPositional.create(postings2a.serialize());

    postings2b.setCollectionDocumentCount(30);

    // Now merge and test.
    Posting p = new Posting();

    PostingsListDocSortedPositional merged;
    PostingsReader mergedReader;

    merged = PostingsListDocSortedPositional.create(PostingsListDocSortedPositional.merge(
        postings1b, postings2b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(5, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(11, p.getDocno());
    assertEquals(4, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(19, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(25, p.getDocno());
    assertEquals(2, p.getTf());

    merged = PostingsListDocSortedPositional.create(PostingsListDocSortedPositional.merge(
        postings2b, postings1b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(5, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(11, p.getDocno());
    assertEquals(4, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(19, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(25, p.getDocno());
    assertEquals(2, p.getTf());
  }
View Full Code Here

    SpamPercentileScore spamScores = new SpamPercentileScore();
    spamScores.initialize(spamScoresPath, fs);
    int[] newDocids = DocumentUtility.spamSortDocids(spamScores);

    int collectionSize = env.readCollectionTermCount();
    Posting posting = new Posting();
    FSDataOutputStream out;

    BloomConfig bloomConfig =  new BloomConfig((int) env.getDocumentCount(),
                                               collectionSize, nbHash, bitsPerElement);
    //Deletes the output path if it already exists.
    fs.delete(new Path(outputPath));

    //Serialize and write the configuration parameters.
    out = fs.create(new Path(outputPath + "/" + BloomConfig.CONFIG_FILE));
    bloomConfig.write(out);
    out.close();

    for(int i = 0; i <= collectionSize; i++) {
      if(i % 100000 == 0) {
        if(i != 0) {
          out.close();
        }
        out = fs.create(new Path(outputPath + "/" + i));
      }

      try {
        PostingsList pl = env.getPostingsList(env.getTermFromId(i));
        PostingsReader reader = pl.getPostingsReader();
        Signature filter = null;

        //Decide which filter to use based on the configuration parameters
        int df = pl.getDf();
        if (df <= bloomConfig.getIdentityHashThreshold()) {
          filter = new BloomFilterHash(df * bloomConfig.getBitsPerElement(),
                                       bloomConfig.getHashCount());
        } else {
          filter = new BloomFilterIdentityHash(bloomConfig.getDocumentCount());
        }

        while (reader.nextPosting(posting)) {
          filter.add(newDocids[posting.getDocno()]);
        }

        out.writeInt(i);
        out.writeInt(df);
        filter.write(out);
View Full Code Here

    SpamPercentileScore spamScores = new SpamPercentileScore();
    spamScores.initialize(spamPath, fs);
    int[] newDocids = DocumentUtility.spamSortDocids(spamScores);

    Posting posting = new Posting();
    List<TermPositions> positions = Lists.newArrayList();
    Map<Integer, TermPositions> positionsMap = Maps.newHashMap();

    for(int qid: queries.keySet()) {
      for(int termid: queries.get(qid)) {
        if(!termidHistory.contains(termid)) {
          termidHistory.add(termid);
          PostingsList pl = env.getPostingsList(env.getTermFromId(termid));
          PostingsReader reader = pl.getPostingsReader();

          positions.clear();
          positionsMap.clear();
          int[] data = new int[pl.getDf()];
          int index = 0;
          while (reader.nextPosting(posting)) {
            data[index] = newDocids[posting.getDocno()];
            positionsMap.put(data[index], new TermPositions(reader.getPositions(), reader.getTf()));
            docLengths.put(data[index], env.getDocumentLength(posting.getDocno()));
            index++;
          }
          Arrays.sort(data);

          for(int i = 0; i < data.length; i++) {
View Full Code Here

    postings2.setCollectionDocumentCount(10);

    assertEquals(3, postings2.getDf());
    assertEquals(8, postings2.getCf());

    Posting posting = new Posting();

    PostingsReader reader = postings2.getPostingsReader();

    int arr[] = null;

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(13, posting.getDocno());
    assertEquals(5, posting.getTf());
    assertEquals(1, arr[0]);
    assertEquals(4, arr[1]);
    assertEquals(5, arr[2]);
    assertEquals(10, arr[3]);
    assertEquals(23, arr[4]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(14, posting.getDocno());
    assertEquals(2, posting.getTf());
    assertEquals(2, arr[0]);
    assertEquals(23, arr[1]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(24, posting.getDocno());
    assertEquals(1, posting.getTf());
    assertEquals(1, arr[0]);
  }
View Full Code Here

    // Verify df and cf.
    assertEquals(3, postings2.getDf());
    assertEquals(8, postings2.getCf());

    Posting posting = new Posting();

    PostingsReader reader = postings2.getPostingsReader();

    int arr[] = null;

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(13, posting.getDocno());
    assertEquals(5, posting.getTf());
    assertEquals(1, arr[0]);
    assertEquals(4, arr[1]);
    assertEquals(5, arr[2]);
    assertEquals(10, arr[3]);
    assertEquals(23, arr[4]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(14, posting.getDocno());
    assertEquals(2, posting.getTf());
    assertEquals(2, arr[0]);
    assertEquals(23, arr[1]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(24, posting.getDocno());
    assertEquals(1, posting.getTf());
    assertEquals(1, arr[0]);

    // Set new df and cf.
    postings2.setDf(6);
    postings2.setCf(16);

    // Verify df and cf.
    assertEquals(6, postings2.getDf());
    assertEquals(16, postings2.getCf());

    reader = postings2.getPostingsReader();
    arr = null;

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(13, posting.getDocno());
    assertEquals(5, posting.getTf());
    assertEquals(1, arr[0]);
    assertEquals(4, arr[1]);
    assertEquals(5, arr[2]);
    assertEquals(10, arr[3]);
    assertEquals(23, arr[4]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(14, posting.getDocno());
    assertEquals(2, posting.getTf());
    assertEquals(2, arr[0]);
    assertEquals(23, arr[1]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(24, posting.getDocno());
    assertEquals(1, posting.getTf());
    assertEquals(1, arr[0]);

    PostingsListDocSortedPositionalPForDelta postings3 =
        PostingsListDocSortedPositionalPForDelta.create(postings2.serialize());
    postings3.setCollectionDocumentCount(20);

    // Verify df and cf.
    assertEquals(6, postings2.getDf());
    assertEquals(16, postings2.getCf());

    reader = postings2.getPostingsReader();
    arr = null;

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(13, posting.getDocno());
    assertEquals(5, posting.getTf());
    assertEquals(1, arr[0]);
    assertEquals(4, arr[1]);
    assertEquals(5, arr[2]);
    assertEquals(10, arr[3]);
    assertEquals(23, arr[4]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(14, posting.getDocno());
    assertEquals(2, posting.getTf());
    assertEquals(2, arr[0]);
    assertEquals(23, arr[1]);

    reader.nextPosting(posting);
    arr = reader.getPositions();
    assertEquals(24, posting.getDocno());
    assertEquals(1, posting.getTf());
    assertEquals(1, arr[0]);
  }
View Full Code Here

        PostingsListDocSortedPositionalPForDelta.create(postings2a.serialize());

    postings2b.setCollectionDocumentCount(30);

    // Now merge and test.
    Posting p = new Posting();

    PostingsListDocSortedPositionalPForDelta merged;
    PostingsReader mergedReader;

    merged = PostingsListDocSortedPositionalPForDelta.create(PostingsListDocSortedPositionalPForDelta.merge(
        postings1b, postings2b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(7, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(11, p.getDocno());
    assertEquals(4, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(14, p.getDocno());
    assertEquals(2, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(19, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(24, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(25, p.getDocno());
    assertEquals(2, p.getTf());

    merged = PostingsListDocSortedPositionalPForDelta.create(PostingsListDocSortedPositionalPForDelta.merge(
        postings2b, postings1b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(7, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(11, p.getDocno());
    assertEquals(4, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(14, p.getDocno());
    assertEquals(2, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(19, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(24, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(25, p.getDocno());
    assertEquals(2, p.getTf());
  }
View Full Code Here

        PostingsListDocSortedPositionalPForDelta.create(postings2a.serialize());

    postings2b.setCollectionDocumentCount(30);

    // Now merge and test.
    Posting p = new Posting();

    PostingsListDocSortedPositionalPForDelta merged;
    PostingsReader mergedReader;

    merged = PostingsListDocSortedPositionalPForDelta.create(PostingsListDocSortedPositionalPForDelta.merge(
        postings1b, postings2b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(2, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    merged = PostingsListDocSortedPositionalPForDelta.create(PostingsListDocSortedPositionalPForDelta.merge(
        postings2b, postings1b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(2, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());
  }
View Full Code Here

        PostingsListDocSortedPositionalPForDelta.create(postings2a.serialize());

    postings2b.setCollectionDocumentCount(30);

    // Now merge and test.
    Posting p = new Posting();

    PostingsListDocSortedPositionalPForDelta merged;
    PostingsReader mergedReader;

    merged = PostingsListDocSortedPositionalPForDelta.create(PostingsListDocSortedPositionalPForDelta.merge(
        postings1b, postings2b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(5, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(11, p.getDocno());
    assertEquals(4, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(19, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(25, p.getDocno());
    assertEquals(2, p.getTf());

    merged = PostingsListDocSortedPositionalPForDelta.create(PostingsListDocSortedPositionalPForDelta.merge(
        postings2b, postings1b, 30).serialize());

    merged.setCollectionDocumentCount(30);
    mergedReader = merged.getPostingsReader();

    assertEquals(5, merged.getNumberOfPostings());
    mergedReader.nextPosting(p);
    assertEquals(2, p.getDocno());
    assertEquals(3, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(11, p.getDocno());
    assertEquals(4, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(13, p.getDocno());
    assertEquals(5, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(19, p.getDocno());
    assertEquals(1, p.getTf());

    mergedReader.nextPosting(p);
    assertEquals(25, p.getDocno());
    assertEquals(2, p.getTf());
  }
View Full Code Here

TOP

Related Classes of ivory.core.data.index.Posting

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.