Package ivory.core.data.index

Examples of ivory.core.data.index.Posting


  private static void testTerm(RetrievalEnvironment env, String term) {
    long startTime = System.currentTimeMillis();

    PostingsReader reader = null;
    Posting p = new Posting();
    int df = 0;
    String termOrig = term;
    String termTokenized = env.tokenize(termOrig)[0];

    LOG.info("term=" + termOrig + ", tokenized=" + termTokenized);
View Full Code Here


    SpamPercentileScore spamScores = new SpamPercentileScore();
    spamScores.initialize(spamScoresPath, fs);
    int[] newDocids = DocumentUtility.spamSortDocids(spamScores);

    int collectionSize = env.readCollectionTermCount();
    Posting posting = new Posting();
    FSDataOutputStream out;

    BloomConfig bloomConfig =  new BloomConfig((int) env.getDocumentCount(),
                                               collectionSize, nbHash, bitsPerElement);
    //Deletes the output path if it already exists.
    fs.delete(new Path(outputPath), true);

    //Serialize and write the configuration parameters.
    out = fs.create(new Path(outputPath + "/" + BloomConfig.CONFIG_FILE));
    bloomConfig.write(out);
    out.close();

    for(int i = 0; i <= collectionSize; i++) {
      if(i % 100000 == 0) {
        if(i != 0) {
          out.close();
        }
        out = fs.create(new Path(outputPath + "/" + i));
      }

      try {
        PostingsList pl = env.getPostingsList(env.getTermFromId(i));
        PostingsReader reader = pl.getPostingsReader();
        Signature filter = null;

        //Decide which filter to use based on the configuration parameters
        int df = pl.getDf();
        if (df <= bloomConfig.getIdentityHashThreshold()) {
          filter = new BloomFilterHash(df * bloomConfig.getBitsPerElement(),
                                       bloomConfig.getHashCount());
        } else {
          filter = new BloomFilterIdentityHash(bloomConfig.getDocumentCount());
        }

        while (reader.nextPosting(posting)) {
          filter.add(newDocids[posting.getDocno()]);
        }

        out.writeInt(i);
        out.writeInt(df);
        filter.write(out);
View Full Code Here

    SpamPercentileScore spamScores = new SpamPercentileScore();
    spamScores.initialize(spamScoresPath, fs);
    int[] newDocids = DocumentUtility.spamSortDocids(spamScores);

    int collectionSize = env.readCollectionTermCount();
    Posting posting = new Posting();
    FSDataOutputStream out;

    out = fs.create(new Path(outputPath + "/" + CompressedPostingsIO.LENGTH_FILE));
    out.writeInt(collectionSize);
    out.close();

    for(int i = 0; i <= collectionSize; i++) {
      if(i % 100000 == 0) {
        if(i != 0) {
          out.close();
        }
        out = fs.create(new Path(outputPath + "/" + i));
      }

      if(i % 1000 == 0) {
        LOGGER.info(i + " posting lists prepared...");
      }

      try {
        PostingsList pl = env.getPostingsList(env.getTermFromId(i));
        PostingsReader reader = pl.getPostingsReader();

        int[] data = new int[pl.getDf()];
        int index = 0;
        while (reader.nextPosting(posting)) {
          data[index++] = newDocids[posting.getDocno()];
        }
        Arrays.sort(data);
        CompressedPostings compPostings = CompressedPostings.newInstance(data);

        out.writeInt(i);
View Full Code Here

TOP

Related Classes of ivory.core.data.index.Posting

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.