Package ivory.core.data.index

Examples of ivory.core.data.index.PostingsList


        PostingsListDocSortedPositional.mergeList(merged, lists, collectionDocumentCount);
        lists.clear();
        mergedList.add(PostingsListDocSortedPositional.create(merged.serialize()));
        context.getCounter(Reduce.Merges).increment(1);
      } else {
        PostingsList pl = lists.remove(0);
        pl.setCollectionDocumentCount(collectionDocumentCount);
        mergedList.add(pl);
      }

      context.getCounter(ReduceTime.Merging).increment(System.currentTimeMillis() - startTime);
      return true;
View Full Code Here


    for(int qid: queries.keySet()) {
      for(int termid: queries.get(qid)) {
        if(!termidHistory.contains(termid)) {
          termidHistory.add(termid);
          PostingsList pl = env.getPostingsList(env.getTermFromId(termid));
          PostingsReader reader = pl.getPostingsReader();

          positions.clear();
          positionsMap.clear();
          int[] data = new int[pl.getDf()];
          int index = 0;
          while (reader.nextPosting(posting)) {
            data[index] = newDocids[posting.getDocno()];
            positionsMap.put(data[index], new TermPositions(reader.getPositions(), reader.getTf()));
            docLengths.put(data[index], env.getDocumentLength(posting.getDocno()));
            index++;
          }
          Arrays.sort(data);

          for(int i = 0; i < data.length; i++) {
            positions.add(positionsMap.get(data[i]));
          }

          output.writeInt(termid);
          output.writeInt(pl.getDf());
          CompressedPositionalPostings.newInstance(data, positions).write(output);
        }
      }
      LOGGER.info("Compressed query " + qid);
    }
View Full Code Here

      // Get df and cf information for the concept.
      PostingsReader reader = env.getPostingsReader(new Expression(concept));
      if (reader == null) {
        continue;
      }
      PostingsList list = reader.getPostingsList();
      int df = list.getDf();
      long cf = list.getCf();
      env.clearPostingsReaderCache();

      // Construct concept evidence.
      termEvidence.set(df, cf);
View Full Code Here

    if (mPostingsReaderCache != null) {
      reader = mPostingsReaderCache.get(expression);
    }

    if (reader == null) {
      PostingsList list = getPostingsList(expression);
      if (list == null) {
        return null;
      }

      reader = (PostingsReader) list.getPostingsReader();

      if (mPostingsReaderCache != null) {
        mPostingsReaderCache.put(expression, reader);
      }
    }
View Full Code Here

    terms = termOrPhrase.split("\\s+");
    if (terms.length > 1) {
      operator = "phrase";
      List<PostingsReader> prs = new ArrayList<PostingsReader>();
      for (String term : terms) {
        PostingsList pl = env.getPostingsList(term);
        // if any of the tokens is OOV, then the phrase is considered OOV
        if (pl == null) {
          isOOV = true;
          endOfList = true;
          return;
        }
        prs.add(pl.getPostingsReader());
      }
      postingsReader = new ProximityPostingsReaderOrderedWindow(prs.toArray(new PostingsReader[0]),
          2);
      postingsReader.nextPosting(curPosting);
      gte = new GlobalTermEvidence(env.getDefaultDf(), env.getDefaultCf());
      this.ge = ge;
      lastScoredDocno = 0;
    } else {
      operator = "term";
      PostingsList pl = env.getPostingsList(termOrPhrase);
      if (pl == null) {
        isOOV = true;
        endOfList = true;
      } else {
        postingsReader = pl.getPostingsReader();
        gte = new GlobalTermEvidence(pl.getDf(), pl.getCf());
        this.ge = ge;
        lastScoredDocno = 0;
      }
    }
  }
View Full Code Here

      LOG.error("couldn't find term id (0) for term \"" + term + "\"");
      return null;
    }
    //LOG.info("termid: "+termid);

    PostingsList value;
    try {
      value = postingsIndex.getPostingsList(termid);

      if (value == null) {
        LOG.error("[1] couldn't find PostingsList for term \"" + term + "\"");
        return null;
      }
    } catch (IOException e) {
      LOG.error("[2] couldn't find PostingsList for term \"" + term + "\"");
      return null;
    }

    if (numDocsLocal != -1) {
      value.setCollectionDocumentCount(numDocsLocal);
    } else {
      value.setCollectionDocumentCount(numDocs);
    }

    return value;
  }
View Full Code Here

        }
        out = fs.create(new Path(outputPath + "/" + i));
      }

      try {
        PostingsList pl = env.getPostingsList(env.getTermFromId(i));
        PostingsReader reader = pl.getPostingsReader();
        Signature filter = null;

        //Decide which filter to use based on the configuration parameters
        int df = pl.getDf();
        if (df <= bloomConfig.getIdentityHashThreshold()) {
          filter = new BloomFilterHash(df * bloomConfig.getBitsPerElement(),
                                       bloomConfig.getHashCount());
        } else {
          filter = new BloomFilterIdentityHash(bloomConfig.getDocumentCount());
View Full Code Here

      if(i % 1000 == 0) {
        LOGGER.info(i + " posting lists prepared...");
      }

      try {
        PostingsList pl = env.getPostingsList(env.getTermFromId(i));
        PostingsReader reader = pl.getPostingsReader();

        int[] data = new int[pl.getDf()];
        int index = 0;
        while (reader.nextPosting(posting)) {
          data[index++] = newDocids[posting.getDocno()];
        }
        Arrays.sort(data);
        CompressedPostings compPostings = CompressedPostings.newInstance(data);

        out.writeInt(i);
        out.writeInt(pl.getDf());
        compPostings.write(out);
      } catch(Exception e) {
        continue;
      }
    }
View Full Code Here

        PostingsListDocSortedPositional.mergeList(merged, lists, collectionDocumentCount);
        lists.clear();
        mergedList.add(PostingsListDocSortedPositional.create(merged.serialize()));
        context.getCounter(Reduce.Merges).increment(1);
      } else {
        PostingsList pl = lists.remove(0);
        pl.setCollectionDocumentCount(collectionDocumentCount);
        mergedList.add(pl);
      }

      context.getCounter(ReduceTime.Merging).increment(System.currentTimeMillis() - startTime);
      return true;
View Full Code Here

    terms = termOrPhrase.split("\\s+");
    if (terms.length > 1) {
      operator = "phrase";
      List<PostingsReader> prs = new ArrayList<PostingsReader>();
      for (String term : terms) {
        PostingsList pl = env.getPostingsList(term);
//        LOG.info(term+"->"+pl.getDf());
        // if any of the tokens is OOV, then the phrase is considerd OOV
        if (pl == null) {
          isOOV = true;
          endOfList = true;
          return;
        }
        prs.add(pl.getPostingsReader())
      }
      postingsReader = new ProximityPostingsReaderOrderedWindow(prs.toArray(new PostingsReader[0]), 2);
      postingsReader.nextPosting(curPosting);
      //      ProximityPostingsReaderOrderedWindow postingsReader2 = new ProximityPostingsReaderOrderedWindow(prs, 2);
      //      while (postingsReader2.hasMorePostings()){
      //        if(postingsReader2.nextPosting(curPosting)){
      //          if(postingsReader2.getTf()>0){
      //            //LOG.info(termOrPhrase+ " docno-->"+postingsReader2.getDocno());
      //            //LOG.info(termOrPhrase+" tf-->"+postingsReader2.getTf());
      //          }
      //        }
      //      }
      gte = new GlobalTermEvidence(env.getDefaultDf(), env.getDefaultCf());
      this.ge = ge;
      lastScoredDocno = 0;
    } else {
      operator = "term";
      PostingsList pl = env.getPostingsList(termOrPhrase);
      if (pl == null) {
        isOOV = true;
        endOfList = true;
      } else {
        postingsReader = pl.getPostingsReader();
        gte = new GlobalTermEvidence(pl.getDf(), pl.getCf());
        this.ge = ge;
        lastScoredDocno = 0;
      }
    }
    // //LOG.info("leaf done.");
View Full Code Here

TOP

Related Classes of ivory.core.data.index.PostingsList

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact coftware#gmail.com.