Package com.browseengine.bobo.search

Source Code of com.browseengine.bobo.search.BoboSearcher

package com.browseengine.bobo.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;

import com.browseengine.bobo.api.BoboSegmentReader;
import com.browseengine.bobo.docidset.RandomAccessDocIdSet;
import com.browseengine.bobo.facets.FacetCountCollector;
import com.browseengine.bobo.facets.FacetCountCollectorSource;
import com.browseengine.bobo.mapred.BoboMapFunctionWrapper;

public class BoboSearcher {
  // Facet hit collectors that createFacetValidator() partitions into a validator. Starts as an
  // empty list and can be swapped out via setFacetHitCollectorList (a null argument is ignored).
  private List<FacetHitCollector> _facetCollectors;
  // Segment reader obtained in the constructor by casting ctx.reader(); handed to the validator,
  // used as the source of live docs, and passed to the map-reduce wrapper in search().
  private final BoboSegmentReader _boboSegmentReader;
  // Reader context this searcher was built with; passed to the collector, the weight and the
  // filter in search().
  private final AtomicReaderContext _atomicReaderContext;

  public BoboSearcher(AtomicReaderContext ctx) {
    _atomicReaderContext = ctx;
    _facetCollectors = new LinkedList<FacetHitCollector>();
    _boboSegmentReader = (BoboSegmentReader) ctx.reader();
  }

  public void setFacetHitCollectorList(List<FacetHitCollector> facetHitCollectors) {
    if (facetHitCollectors != null) {
      _facetCollectors = facetHitCollectors;
    }
  }

  abstract static class FacetValidator {
    protected final FacetHitCollector[] _collectors;
    protected final int _numPostFilters;
    protected FacetCountCollector[] _countCollectors;
    public int _nextTarget;

    private void sortPostCollectors(final BoboSegmentReader reader) {
      Comparator<FacetHitCollector> comparator = new Comparator<FacetHitCollector>() {
        @Override
        public int compare(FacetHitCollector fhc1, FacetHitCollector fhc2) {
          double selectivity1 = fhc1._filter.getFacetSelectivity(reader);
          double selectivity2 = fhc2._filter.getFacetSelectivity(reader);
          if (selectivity1 < selectivity2) {
            return -1;
          } else if (selectivity1 > selectivity2) {
            return 1;
          }
          return 0;
        }
      };

      Arrays.sort(_collectors, 0, _numPostFilters, comparator);
    }

    public FacetValidator(FacetHitCollector[] collectors, int numPostFilters) throws IOException {
      _collectors = collectors;
      _numPostFilters = numPostFilters;
      _countCollectors = new FacetCountCollector[collectors.length];
    }

    /**
     * This method validates the doc against any multi-select enabled fields.
     * @param docid
     * @return true if all fields matched
     */
    public abstract boolean validate(final int docid) throws IOException;

    public void setNextReader(BoboSegmentReader reader, int docBase) throws IOException {
      ArrayList<FacetCountCollector> collectorList = new ArrayList<FacetCountCollector>();
      sortPostCollectors(reader);
      for (int i = 0; i < _collectors.length; ++i) {
        _collectors[i].setNextReader(reader, docBase);
        FacetCountCollector collector = _collectors[i]._currentPointers.facetCountCollector;
        if (collector != null) {
          collectorList.add(collector);
        }
      }
      _countCollectors = collectorList.toArray(new FacetCountCollector[collectorList.size()]);
    }

    public FacetCountCollector[] getCountCollectors() {
      List<FacetCountCollector> collectors = new ArrayList<FacetCountCollector>();
      collectors.addAll(Arrays.asList(_countCollectors));
      for (FacetHitCollector facetHitCollector : _collectors) {
        collectors.addAll(facetHitCollector._collectAllCollectorList);
        collectors.addAll(facetHitCollector._countCollectorList);
      }
      return collectors.toArray(new FacetCountCollector[collectors.size()]);
    }
  }

  private final static class DefaultFacetValidator extends FacetValidator {

    public DefaultFacetValidator(FacetHitCollector[] collectors, int numPostFilters)
        throws IOException {
      super(collectors, numPostFilters);
    }

    /**
     * This method validates the doc against any multi-select enabled fields.
     * @param docid
     * @return true if all fields matched
     */
    @Override
    public final boolean validate(final int docid) throws IOException {
      FacetHitCollector.CurrentPointers miss = null;

      for (int i = 0; i < _numPostFilters; i++) {
        FacetHitCollector.CurrentPointers cur = _collectors[i]._currentPointers;
        int sid = cur.doc;

        if (sid < docid) {
          sid = cur.postDocIDSetIterator.advance(docid);
          cur.doc = sid;
          if (sid == DocIdSetIterator.NO_MORE_DOCS) {
            // move this to front so that the call can find the failure faster
            FacetHitCollector tmp = _collectors[0];
            _collectors[0] = _collectors[i];
            _collectors[i] = tmp;
          }
        }

        if (sid > docid) // mismatch
        {
          if (miss != null) {
            // failed because we already have a mismatch
            _nextTarget = (miss.doc < cur.doc ? miss.doc : cur.doc);
            return false;
          }
          miss = cur;
        }
      }

      _nextTarget = docid + 1;

      if (miss != null) {
        miss.facetCountCollector.collect(docid);
        return false;
      } else {
        for (FacetCountCollector collector : _countCollectors) {
          collector.collect(docid);
        }
        return true;
      }
    }
  }

  private final static class OnePostFilterFacetValidator extends FacetValidator {
    private final FacetHitCollector _firsttime;

    OnePostFilterFacetValidator(FacetHitCollector[] collectors) throws IOException {
      super(collectors, 1);
      _firsttime = _collectors[0];
    }

    @Override
    public final boolean validate(int docid) throws IOException {
      FacetHitCollector.CurrentPointers miss = null;

      RandomAccessDocIdSet set = _firsttime._currentPointers.docidSet;
      if (set != null && !set.get(docid)) {
        miss = _firsttime._currentPointers;
      }

      _nextTarget = docid + 1;

      if (miss != null) {
        miss.facetCountCollector.collect(docid);
        return false;
      } else {
        for (FacetCountCollector collector : _countCollectors) {
          collector.collect(docid);
        }
        return true;
      }
    }
  }

  private final static class NoNeedFacetValidator extends FacetValidator {
    NoNeedFacetValidator(FacetHitCollector[] collectors) throws IOException {
      super(collectors, 0);
    }

    @Override
    public final boolean validate(int docid) throws IOException {
      for (FacetCountCollector collector : _countCollectors) {
        collector.collect(docid);
      }
      return true;
    }

  }

  protected FacetValidator createFacetValidator() throws IOException {

    FacetHitCollector[] collectors = new FacetHitCollector[_facetCollectors.size()];
    FacetCountCollectorSource[] countCollectors = new FacetCountCollectorSource[collectors.length];
    int numPostFilters;
    int i = 0;
    int j = collectors.length;

    for (FacetHitCollector facetCollector : _facetCollectors) {
      if (facetCollector._filter != null) {
        collectors[i] = facetCollector;
        countCollectors[i] = facetCollector._facetCountCollectorSource;
        i++;
      } else {
        j--;
        collectors[j] = facetCollector;
        countCollectors[j] = facetCollector._facetCountCollectorSource;
      }
    }
    numPostFilters = i;

    if (numPostFilters == 0) {
      return new NoNeedFacetValidator(collectors);
    } else if (numPostFilters == 1) {
      return new OnePostFilterFacetValidator(collectors);
    } else {
      return new DefaultFacetValidator(collectors, numPostFilters);
    }
  }

  public void search(Weight weight, Filter filter, Collector collector, int start,
      BoboMapFunctionWrapper mapReduceWrapper) throws IOException {
    final FacetValidator validator = createFacetValidator();
    int target = 0;
    if (filter == null) {
      int docStart = start;
      collector.setNextReader(_atomicReaderContext);
      validator.setNextReader(_boboSegmentReader, docStart);
      Scorer scorer = weight.scorer(_atomicReaderContext, true, true,
        _boboSegmentReader.getLiveDocs());
      if (scorer != null) {
        collector.setScorer(scorer);
        target = scorer.nextDoc();
        while (target != DocIdSetIterator.NO_MORE_DOCS) {
          if (validator.validate(target)) {
            collector.collect(target);
            target = scorer.nextDoc();
          } else {
            target = validator._nextTarget;
            target = scorer.advance(target);
          }
        }
      }
      if (mapReduceWrapper != null) {
        mapReduceWrapper.mapFullIndexReader(_boboSegmentReader, validator.getCountCollectors());
      }
      return;
    }

    DocIdSet filterDocIdSet = filter.getDocIdSet(_atomicReaderContext,
      _boboSegmentReader.getLiveDocs());
    // shall we use return or continue here ??
    if (filterDocIdSet == null) {
      return;
    }
    int docStart = start;
    collector.setNextReader(_atomicReaderContext);
    validator.setNextReader(_boboSegmentReader, docStart);
    Scorer scorer = weight.scorer(_atomicReaderContext, true, false,
      _boboSegmentReader.getLiveDocs());
    if (scorer != null) {
      collector.setScorer(scorer);
      DocIdSetIterator filterDocIdIterator = filterDocIdSet.iterator(); // CHECKME: use
                                                                        // ConjunctionScorer here?

      if (filterDocIdIterator == null) {
        return;
      }

      int doc = -1;
      target = filterDocIdIterator.nextDoc();
      if (mapReduceWrapper == null) {
        while (target < DocIdSetIterator.NO_MORE_DOCS) {
          if (doc < target) {
            doc = scorer.advance(target);
          }

          if (doc == target) // permitted by filter
          {
            if (validator.validate(doc)) {
              collector.collect(doc);

              target = filterDocIdIterator.nextDoc();
            } else {
              // skip to the next possible docid
              target = filterDocIdIterator.advance(validator._nextTarget);
            }
          } else // doc > target
          {
            if (doc == DocIdSetIterator.NO_MORE_DOCS) break;
            target = filterDocIdIterator.advance(doc);
          }
        }
      } else {
        // MapReduce wrapper is not null
        while (target < DocIdSetIterator.NO_MORE_DOCS) {
          if (doc < target) {
            doc = scorer.advance(target);
          }

          if (doc == target) // permitted by filter
          {
            if (validator.validate(doc)) {
              mapReduceWrapper.mapSingleDocument(doc, _boboSegmentReader);
              collector.collect(doc);

              target = filterDocIdIterator.nextDoc();
            } else {
              // skip to the next possible docid
              target = filterDocIdIterator.advance(validator._nextTarget);
            }
          } else // doc > target
          {
            if (doc == DocIdSetIterator.NO_MORE_DOCS) break;
            target = filterDocIdIterator.advance(doc);
          }
        }
        mapReduceWrapper.finalizeSegment(_boboSegmentReader, validator.getCountCollectors());
      }
    }

  }
}
TOP

Related Classes of com.browseengine.bobo.search.BoboSearcher

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.