Package com.browseengine.bobo.sort

Source Code of com.browseengine.bobo.sort.SortCollectorImpl

package com.browseengine.bobo.sort;

import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Logger;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SortField;

import com.browseengine.bobo.api.BoboIndexReader;
import com.browseengine.bobo.api.Browsable;
import com.browseengine.bobo.api.BrowseFacet;
import com.browseengine.bobo.api.BrowseHit;
import com.browseengine.bobo.api.BrowseHit.TermFrequencyVector;
import com.browseengine.bobo.api.FacetAccessible;
import com.browseengine.bobo.api.FacetSpec;
import com.browseengine.bobo.facets.data.FacetDataCache;
import com.browseengine.bobo.facets.data.PrimitiveLongArrayWrapper;
import com.browseengine.bobo.facets.impl.GroupByFacetCountCollector;
import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
import com.browseengine.bobo.facets.CombinedFacetAccessible;
import com.browseengine.bobo.facets.FacetCountCollector;
import com.browseengine.bobo.facets.FacetHandler;
import com.browseengine.bobo.util.ListMerger;


public class SortCollectorImpl extends SortCollector {
  private static final Comparator<MyScoreDoc> MERGE_COMPATATOR = new Comparator<MyScoreDoc>()
  {
    public int compare(MyScoreDoc o1, MyScoreDoc o2) {
      Comparable s1 = o1.getValue();
      Comparable s2 = o2.getValue();
      int r;
      if (s1 == null) {
        if (s2 == null) {
          r = 0;
        } else {
          r = -1;
        }
      } else if (s2 == null) {
        r = 1;
      }
      else{
        int v = s1.compareTo(s2);
        if (v==0){
          r = o1.doc + o1.queue.base - o2.doc - o2.queue.base;
        } else {
          r = v;
        }
      }
      return r;
    }
  };

  private final LinkedList<DocIDPriorityQueue> _pqList;
  private final int _numHits;
  private int _totalHits;
  private int _totalGroups;
  private ScoreDoc _bottom;
  private ScoreDoc _tmpScoreDoc;
  private boolean _queueFull;
  private DocComparator _currentComparator;
  private DocComparatorSource _compSource;
  private DocIDPriorityQueue _currentQueue;
  private BoboIndexReader _currentReader=null;
  private FacetCountCollector _facetCountCollector;

  private final boolean _doScoring;
  private Scorer _scorer;
  private final int _offset;
  private final int _count;

  private final Browsable _boboBrowser;
  private final boolean _collectDocIdCache;
  private CombinedFacetAccessible _groupAccessible;
  private final List<FacetAccessible> _facetAccessibles;
  private final Int2ObjectOpenHashMap<ScoreDoc> _currentValueDocMaps;
  static class MyScoreDoc extends ScoreDoc {
    private static final long serialVersionUID = 1L;

    DocIDPriorityQueue queue;
    BoboIndexReader reader;
    Comparable sortValue;
   
    public MyScoreDoc(){
      this(0,0.0f,null,null);
    }

    public MyScoreDoc(int docid, float score, DocIDPriorityQueue queue,BoboIndexReader reader) {
      super(docid, score);
      this.queue = queue;
      this.reader = reader;
      this.sortValue = null;
    }

    Comparable getValue(){
      if(sortValue == null)
        sortValue = queue.sortValue(this);
      return sortValue;
    }
  }

  private CollectorContext _currentContext;
  private int[] _currentDocIdArray;
  private float[] _currentScoreArray;
  private int _docIdArrayCursor = 0;
  private int _docIdCacheCapacity = 0;
  private Set<String> _termVectorsToFetch;


  public SortCollectorImpl(DocComparatorSource compSource,
                           SortField[] sortFields,
                           Browsable boboBrowser,
                           int offset,
                           int count,
                           boolean doScoring,
                           boolean fetchStoredFields,
                           Set<String> termVectorsToFetch,
                           String groupBy,
                           int maxPerGroup,
                           boolean collectDocIdCache) {
    super(sortFields, fetchStoredFields);
    assert (offset>=0 && count>=0);
    _boboBrowser = boboBrowser;
    _compSource = compSource;
    _pqList = new LinkedList<DocIDPriorityQueue>();
    _numHits = offset + count;
    _offset = offset;
    _count = count;
    _totalHits = 0;
    _totalGroups = 0;
    _queueFull = false;
    _doScoring = doScoring;
    _tmpScoreDoc = new MyScoreDoc();
    _termVectorsToFetch = termVectorsToFetch;
    _collectDocIdCache = collectDocIdCache; // TODO: take maxPerGroup into consideration.
    if (groupBy != null) {
      this.groupBy = boboBrowser.getFacetHandler(groupBy);
      if (groupBy != null && _count > 0) {
        _currentValueDocMaps = new Int2ObjectOpenHashMap<ScoreDoc>(_count);
        _facetAccessibles = new LinkedList<FacetAccessible>();
        if (collectDocIdCache) {
          contextList = new LinkedList<CollectorContext>();
          docidarraylist = new LinkedList<int[]>();
          if (doScoring)
            scorearraylist = new LinkedList<float[]>();
        }
      }
      else {
        _currentValueDocMaps = null;
        _facetAccessibles = null;
      }
    }
    else {
      this.groupBy = null;
      _currentValueDocMaps = null;
      _facetAccessibles = null;
    }
  }

  @Override
  public boolean acceptsDocsOutOfOrder() {
    return _collector == null ? true : _collector.acceptsDocsOutOfOrder();
  }

  @Override
  public void collect(int doc) throws IOException {
    ++_totalHits;

    if (groupBy != null) {
      if (_facetCountCollector != null)
        _facetCountCollector.collect(doc);
      //Object val = groupBy.getRawFieldValues(_currentReader, doc)[0];

      if (_count > 0) {
        final float score = (_doScoring ? _scorer.score() : 0.0f);

        if (_collectDocIdCache) {
          if (_totalHits > _docIdCacheCapacity) {
            _currentDocIdArray = intarraymgr.get(BLOCK_SIZE);
            docidarraylist.add(_currentDocIdArray);
            if (_doScoring) {
              _currentScoreArray = floatarraymgr.get(BLOCK_SIZE);
              scorearraylist.add(_currentScoreArray);
            }
            _docIdCacheCapacity += BLOCK_SIZE;
            _docIdArrayCursor = 0;
          }
          _currentDocIdArray[_docIdArrayCursor] = doc;
          if (_doScoring)
            _currentScoreArray[_docIdArrayCursor] = score;
          ++_docIdArrayCursor;
          ++_currentContext.length;
        }

        _tmpScoreDoc.doc = doc;
        _tmpScoreDoc.score = score;
        if (!_queueFull || _currentComparator.compare(_bottom,_tmpScoreDoc) > 0) {
          final int order = ((FacetDataCache)groupBy.getFacetData(_currentReader)).orderArray.get(doc);
          ScoreDoc pre = _currentValueDocMaps.get(order);
          if (pre != null) {
            if ( _currentComparator.compare(pre, _tmpScoreDoc) > 0) {
              ScoreDoc tmp = pre;
              _bottom = _currentQueue.replace(_tmpScoreDoc, pre);
              _currentValueDocMaps.put(order, _tmpScoreDoc);
              _tmpScoreDoc = tmp;
            }
          }
          else {
            if (_queueFull){
              MyScoreDoc tmp = (MyScoreDoc)_bottom;
              _currentValueDocMaps.remove(((FacetDataCache)groupBy.getFacetData(tmp.reader)).orderArray.get(tmp.doc));
              _bottom = _currentQueue.replace(_tmpScoreDoc);
              _currentValueDocMaps.put(order, _tmpScoreDoc);
              _tmpScoreDoc = tmp;
            }
            else{
              ScoreDoc tmp = new MyScoreDoc(doc,score,_currentQueue,_currentReader);
              _bottom = _currentQueue.add(tmp);
              _currentValueDocMaps.put(order, tmp);
              _queueFull = (_currentQueue.size >= _numHits);
            }
          }
        }
      }
    }
    else {
      if (_count > 0) {
        final float score = (_doScoring ? _scorer.score() : 0.0f);

        if (_queueFull){
          _tmpScoreDoc.doc = doc;
          _tmpScoreDoc.score = score;
   
          if (_currentComparator.compare(_bottom,_tmpScoreDoc) > 0){
            ScoreDoc tmp = _bottom;
            _bottom = _currentQueue.replace(_tmpScoreDoc);
            _tmpScoreDoc = tmp;
          }
        }
        else{
          _bottom = _currentQueue.add(new MyScoreDoc(doc,score,_currentQueue,_currentReader));
          _queueFull = (_currentQueue.size >= _numHits);
        }
      }
    }

    if (_collector != null) _collector.collect(doc);
  }

  private void collectTotalGroups() {
    if (_facetCountCollector instanceof GroupByFacetCountCollector) {
      _totalGroups += ((GroupByFacetCountCollector)_facetCountCollector).getTotalGroups();
      return;
    }

    int[] count = _facetCountCollector.getCountDistribution();
    for (int c : count) {
      if (c > 0)
        ++_totalGroups;
    }
  }

  @Override
  public void setNextReader(IndexReader reader, int docBase) throws IOException {
    assert reader instanceof BoboIndexReader;
    _currentReader = (BoboIndexReader)reader;
    _currentComparator = _compSource.getComparator(reader,docBase);
    _currentQueue = new DocIDPriorityQueue(_currentComparator, _numHits, docBase);
    if (groupBy != null) {
      if (_facetCountCollector != null)
        collectTotalGroups();
      _facetCountCollector = ((SimpleFacetHandler)groupBy).getFacetCountCollectorSource(null, null, true).getFacetCountCollector(_currentReader, docBase);
      if (_facetAccessibles != null)
        _facetAccessibles.add(_facetCountCollector);
      if (_currentValueDocMaps != null)
        _currentValueDocMaps.clear();

      if (contextList != null) {
        _currentContext = new CollectorContext(_currentReader, docBase, _currentComparator);
        contextList.add(_currentContext);
      }
    }
    MyScoreDoc myScoreDoc = (MyScoreDoc)_tmpScoreDoc;
    myScoreDoc.queue = _currentQueue;
    myScoreDoc.reader = _currentReader;
    myScoreDoc.sortValue = null;
    _pqList.add(_currentQueue);
    _queueFull = false;
  }

  @Override
  public void setScorer(Scorer scorer) throws IOException {
    _scorer = scorer;
    _currentComparator.setScorer(scorer);
  }

  @Override
  public int getTotalHits(){
    return _totalHits;
  }

  @Override
  public int getTotalGroups(){
    return _totalGroups;
  }

  @Override
  public CombinedFacetAccessible getGroupAccessible(){
    return _groupAccessible;
  }

  @Override
  public BrowseHit[] topDocs() throws IOException{
    ArrayList<Iterator<MyScoreDoc>> iterList = new ArrayList<Iterator<MyScoreDoc>>(_pqList.size());
    for (DocIDPriorityQueue pq : _pqList){
      int count = pq.size();
      MyScoreDoc[] resList = new MyScoreDoc[count];
      for (int i = count - 1; i >= 0; i--) {
        resList[i] = (MyScoreDoc)pq.pop();
      }
      iterList.add(Arrays.asList(resList).iterator());
    }

    List<MyScoreDoc> resList;
    if (_count > 0) {
      if (groupBy == null) {
        resList = ListMerger.mergeLists(_offset, _count, iterList, MERGE_COMPATATOR);
      }
      else {
        int rawGroupValueType = 0// 0: unknown, 1: normal, 2: long[]

        PrimitiveLongArrayWrapper primitiveLongArrayWrapperTmp = new PrimitiveLongArrayWrapper(null);

        Object rawGroupValue = null;

        if (_facetCountCollector != null)
        {
          collectTotalGroups();
          _facetCountCollector = null;
        }
        _groupAccessible = new CombinedFacetAccessible(new FacetSpec(), _facetAccessibles);
        resList = new ArrayList<MyScoreDoc>(_count);
        Iterator<MyScoreDoc> mergedIter = ListMerger.mergeLists(iterList, MERGE_COMPATATOR);
        Set<Object> groupSet = new HashSet<Object>(_offset+_count);
        int offsetLeft = _offset;
        while(mergedIter.hasNext())
        {
          MyScoreDoc scoreDoc = mergedIter.next();
          Object[] vals = groupBy.getRawFieldValues(scoreDoc.reader, scoreDoc.doc);
          rawGroupValue = null;
          if (vals != null && vals.length > 0)
            rawGroupValue = vals[0];

          if (rawGroupValueType == 0) {
            if (rawGroupValue != null)
            {
              if (rawGroupValue instanceof long[])
                rawGroupValueType = 2;
              else
                rawGroupValueType = 1;
            }
          }
          if (rawGroupValueType == 2)
          {
            primitiveLongArrayWrapperTmp.data = (long[])rawGroupValue;
            rawGroupValue = primitiveLongArrayWrapperTmp;
          }

          if (!groupSet.contains(rawGroupValue))
          {
            if (offsetLeft > 0)
              --offsetLeft;
            else
            {
              resList.add(scoreDoc);
              if (resList.size() >= _count)
                break;
            }
            groupSet.add(new PrimitiveLongArrayWrapper(primitiveLongArrayWrapperTmp.data));
          }
        }
      }
    }
    else
      resList = Collections.EMPTY_LIST;

    Map<String,FacetHandler<?>> facetHandlerMap = _boboBrowser.getFacetHandlerMap();
    return buildHits(resList.toArray(new MyScoreDoc[resList.size()]), _sortFields, facetHandlerMap, _fetchStoredFields, _termVectorsToFetch,groupBy, _groupAccessible);
  }

  protected static BrowseHit[] buildHits(MyScoreDoc[] scoreDocs,SortField[] sortFields,Map<String,FacetHandler<?>> facetHandlerMap,boolean fetchStoredFields, Set<String> termVectorsToFetch, FacetHandler<?> groupBy, CombinedFacetAccessible groupAccessible)
  throws IOException
  {
    BrowseHit[] hits = new BrowseHit[scoreDocs.length];
    Collection<FacetHandler<?>> facetHandlers= facetHandlerMap.values();
    for (int i =scoreDocs.length-1; i >=0 ; i--)
    {
      MyScoreDoc fdoc = scoreDocs[i];
      BoboIndexReader reader = fdoc.reader;
      BrowseHit hit=new BrowseHit();
      if (fetchStoredFields)
      {
        hit.setStoredFields(reader.document(fdoc.doc));
      }
      if (termVectorsToFetch!=null && termVectorsToFetch.size()>0){
        Map<String,TermFrequencyVector> tvMap = new HashMap<String,TermFrequencyVector>();
        hit.setTermFreqMap(tvMap);
        for (String field : termVectorsToFetch){
          TermFreqVector tv = reader.getTermFreqVector(fdoc.doc, field);
          if (tv!=null){
            int[] freqs = tv.getTermFrequencies();
            String[] terms = tv.getTerms();
            tvMap.put(field, new TermFrequencyVector(terms, freqs));
          }
        }
      }
      Map<String,String[]> map = new HashMap<String,String[]>();
      Map<String,Object[]> rawMap = new HashMap<String,Object[]>();
      for (FacetHandler<?> facetHandler : facetHandlers)
      {
          map.put(facetHandler.getName(),facetHandler.getFieldValues(reader,fdoc.doc));
          rawMap.put(facetHandler.getName(),facetHandler.getRawFieldValues(reader,fdoc.doc));
      }
      hit.setFieldValues(map);
      hit.setRawFieldValues(rawMap);
      hit.setDocid(fdoc.doc+fdoc.queue.base);
      hit.setScore(fdoc.score);
      hit.setComparable(fdoc.getValue());
      if (groupBy != null) {
        hit.setGroupValue(hit.getField(groupBy.getName()));
        hit.setRawGroupValue(hit.getRawField(groupBy.getName()));
        if (hit.getGroupValue() != null && groupAccessible != null) {
          BrowseFacet facet = groupAccessible.getFacet(hit.getGroupValue());
          hit.setGroupHitsCount(facet.getFacetValueHitCount());
        }
      }
      hits[i] = hit;
    }
    return hits;
  }
}
TOP

Related Classes of com.browseengine.bobo.sort.SortCollectorImpl

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.