Package org.apache.solr.request.mdrill

Source Code of org.apache.solr.request.mdrill.MdrillGroupBy$EmptyPrecontains

package org.apache.solr.request.mdrill;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.PriorityQueue;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;

import org.apache.log4j.Logger;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;

import com.alimama.mdrill.utils.EncodeUtils;
import com.alimama.mdrill.utils.UniqConfig;

import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.compare.ColumnKey;
import org.apache.solr.request.compare.GroupbyRow;
import org.apache.solr.request.compare.MergerGroupByGroupbyRowCompare;
import org.apache.solr.request.compare.RecordCount;
import org.apache.solr.request.compare.ShardGroupByTermNum;
import org.apache.solr.request.join.HigoJoinInvert;
import org.apache.solr.request.mdrill.MdrillUtils.*;

/**
* Implementation of multi-column GROUP BY classification and aggregation.
* @author yannian.mu
*/
public class MdrillGroupBy {
    private static Logger LOG = Logger.getLogger(MdrillGroupBy.class);

  public static Integer MAX_CROSS_ROWS=UniqConfig.defaultCrossMaxLimit();
  private SolrIndexSearcher searcher;
  private SolrQueryRequest req;
  public int mergercount=0;

  private RecordCount recordCount ;
 

  private SegmentReader reader;
  private MdrillParseGroupby parse;
 
  private ShardGroupByTermNum smallestShardGroup=null;
  private MdrillParseGroupby.fetchContaioner container=null;

  public MdrillGroupBy(SolrIndexSearcher _searcher,SegmentReader reader,SolrParams _params,SolrQueryRequest req)
  {
    this.reader=reader;
    this.searcher=_searcher;
    this.req=req;
    this.parse=new MdrillParseGroupby(_params);
    this.recordCount = new RecordCount();
    this.recordCount.setFinalResult(false);
    this.recordCount.setMaxUniqSize(this.parse.maxlimit);
  }
   
  public NamedList get(String[] fields, DocSet baseDocs) throws IOException,
      ParseException {
    long t1=System.currentTimeMillis();
    this.container=this.parse.createContainer(fields, baseDocs, this.reader, this.searcher, this.req);
    long t2=System.currentTimeMillis();
    QuickHashMap<GroupListCache.GroupList,RefRow> groups=this.makeTopGroups(fields);
    long t3=System.currentTimeMillis();
    this.transGroupValue(groups);
    long t4=System.currentTimeMillis();
   
    NamedList rtn= this.toNameList()
    container.free(groups);
    long t5=System.currentTimeMillis();
    LOG.info("##FacetCross## time taken "+",total:"+(t5-t1)+",init:"+(t2-t1)+",makeGroups:"+(t3-t2)+",transGroupValue:"+(t4-t3)+",groups.size:"+groups.size());
    return rtn;
  }
 
 
  public QuickHashMap<GroupListCache.GroupList,RefRow> makeTopGroups(String[] fields) throws IOException
  {
    GroupListCache.GroupList group = GroupListCache.GroupList.INSTANCE(container.groupListCache, container.groupbySize);
    QuickHashMap<GroupListCache.GroupList,RefRow> groups=new QuickHashMap<GroupListCache.GroupList,RefRow>(this.parse.limit_offset_maxgroups+1);
   
   
    boolean issetDist=this.parse.isMustSetDistResult();
    if(container.groupNonEmptySize==0)
    {
      group.reset();
      RefRow cnt = this.makeOrGetGroup(groups, group);
     
      if(container.countOnly())
      {
        cnt.val+=container.baseDocs.size();
      }else{
        DocIterator iter = container.baseDocs.iterator();
        if(container.noDist()){
          while (iter.hasNext()) {
            int doc = iter.nextDoc();
            cnt.val++;
            container.updateStat(cnt, doc);
          }
        }else if(container.noStat()){
          while (iter.hasNext()) {
            int doc = iter.nextDoc();
            cnt.val++;
            if(issetDist)
            {
              container.updateDist(cnt, doc);
            }
          }
        }else{
          while (iter.hasNext()) {
            int doc = iter.nextDoc();
            cnt.val++;
            container.updateStat(cnt, doc);
            if(issetDist)
            {
            container.updateDist(cnt, doc);
            }
          }
        }
      }
    }else{
      DocIterator iter = container.baseDocs.iterator();

      if (container.countOnly())
      {
        while (iter.hasNext()) {
          int doc = iter.nextDoc();
          if (container.toGroupsByJoin(doc, group)&&container.pre.contains(group)) {
            RefRow cnt = this.makeOrGetGroup(groups, group);
            cnt.val++;
            this.delayPut(groups, cnt,group);
          }
       
        }

      } else if (container.noDist()) {
        while (iter.hasNext()) {
          int doc = iter.nextDoc();

          if (container.toGroupsByJoin(doc, group)&&container.pre.contains(group)) {
            RefRow cnt = this.makeOrGetGroup(groups, group);
            cnt.val++;
            container.updateStat(cnt, doc);
            this.delayPut(groups, cnt, group);
          }
       
        }

      } else if (container.noStat()) {
        while (iter.hasNext()) {
          int doc = iter.nextDoc();

          if (container.toGroupsByJoin(doc, group)&&container.pre.contains(group)) {
            RefRow cnt = this.makeOrGetGroup(groups, group);
            cnt.val++;
            if(issetDist)
            {
              container.updateDist(cnt, doc);
            }
            this.delayPut(groups, cnt, group);
          }
       
        }
      } else {
        while (iter.hasNext()) {
          int doc = iter.nextDoc();

          if (container.toGroupsByJoin(doc, group)&&container.pre.contains(group)) {
            RefRow cnt = this.makeOrGetGroup(groups, group);
            cnt.val++;
            container.updateStat(cnt, doc);
            if(issetDist)
            {
              container.updateDist(cnt, doc);
            }
            this.delayPut(groups, cnt, group);
          }
       
        }
      }
    }
   
    TopMaps(groups);
    return groups;
  }
 
   
  private void TopMaps(QuickHashMap<GroupListCache.GroupList,RefRow> groups)
  {
    long t1=System.currentTimeMillis();
    int groupsize=groups.size();
    if(groupsize<=this.parse.limit_offset)
    {
      return ;
    }
    PriorityQueue<ShardGroupByTermNum> res = new PriorityQueue<ShardGroupByTermNum>(this.parse.limit_offset, Collections.reverseOrder(this.container.cmpTermNum));
    LinkedBlockingQueue<GroupListCache.GroupList> toremove=new LinkedBlockingQueue<GroupListCache.GroupList>();;
    QuickHashMap<GroupListCache.GroupList,RefRow> debug=new QuickHashMap<GroupListCache.GroupList, MdrillUtils.RefRow>(this.parse.limit_offset);

    for(Entry<GroupListCache.GroupList,RefRow> e:groups.entrySet())
    {
      debug.put(e.getKey(), e.getValue());
      ShardGroupByTermNum mrow=new ShardGroupByTermNum(e.getKey(), e.getValue());
      if (res.size() < this.parse.limit_offset) {
        res.add(mrow);
      } else if (this.container.cmpTermNum.compare(res.peek(), mrow) > 0) {
        res.add(mrow);
        ShardGroupByTermNum free=res.poll();
        toremove.add(free.key);
      }else{
        toremove.add(mrow.key);
      }
    }
    int cnt1=0;

    for(GroupListCache.GroupList torm:toremove)
    {
      groups.remove(torm);
      this.container.freeRow(torm);
      this.container.groupListCache.add(torm);
      cnt1++;
    }
   
    smallestShardGroup=res.peek();
   
    long t2=System.currentTimeMillis();
    LOG.info("TopMaps groups.size="+groupsize+"@"+debug.size() +" to "+groups.size()+"@"+this.parse.limit_offset+",res.size="+res.size()+",remove="+cnt1+",timetaken="+(t2-t1)+",mergercount="+this.mergercount);
  }

  private NamedList toNameList() {
    java.util.ArrayList<GroupbyRow> recommendations = new ArrayList<GroupbyRow>(this.container.res.size());
    recommendations.addAll(this.container.res);
    Collections.sort(recommendations, this.container.cmpString);

    Integer index = 0;
    NamedList res = new NamedList();
    res.add("count", recordCount.toNamedList());
   
    ConcurrentHashMap<Long,String> cache=null;

    boolean issetCrc=this.parse.crcOutputSet!=null;
    MergerGroupByGroupbyRowCompare mergerCmp=null;
    if(issetCrc)
    {
      synchronized (MdrillUtils.CRC_CACHE_SIZE) {
        cache=MdrillUtils.CRC_CACHE_SIZE.get(this.parse.crcOutputSet);
        if(cache==null)
        {
          cache=new ConcurrentHashMap<Long,String>();
          MdrillUtils.CRC_CACHE_SIZE.put(this.parse.crcOutputSet, cache);

        }
      }
     
      FacetComponent.FieldFacet facet=new FacetComponent.FieldFacet(this.parse.params, "solrCorssFields_s");
      mergerCmp=facet.createMergerGroupCmp();
    }
   
    ArrayList<Object> list=new ArrayList<Object>();
   
    for (GroupbyRow kv : recommendations) {
      if (index >= this.parse.offset) {
        if(issetCrc)
        {
          kv.ToCrcSet(mergerCmp,cache);
        }
        list.add(kv.toNamedList());
      }
      index++;
    }
    res.add("list", list);
    return res;
  }
     

  private void setCrossRow(RefRow ref,String groupname) throws ParseException, IOException
    {
      this.recordCount.setCrcRecord(groupname);
      GroupbyRow row = new GroupbyRow(new ColumnKey(groupname), ref.val);
      row.setCross(this.parse.crossFs, this.parse.distFS);
      if(this.parse.hasStat())
    {
        for(int i=0;i<this.parse.crossFs.length;i++)
        {
          RefRowStat s=ref.stat[i];
          if(s.issetup)
          {
            row.addStat(i, 1, s.sum);
            row.addStat(i, 2, s.max);
            row.addStat(i, 3, s.min);
            row.addStat(i, 4, (double)s.cnt);
          }else{
            row.addStat(i, 1, 0d);
            row.addStat(i, 2, 0d);
            row.addStat(i, 3, 0d);
            row.addStat(i, 4, 0d);
          }
        }
      }
     
      if(this.parse.hasDist())
      {
        for(int i=0;i<this.parse.distFS.length;i++)
        {
          row.setDistinct(i, ref.dist[i]);
        }
      }
     
      QueuePutUtils.put2Queue(row, this.container.res, this.parse.limit_offset, this.container.cmpString);
    }
   

 
  public static interface Iprecontains{
    public boolean contains(GroupListCache.GroupList g);
  }
  public static class PreContains implements Iprecontains{
    HashSet<GroupListCache.GroupList> preSet;

    public PreContains(HashSet<GroupListCache.GroupList> preSet) {
      this.preSet = preSet;
    }
   
    public boolean contains(GroupListCache.GroupList g)
    {
      return this.preSet.contains(g);
    }
  }
  public static class EmptyPrecontains implements Iprecontains{
   
    public boolean contains(GroupListCache.GroupList g)
    {
      return true;
    }
  }
 
 
  public void transGroupValue(QuickHashMap<GroupListCache.GroupList,RefRow> groups) throws ParseException, IOException
  {
    TermNumToString[] tm= this.container.prefetch(groups);
     for(Entry<GroupListCache.GroupList,RefRow> e:groups.entrySet())
     {
       int[] group=e.getKey().list;
       StringBuffer buff=new StringBuffer();
       String j="";
       for(int i=0;i<container.ufs.length;i++)
       {
         Integer termNum=group[i];
         buff.append(j);
         if(container.ufs.cols[i]!=null)
         {
           buff.append(EncodeUtils.encode(tm[i].getTermValue(termNum)));
         }else{
           buff.append("-");
         }
         j=UniqConfig.GroupJoinString();
       }
      
       int joinoffset=container.ufs.length;
      for(HigoJoinInvert inv: this.container.joinInvert)
      {
        int fc=inv.fieldCount();
        for(int i=0;i<fc;i++)
        {
           buff.append(j);
           buff.append(EncodeUtils.encode(inv.getTermNumValue(group[joinoffset+i], i)));
           j=UniqConfig.GroupJoinString();
        }
        joinoffset+=inv.fieldCount();
      }
      
       String groupname=buff.toString();
       this.setCrossRow(e.getValue(), groupname);
     }
  }
 
  private void delayPut(QuickHashMap<GroupListCache.GroupList,RefRow> groups,RefRow cnt,GroupListCache.GroupList group)
  {
    if(cnt.delayPut)
    {
      if( this.container.cmpTermNum.compare(smallestShardGroup,new ShardGroupByTermNum(group, cnt))>0){
        cnt.delayPut=false;
        groups.put(group.copy( this.container.groupListCache), cnt);

      }else{
        this.container.freeRow(group);
      }
    }
  }

  private RefRow makeOrGetGroup(QuickHashMap<GroupListCache.GroupList, RefRow> groups,GroupListCache.GroupList group) {
    RefRow cnt = groups.get(group);
    if (cnt == null) {
      if (groups.size() >= this.parse.limit_offset_maxgroups) {
        mergercount++;
        if (mergercount >= this.parse.limit_offset_maxgroups_merger) {
          return this.container.getEmptyRow();
        }
        this.recordCount.setCrcRecord("-");
        this.recordCount.setIsoversize(true);
        TopMaps(groups);
      }

      cnt = this.container.createRow(group);
      if (smallestShardGroup == null) {
        groups.put(group.copy(this.container.groupListCache), cnt);
      } else {
        cnt.delayPut = true;
      }
    }
    return cnt;
  }
 
}
TOP

Related Classes of org.apache.solr.request.mdrill.MdrillGroupBy$EmptyPrecontains

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware#gmail.com.