Source Code of org.apache.lucene.search.grouping.SearchGroupDocs

package org.apache.lucene.search.grouping;


/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;


import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;


/**
 * SecondPassGroupingCollector is the second of two passes
 * necessary to collect grouped docs.  This pass gathers the
 * top N documents per top group computed from the
 * first pass.
 *
 * <p>See {@link org.apache.lucene.search.grouping} for more
 * details including a full code example.</p>
 *
 * @lucene.experimental
 */
public class SecondPassGroupingCollector extends Collector {
  private final HashMap<String, SearchGroupDocs> groupMap;


  private FieldCache.StringIndex index;
  private final String groupField;
  private final int maxDocsPerGroup;
  private final SentinelIntSet ordSet;
  private final SearchGroupDocs[] groupDocs;
  private final Collection<SearchGroup> groups;
  private final Sort withinGroupSort;
  private final Sort groupSort;


  private int totalHitCount;
  private int totalGroupedHitCount;


  public SecondPassGroupingCollector(String groupField, Collection<SearchGroup> groups, Sort groupSort, Sort withinGroupSort,
                                     int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields)
    throws IOException {


    //System.out.println("SP init");
    if (groups.size() == 0) {
      throw new IllegalArgumentException("no groups to collect (groups.size() is 0)");
    }


    this.groupSort = groupSort;
    this.withinGroupSort = withinGroupSort;
    this.groups = groups;
    this.groupField = groupField;
    this.maxDocsPerGroup = maxDocsPerGroup;


    groupMap = new HashMap<String, SearchGroupDocs>(groups.size());


    for (SearchGroup group : groups) {
      //System.out.println("  prep group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
      final TopDocsCollector collector;
      if (withinGroupSort == null) {
        // Sort by score
        collector = TopScoreDocCollector.create(maxDocsPerGroup, true);
      } else {
        // Sort by fields
        collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true);
      }
      groupMap.put(group.groupValue,
                   new SearchGroupDocs(group.groupValue,
                                       collector));
    }


    ordSet = new SentinelIntSet(groupMap.size(), -1);
    groupDocs = new SearchGroupDocs[ordSet.keys.length];
  }


  @Override
  public void setScorer(Scorer scorer) throws IOException {
    for (SearchGroupDocs group : groupMap.values()) {
      group.collector.setScorer(scorer);
    }
  }


  @Override
  public void collect(int doc) throws IOException {
    final int slot = ordSet.find(index.order[doc]);
    //System.out.println("SP.collect doc=" + doc + " slot=" + slot);
    totalHitCount++;
    if (slot >= 0) {
      totalGroupedHitCount++;
      groupDocs[slot].collector.collect(doc);
    }
  }


  @Override
  public void setNextReader(IndexReader reader, int docBase) throws IOException {
    //System.out.println("SP.setNextReader");
    for (SearchGroupDocs group : groupMap.values()) {
      group.collector.setNextReader(reader, docBase);
    }
    index = FieldCache.DEFAULT.getStringIndex(reader, groupField);


    // Rebuild ordSet
    ordSet.clear();
    for (SearchGroupDocs group : groupMap.values()) {
      //System.out.println("  group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
      int ord = group.groupValue == null ? 0 : index.binarySearchLookup(group.groupValue);
      if (ord >= 0) {
        groupDocs[ordSet.put(ord)] = group;
      }
    }
  }


  @Override
  public boolean acceptsDocsOutOfOrder() {
    return false;
  }


  public TopGroups getTopGroups(int withinGroupOffset) {
    final GroupDocs[] groupDocsResult = new GroupDocs[groups.size()];


    int groupIDX = 0;
    for(SearchGroup group : groups) {
      final SearchGroupDocs groupDocs = groupMap.get(group.groupValue);
      final TopDocs topDocs = groupDocs.collector.topDocs(withinGroupOffset, maxDocsPerGroup);
      groupDocsResult[groupIDX++] = new GroupDocs(topDocs.getMaxScore(),
                                                  topDocs.totalHits,
                                                  topDocs.scoreDocs,
                                                  groupDocs.groupValue,
                                                  group.sortValues);
    }


    return new TopGroups(groupSort.getSort(),
                         withinGroupSort == null ? null : withinGroupSort.getSort(),
                         totalHitCount, totalGroupedHitCount, groupDocsResult);
  }
}




// TODO: merge with SearchGroup or not?
// ad: don't need to build a new hashmap
// disad: blows up the size of SearchGroup if we need many of them, and couples implementations
class SearchGroupDocs {
  public final String groupValue;
  public final TopDocsCollector collector;


  public SearchGroupDocs(String groupValue, TopDocsCollector collector) {
    this.groupValue = groupValue;
    this.collector = collector;
  }
}
Source Code of org.apache.lucene.search.grouping.SearchGroupDocs

Related Classes of org.apache.lucene.search.grouping.SearchGroupDocs