Package org.apache.lucene.search.grouping

Source Code of org.apache.lucene.search.grouping.SearchGroupDocs

package org.apache.lucene.search.grouping;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;

/**
* SecondPassGroupingCollector is the second of two passes
* necessary to collect grouped docs.  This pass gathers the
* top N documents per top group computed from the
* first pass.
*
* <p>See {@link org.apache.lucene.search.grouping} for more
* details including a full code example.</p>
*
* @lucene.experimental
*/
public class SecondPassGroupingCollector extends Collector {
  private final HashMap<String, SearchGroupDocs> groupMap;

  private FieldCache.StringIndex index;
  private final String groupField;
  private final int maxDocsPerGroup;
  private final SentinelIntSet ordSet;
  private final SearchGroupDocs[] groupDocs;
  private final Collection<SearchGroup> groups;
  private final Sort withinGroupSort;
  private final Sort groupSort;

  private int totalHitCount;
  private int totalGroupedHitCount;

  public SecondPassGroupingCollector(String groupField, Collection<SearchGroup> groups, Sort groupSort, Sort withinGroupSort,
                                     int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields)
    throws IOException {

    //System.out.println("SP init");
    if (groups.size() == 0) {
      throw new IllegalArgumentException("no groups to collect (groups.size() is 0)");
    }

    this.groupSort = groupSort;
    this.withinGroupSort = withinGroupSort;
    this.groups = groups;
    this.groupField = groupField;
    this.maxDocsPerGroup = maxDocsPerGroup;

    groupMap = new HashMap<String, SearchGroupDocs>(groups.size());

    for (SearchGroup group : groups) {
      //System.out.println("  prep group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
      final TopDocsCollector collector;
      if (withinGroupSort == null) {
        // Sort by score
        collector = TopScoreDocCollector.create(maxDocsPerGroup, true);
      } else {
        // Sort by fields
        collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true);
      }
      groupMap.put(group.groupValue,
                   new SearchGroupDocs(group.groupValue,
                                       collector));
    }

    ordSet = new SentinelIntSet(groupMap.size(), -1);
    groupDocs = new SearchGroupDocs[ordSet.keys.length];
  }

  @Override
  public void setScorer(Scorer scorer) throws IOException {
    for (SearchGroupDocs group : groupMap.values()) {
      group.collector.setScorer(scorer);
    }
  }

  @Override
  public void collect(int doc) throws IOException {
    final int slot = ordSet.find(index.order[doc]);
    //System.out.println("SP.collect doc=" + doc + " slot=" + slot);
    totalHitCount++;
    if (slot >= 0) {
      totalGroupedHitCount++;
      groupDocs[slot].collector.collect(doc);
    }
  }

  @Override
  public void setNextReader(IndexReader reader, int docBase) throws IOException {
    //System.out.println("SP.setNextReader");
    for (SearchGroupDocs group : groupMap.values()) {
      group.collector.setNextReader(reader, docBase);
    }
    index = FieldCache.DEFAULT.getStringIndex(reader, groupField);

    // Rebuild ordSet
    ordSet.clear();
    for (SearchGroupDocs group : groupMap.values()) {
      //System.out.println("  group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
      int ord = group.groupValue == null ? 0 : index.binarySearchLookup(group.groupValue);
      if (ord >= 0) {
        groupDocs[ordSet.put(ord)] = group;
      }
    }
  }

  @Override
  public boolean acceptsDocsOutOfOrder() {
    return false;
  }

  public TopGroups getTopGroups(int withinGroupOffset) {
    final GroupDocs[] groupDocsResult = new GroupDocs[groups.size()];

    int groupIDX = 0;
    for(SearchGroup group : groups) {
      final SearchGroupDocs groupDocs = groupMap.get(group.groupValue);
      final TopDocs topDocs = groupDocs.collector.topDocs(withinGroupOffset, maxDocsPerGroup);
      groupDocsResult[groupIDX++] = new GroupDocs(topDocs.getMaxScore(),
                                                  topDocs.totalHits,
                                                  topDocs.scoreDocs,
                                                  groupDocs.groupValue,
                                                  group.sortValues);
    }

    return new TopGroups(groupSort.getSort(),
                         withinGroupSort == null ? null : withinGroupSort.getSort(),
                         totalHitCount, totalGroupedHitCount, groupDocsResult);
  }
}


// TODO: merge with SearchGroup or not?
// ad: don't need to build a new hashmap
// disad: blows up the size of SearchGroup if we need many of them, and couples implementations
class SearchGroupDocs {
  public final String groupValue;
  public final TopDocsCollector collector;

  public SearchGroupDocs(String groupValue, TopDocsCollector collector) {
    this.groupValue = groupValue;
    this.collector = collector;
  }
}
TOP

Related Classes of org.apache.lucene.search.grouping.SearchGroupDocs

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.