Source Code of org.exist.indexing.lucene.QueryFacetCollector

/*
 *  eXist Open Source Native XML Database
 *  Copyright (C) 2013 The eXist Project
 *  http://exist-db.org
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public License
 *  as published by the Free Software Foundation; either version 2
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 *  $Id$
 */
package org.exist.indexing.lucene;


import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;


import org.apache.lucene.facet.encoding.DGapVInt8IntDecoder;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.search.CountingFacetsAggregator;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.FacetResultsHandler;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.search.FastCountingFacetsAggregator;
import org.apache.lucene.facet.search.FloatFacetResultsHandler;
import org.apache.lucene.facet.search.IntFacetResultsHandler;
import org.apache.lucene.facet.search.TopKFacetResultsHandler;
import org.apache.lucene.facet.search.TopKInEachNodeHandler;
import org.apache.lucene.facet.search.FacetRequest.FacetArraysSource;
import org.apache.lucene.facet.search.FacetRequest.ResultMode;
import org.apache.lucene.facet.search.FacetRequest.SortOrder;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.FixedBitSet;
import org.exist.dom.DefaultDocumentSet;
import org.exist.dom.DocumentSet;


/**
 * @author <a href="mailto:shabanovd@gmail.com">Dmitriy Shabanov</a>
 *
 */
public abstract class QueryFacetCollector extends Collector {


    protected Scorer scorer;


    protected AtomicReaderContext context;
    protected AtomicReader reader;
    protected NumericDocValues docIdValues;


    protected final DocumentSet docs;


    protected final List<MatchingDocs> matchingDocs = new ArrayList<>();
    protected final FacetArrays facetArrays;
    
    protected final TaxonomyReader taxonomyReader;
    protected final FacetSearchParams searchParams;


    protected int totalHits;
    protected FixedBitSet bits;
    protected float[] scores;


    protected DefaultDocumentSet docbits;
    //private FixedBitSet docbits;


    protected QueryFacetCollector(
            final DocumentSet docs, 
            
            final FacetSearchParams searchParams, 


            final TaxonomyReader taxonomyReader) {
        
        this.docs = docs;
        
        this.searchParams = searchParams;
        this.taxonomyReader = taxonomyReader;
        
//        this.facetArrays = new FacetArrays(taxonomyReader.getSize());
        this.facetArrays = new FacetArrays(
                PartitionsUtils.partitionSize(searchParams.indexingParams, taxonomyReader));
        
        docbits = new DefaultDocumentSet(1031);//docs.getDocumentCount());
        //docbits = new FixedBitSet(docs.getDocumentCount());


    }


    @Override
    public void setScorer(Scorer scorer) throws IOException {
        this.scorer = scorer;
    }


    @Override
    public void setNextReader(AtomicReaderContext atomicReaderContext)
            throws IOException {
        reader = atomicReaderContext.reader();
        docIdValues = reader.getNumericDocValues(LuceneUtil.FIELD_DOC_ID);


        if (bits != null) {
            matchingDocs.add(new MatchingDocs(context, bits, totalHits, scores));
        }
        bits = new FixedBitSet(reader.maxDoc());
        totalHits = 0;
        scores = new float[64]; // some initial size
        context = atomicReaderContext;
    }
    
    protected void finish() {
      if (bits != null) {
        matchingDocs.add(new MatchingDocs(this.context, bits, totalHits, scores));
        bits = null;
        scores = null;
        context = null;
      }
    }


    @Override
    public boolean acceptsDocsOutOfOrder() {
        return false;
    }


    @Override
    public abstract void collect(int doc);


    private boolean verifySearchParams(FacetSearchParams fsp) {
        // verify that all category lists were encoded with DGapVInt
        for (FacetRequest fr : fsp.facetRequests) {
            CategoryListParams clp = fsp.indexingParams.getCategoryListParams(fr.categoryPath);
            if (clp.createEncoder().createMatchingDecoder().getClass() != DGapVInt8IntDecoder.class) {
                return false;
            }
        }


        return true;
    }


    private FacetsAggregator getAggregator() {
        if (verifySearchParams(searchParams)) {
            return new FastCountingFacetsAggregator();
        } else {
            return new CountingFacetsAggregator();
        }
    }


    private Set<CategoryListParams> getCategoryLists() {
        if (searchParams.indexingParams.getAllCategoryListParams().size() == 1) {
            return Collections.singleton(searchParams.indexingParams.getCategoryListParams(null));
        }


        HashSet<CategoryListParams> clps = new HashSet<>();
        for (FacetRequest fr : searchParams.facetRequests) {
            clps.add(searchParams.indexingParams.getCategoryListParams(fr.categoryPath));
        }
        return clps;
    }


    private FacetResultsHandler createFacetResultsHandler(FacetRequest fr) {
        if (fr.getDepth() == 1 && fr.getSortOrder() == SortOrder.DESCENDING) {
            FacetArraysSource fas = fr.getFacetArraysSource();
            if (fas == FacetArraysSource.INT) {
                return new IntFacetResultsHandler(taxonomyReader, fr, facetArrays);
            }


            if (fas == FacetArraysSource.FLOAT) {
                return new FloatFacetResultsHandler(taxonomyReader, fr, facetArrays);
            }
        }


        if (fr.getResultMode() == ResultMode.PER_NODE_IN_TREE) {
            return new TopKInEachNodeHandler(taxonomyReader, fr, facetArrays);
        }
        return new TopKFacetResultsHandler(taxonomyReader, fr, facetArrays);
    }


    private static FacetResult emptyResult(int ordinal, FacetRequest fr) {
        FacetResultNode root = new FacetResultNode(ordinal, 0);
        root.label = fr.categoryPath;
        return new FacetResult(fr, root, 0);
    }
    
    List<FacetResult> facetResults = null;
    
    public List<FacetResult> getFacetResults() throws IOException {
        if (facetResults == null) {
            finish();
            facetResults = accumulate();
        }
        return facetResults;
    }


    private List<FacetResult> accumulate() throws IOException {
        
        // aggregate facets per category list (usually only one category list)
        FacetsAggregator aggregator = getAggregator();
        for (CategoryListParams clp : getCategoryLists()) {
            for (MatchingDocs md : matchingDocs) {
                aggregator.aggregate(md, clp, facetArrays);
            }
        }


        ParallelTaxonomyArrays arrays = taxonomyReader.getParallelTaxonomyArrays();


        // compute top-K
        final int[] children = arrays.children();
        final int[] siblings = arrays.siblings();
        List<FacetResult> res = new ArrayList<>();
        for (FacetRequest fr : searchParams.facetRequests) {
            int rootOrd = taxonomyReader.getOrdinal(fr.categoryPath);
            // category does not exist
            if (rootOrd == TaxonomyReader.INVALID_ORDINAL) {
                // Add empty FacetResult
                res.add(emptyResult(rootOrd, fr));
                continue;
            }
            CategoryListParams clp = searchParams.indexingParams.getCategoryListParams(fr.categoryPath);
            // someone might ask to aggregate ROOT category
            if (fr.categoryPath.length > 0) { 
                OrdinalPolicy ordinalPolicy = clp.getOrdinalPolicy(fr.categoryPath.components[0]);
                if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
                    // rollup values
                    aggregator.rollupValues(fr, rootOrd, children, siblings, facetArrays);
                }
            }


            FacetResultsHandler frh = createFacetResultsHandler(fr);
            res.add(frh.compute());
        }
        return res;
    }
}
Source Code of org.exist.indexing.lucene.QueryFacetCollector

Related Classes of org.exist.indexing.lucene.QueryFacetCollector