Package org.exist.indexing.lucene

Source Code of org.exist.indexing.lucene.QueryFacetCollector

/*
*  eXist Open Source Native XML Database
*  Copyright (C) 2013 The eXist Project
*  http://exist-db.org
*
*  This program is free software; you can redistribute it and/or
*  modify it under the terms of the GNU Lesser General Public License
*  as published by the Free Software Foundation; either version 2
*  of the License, or (at your option) any later version.
*
*  This program is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU Lesser General Public License for more details.
*
*  You should have received a copy of the GNU Lesser General Public
*  License along with this library; if not, write to the Free Software
*  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*
*  $Id$
*/
package org.exist.indexing.lucene;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.lucene.facet.encoding.DGapVInt8IntDecoder;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.search.CountingFacetsAggregator;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.FacetResultsHandler;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.search.FastCountingFacetsAggregator;
import org.apache.lucene.facet.search.FloatFacetResultsHandler;
import org.apache.lucene.facet.search.IntFacetResultsHandler;
import org.apache.lucene.facet.search.TopKFacetResultsHandler;
import org.apache.lucene.facet.search.TopKInEachNodeHandler;
import org.apache.lucene.facet.search.FacetRequest.FacetArraysSource;
import org.apache.lucene.facet.search.FacetRequest.ResultMode;
import org.apache.lucene.facet.search.FacetRequest.SortOrder;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.FixedBitSet;
import org.exist.dom.DefaultDocumentSet;
import org.exist.dom.DocumentSet;

/**
* @author <a href="mailto:shabanovd@gmail.com">Dmitriy Shabanov</a>
*
*/
public abstract class QueryFacetCollector extends Collector {

    protected Scorer scorer;

    protected AtomicReaderContext context;
    protected AtomicReader reader;
    protected NumericDocValues docIdValues;

    protected final DocumentSet docs;

    protected final List<MatchingDocs> matchingDocs = new ArrayList<>();
    protected final FacetArrays facetArrays;
   
    protected final TaxonomyReader taxonomyReader;
    protected final FacetSearchParams searchParams;

    protected int totalHits;
    protected FixedBitSet bits;
    protected float[] scores;

    protected DefaultDocumentSet docbits;
    //private FixedBitSet docbits;

    protected QueryFacetCollector(
            final DocumentSet docs,
           
            final FacetSearchParams searchParams,

            final TaxonomyReader taxonomyReader) {
       
        this.docs = docs;
       
        this.searchParams = searchParams;
        this.taxonomyReader = taxonomyReader;
       
//        this.facetArrays = new FacetArrays(taxonomyReader.getSize());
        this.facetArrays = new FacetArrays(
                PartitionsUtils.partitionSize(searchParams.indexingParams, taxonomyReader));
       
        docbits = new DefaultDocumentSet(1031);//docs.getDocumentCount());
        //docbits = new FixedBitSet(docs.getDocumentCount());

    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {
        this.scorer = scorer;
    }

    @Override
    public void setNextReader(AtomicReaderContext atomicReaderContext)
            throws IOException {
        reader = atomicReaderContext.reader();
        docIdValues = reader.getNumericDocValues(LuceneUtil.FIELD_DOC_ID);

        if (bits != null) {
            matchingDocs.add(new MatchingDocs(context, bits, totalHits, scores));
        }
        bits = new FixedBitSet(reader.maxDoc());
        totalHits = 0;
        scores = new float[64]; // some initial size
        context = atomicReaderContext;
    }
   
    protected void finish() {
      if (bits != null) {
        matchingDocs.add(new MatchingDocs(this.context, bits, totalHits, scores));
        bits = null;
        scores = null;
        context = null;
      }
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
        return false;
    }

    @Override
    public abstract void collect(int doc);

    private boolean verifySearchParams(FacetSearchParams fsp) {
        // verify that all category lists were encoded with DGapVInt
        for (FacetRequest fr : fsp.facetRequests) {
            CategoryListParams clp = fsp.indexingParams.getCategoryListParams(fr.categoryPath);
            if (clp.createEncoder().createMatchingDecoder().getClass() != DGapVInt8IntDecoder.class) {
                return false;
            }
        }

        return true;
    }

    private FacetsAggregator getAggregator() {
        if (verifySearchParams(searchParams)) {
            return new FastCountingFacetsAggregator();
        } else {
            return new CountingFacetsAggregator();
        }
    }

    private Set<CategoryListParams> getCategoryLists() {
        if (searchParams.indexingParams.getAllCategoryListParams().size() == 1) {
            return Collections.singleton(searchParams.indexingParams.getCategoryListParams(null));
        }

        HashSet<CategoryListParams> clps = new HashSet<>();
        for (FacetRequest fr : searchParams.facetRequests) {
            clps.add(searchParams.indexingParams.getCategoryListParams(fr.categoryPath));
        }
        return clps;
    }

    private FacetResultsHandler createFacetResultsHandler(FacetRequest fr) {
        if (fr.getDepth() == 1 && fr.getSortOrder() == SortOrder.DESCENDING) {
            FacetArraysSource fas = fr.getFacetArraysSource();
            if (fas == FacetArraysSource.INT) {
                return new IntFacetResultsHandler(taxonomyReader, fr, facetArrays);
            }

            if (fas == FacetArraysSource.FLOAT) {
                return new FloatFacetResultsHandler(taxonomyReader, fr, facetArrays);
            }
        }

        if (fr.getResultMode() == ResultMode.PER_NODE_IN_TREE) {
            return new TopKInEachNodeHandler(taxonomyReader, fr, facetArrays);
        }
        return new TopKFacetResultsHandler(taxonomyReader, fr, facetArrays);
    }

    private static FacetResult emptyResult(int ordinal, FacetRequest fr) {
        FacetResultNode root = new FacetResultNode(ordinal, 0);
        root.label = fr.categoryPath;
        return new FacetResult(fr, root, 0);
    }
   
    List<FacetResult> facetResults = null;
   
    public List<FacetResult> getFacetResults() throws IOException {
        if (facetResults == null) {
            finish();
            facetResults = accumulate();
        }
        return facetResults;
    }

    private List<FacetResult> accumulate() throws IOException {
       
        // aggregate facets per category list (usually only one category list)
        FacetsAggregator aggregator = getAggregator();
        for (CategoryListParams clp : getCategoryLists()) {
            for (MatchingDocs md : matchingDocs) {
                aggregator.aggregate(md, clp, facetArrays);
            }
        }

        ParallelTaxonomyArrays arrays = taxonomyReader.getParallelTaxonomyArrays();

        // compute top-K
        final int[] children = arrays.children();
        final int[] siblings = arrays.siblings();
        List<FacetResult> res = new ArrayList<>();
        for (FacetRequest fr : searchParams.facetRequests) {
            int rootOrd = taxonomyReader.getOrdinal(fr.categoryPath);
            // category does not exist
            if (rootOrd == TaxonomyReader.INVALID_ORDINAL) {
                // Add empty FacetResult
                res.add(emptyResult(rootOrd, fr));
                continue;
            }
            CategoryListParams clp = searchParams.indexingParams.getCategoryListParams(fr.categoryPath);
            // someone might ask to aggregate ROOT category
            if (fr.categoryPath.length > 0) {
                OrdinalPolicy ordinalPolicy = clp.getOrdinalPolicy(fr.categoryPath.components[0]);
                if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
                    // rollup values
                    aggregator.rollupValues(fr, rootOrd, children, siblings, facetArrays);
                }
            }

            FacetResultsHandler frh = createFacetResultsHandler(fr);
            res.add(frh.compute());
        }
        return res;
    }
}
TOP

Related Classes of org.exist.indexing.lucene.QueryFacetCollector

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.