Source Code of org.apache.lucene.facet.index.FacetFields

package org.apache.lucene.facet.index;


import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Map;


import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;


/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


/**
 * A utility class for adding facet fields to a document. Usually one field will
 * be added for all facets, however per the
 * {@link FacetIndexingParams#getCategoryListParams(CategoryPath)}, one field
 * may be added for every group of facets.
 * 
 * @lucene.experimental
 */
public class FacetFields {


  // The drill-down field is added with a TokenStream, hence why it's based on
  // TextField type. However in practice, it is added just like StringField.
  // Therefore we set its IndexOptions to DOCS_ONLY.
  private static final FieldType DRILL_DOWN_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
  static {
    DRILL_DOWN_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY);
    DRILL_DOWN_TYPE.setOmitNorms(true);
    DRILL_DOWN_TYPE.freeze();
  }
  
  protected final TaxonomyWriter taxonomyWriter;


  protected final FacetIndexingParams indexingParams;


  /**
   * Constructs a new instance with the {@link FacetIndexingParams#DEFAULT
   * default} facet indexing params.
   * 
   * @param taxonomyWriter
   *          used to resolve given categories to ordinals
   */
  public FacetFields(TaxonomyWriter taxonomyWriter) {
    this(taxonomyWriter, FacetIndexingParams.DEFAULT);
  }


  /**
   * Constructs a new instance with the given facet indexing params.
   * 
   * @param taxonomyWriter
   *          used to resolve given categories to ordinals
   * @param params
   *          determines under which fields the categories should be indexed
   */
  public FacetFields(TaxonomyWriter taxonomyWriter, FacetIndexingParams params) {
    this.taxonomyWriter = taxonomyWriter;
    this.indexingParams = params;
  }


  /**
   * Creates a mapping between a {@link CategoryListParams} and all
   * {@link CategoryPath categories} that are associated with it.
   */
  protected Map<CategoryListParams,Iterable<CategoryPath>> createCategoryListMapping(
      Iterable<CategoryPath> categories) {
    if (indexingParams.getAllCategoryListParams().size() == 1) {
      return Collections.singletonMap(indexingParams.getCategoryListParams(null), categories);
    }
    HashMap<CategoryListParams,Iterable<CategoryPath>> categoryLists = 
        new HashMap<CategoryListParams,Iterable<CategoryPath>>();
    for (CategoryPath cp : categories) {
      // each category may be indexed under a different field, so add it to the right list.
      CategoryListParams clp = indexingParams.getCategoryListParams(cp);
      List<CategoryPath> list = (List<CategoryPath>) categoryLists.get(clp);
      if (list == null) {
        list = new ArrayList<CategoryPath>();
        categoryLists.put(clp, list);
      }
      list.add(cp);
    }
    return categoryLists;
  }
  
  /**
   * Returns the category list data, as a mapping from key to {@link BytesRef}
   * which includes the encoded data. Every ordinal in {@code ordinals}
   * corrspond to a {@link CategoryPath} returned from {@code categories}.
   */
  protected Map<String,BytesRef> getCategoryListData(CategoryListParams categoryListParams, 
      IntsRef ordinals, Iterable<CategoryPath> categories /* needed for AssociationsFacetFields */) 
      throws IOException {
    return new CountingListBuilder(categoryListParams, indexingParams, taxonomyWriter).build(ordinals, categories);
  }
  
  /**
   * Returns a {@link DrillDownStream} for writing the categories drill-down
   * terms.
   */
  protected DrillDownStream getDrillDownStream(Iterable<CategoryPath> categories) {
    return new DrillDownStream(categories, indexingParams);
  }
  
  /**
   * Returns the {@link FieldType} with which the drill-down terms should be
   * indexed. The default is {@link IndexOptions#DOCS_ONLY}.
   */
  protected FieldType drillDownFieldType() {
    return DRILL_DOWN_TYPE;
  }


  /**
   * Add the counting list data to the document under the given field. Note that
   * the field is determined by the {@link CategoryListParams}.
   */
  protected void addCountingListData(Document doc, Map<String,BytesRef> categoriesData, String field) {
    for (Entry<String,BytesRef> entry : categoriesData.entrySet()) {
      doc.add(new BinaryDocValuesField(field + entry.getKey(), entry.getValue()));
    }
  }
  
  /** Adds the needed facet fields to the document. */
  public void addFields(Document doc, Iterable<CategoryPath> categories) throws IOException {
    if (categories == null) {
      throw new IllegalArgumentException("categories should not be null");
    }


    // TODO: add reuse capabilities to this class, per CLP objects:
    // - drill-down field
    // - counting list field
    // - DrillDownStream
    // - CountingListStream


    final Map<CategoryListParams,Iterable<CategoryPath>> categoryLists = createCategoryListMapping(categories);


    // for each CLP we add a different field for drill-down terms as well as for
    // counting list data.
    IntsRef ordinals = new IntsRef(32); // should be enough for most common applications
    for (Entry<CategoryListParams, Iterable<CategoryPath>> e : categoryLists.entrySet()) {
      final CategoryListParams clp = e.getKey();
      final String field = clp.field;


      // build category list data
      ordinals.length = 0; // reset
      int maxNumOrds = 0;
      for (CategoryPath cp : e.getValue()) {
        int ordinal = taxonomyWriter.addCategory(cp);
        maxNumOrds += cp.length; // ordinal and potentially all parents
        if (ordinals.ints.length < maxNumOrds) {
          ordinals.grow(maxNumOrds);
        }
        ordinals.ints[ordinals.length++] = ordinal;
      }
      Map<String,BytesRef> categoriesData = getCategoryListData(clp, ordinals, e.getValue());
      
      // add the counting list data
      addCountingListData(doc, categoriesData, field);
      
      // add the drill-down field
      DrillDownStream drillDownStream = getDrillDownStream(e.getValue());
      Field drillDown = new Field(field, drillDownStream, drillDownFieldType());
      doc.add(drillDown);
    }
  }


}
Source Code of org.apache.lucene.facet.index.FacetFields

Related Classes of org.apache.lucene.facet.index.FacetFields