Source Code of org.apache.lucene.facet.taxonomy.TestTaxonomyFacetCounts2

package org.apache.lucene.facet.taxonomy;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

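/**
 * Tests taxonomy facet counting over an index built from four kinds of segments
 * (matching documents without categories, categories without matching documents,
 * categories with matching documents, and a mix; see the {@code @BeforeClass} setup).
 * Dimensions A and B are multi-valued (B additionally stores its dimension count),
 * C keeps the default configuration and D is hierarchical. Expected counts are
 * accumulated in static maps during indexing and verified against
 * {@link Facets#getTopChildren}.
 */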
public class TestTaxonomyFacetCounts2 extends FacetTestCase {
 
  private static final Term A = new Term("f", "a");
  private static final String CP_A = "A", CP_B = "B";
  private static final String CP_C = "C", CP_D = "D"; // indexed w/ NO_PARENTS
  private static final int NUM_CHILDREN_CP_A = 5, NUM_CHILDREN_CP_B = 3;
  private static final int NUM_CHILDREN_CP_C = 5, NUM_CHILDREN_CP_D = 5;
  private static final FacetField[] CATEGORIES_A, CATEGORIES_B;
  private static final FacetField[] CATEGORIES_C, CATEGORIES_D;
  static {
    CATEGORIES_A = new FacetField[NUM_CHILDREN_CP_A];
    for (int i = 0; i < NUM_CHILDREN_CP_A; i++) {
      CATEGORIES_A[i] = new FacetField(CP_A, Integer.toString(i));
    }
    CATEGORIES_B = new FacetField[NUM_CHILDREN_CP_B];
    for (int i = 0; i < NUM_CHILDREN_CP_B; i++) {
      CATEGORIES_B[i] = new FacetField(CP_B, Integer.toString(i));
    }
   
    // NO_PARENTS categories
    CATEGORIES_C = new FacetField[NUM_CHILDREN_CP_C];
    for (int i = 0; i < NUM_CHILDREN_CP_C; i++) {
      CATEGORIES_C[i] = new FacetField(CP_C, Integer.toString(i));
    }
   
    // Multi-level categories
    CATEGORIES_D = new FacetField[NUM_CHILDREN_CP_D];
    for (int i = 0; i < NUM_CHILDREN_CP_D; i++) {
      String val = Integer.toString(i);
      CATEGORIES_D[i] = new FacetField(CP_D, val, val + val); // e.g. D/1/11, D/2/22...
    }
  }
 
  private static Directory indexDir, taxoDir;
  private static Map<String,Integer> allExpectedCounts, termExpectedCounts;

  @AfterClass
  public static void afterClassCountingFacetsAggregatorTest() throws Exception {
    IOUtils.close(indexDir, taxoDir);
  }
 
  private static List<FacetField> randomCategories(Random random) {
    // add random categories from the two dimensions, ensuring that the same
    // category is not added twice.
    int numFacetsA = random.nextInt(3) + 1; // 1-3
    int numFacetsB = random.nextInt(2) + 1; // 1-2
    ArrayList<FacetField> categories_a = new ArrayList<>();
    categories_a.addAll(Arrays.asList(CATEGORIES_A));
    ArrayList<FacetField> categories_b = new ArrayList<>();
    categories_b.addAll(Arrays.asList(CATEGORIES_B));
    Collections.shuffle(categories_a, random);
    Collections.shuffle(categories_b, random);

    ArrayList<FacetField> categories = new ArrayList<>();
    categories.addAll(categories_a.subList(0, numFacetsA));
    categories.addAll(categories_b.subList(0, numFacetsB));
   
    // add the NO_PARENT categories
    categories.add(CATEGORIES_C[random().nextInt(NUM_CHILDREN_CP_C)]);
    categories.add(CATEGORIES_D[random().nextInt(NUM_CHILDREN_CP_D)]);

    return categories;
  }

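  // adds the term f:a so the document matches the TermQuery used by testDifferentNumResults()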
  private static void addField(Document doc) {
    doc.add(new StringField(A.field(), A.text(), Store.NO));
  }

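  // adds random categories to the document and bumps the expected per-category and
  // per-dimension counts; term-filtered counts are only updated when the document
  // will also contain the f:a term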
  private static void addFacets(Document doc, FacetsConfig config, boolean updateTermExpectedCounts)
      throws IOException {
    List<FacetField> docCategories = randomCategories(random());
    for (FacetField ff : docCategories) {
      doc.add(ff);
      String cp = ff.dim + "/" + ff.path[0];
      allExpectedCounts.put(cp, allExpectedCounts.get(cp) + 1);
      if (updateTermExpectedCounts) {
        termExpectedCounts.put(cp, termExpectedCounts.get(cp) + 1);
      }
    }
    // add 1 to the expected dimension counts of B, C and D (A is multi-valued without
    // requireDimCount, so its dimension count is reported as -1 and never tracked here)
    allExpectedCounts.put(CP_B, allExpectedCounts.get(CP_B) + 1);
    allExpectedCounts.put(CP_C, allExpectedCounts.get(CP_C) + 1);
    allExpectedCounts.put(CP_D, allExpectedCounts.get(CP_D) + 1);
    if (updateTermExpectedCounts) {
      termExpectedCounts.put(CP_B, termExpectedCounts.get(CP_B) + 1);
      termExpectedCounts.put(CP_C, termExpectedCounts.get(CP_C) + 1);
      termExpectedCounts.put(CP_D, termExpectedCounts.get(CP_D) + 1);
    }
  }

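  // facet configuration shared by indexing and counting: A and B are multi-valued,
  // B additionally stores its dimension count, D is hierarchical and C keeps the defaults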
  private static FacetsConfig getConfig() {
    FacetsConfig config = new FacetsConfig();
    config.setMultiValued("A", true);
    config.setMultiValued("B", true);
    config.setRequireDimCount("B", true);
    config.setHierarchical("D", true);
    return config;
  }

  private static void indexDocsNoFacets(IndexWriter indexWriter) throws IOException {
    int numDocs = atLeast(2);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      addField(doc);
      indexWriter.addDocument(doc);
    }
    indexWriter.commit(); // flush a segment
  }
 
  private static void indexDocsWithFacetsNoTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter,
                                                 Map<String,Integer> expectedCounts) throws IOException {
    Random random = random();
    int numDocs = atLeast(random, 2);
    FacetsConfig config = getConfig();
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      addFacets(doc, config, false);
      indexWriter.addDocument(config.build(taxoWriter, doc));
    }
    indexWriter.commit(); // flush a segment
  }
 
  private static void indexDocsWithFacetsAndTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter,
                                                  Map<String,Integer> expectedCounts) throws IOException {
    Random random = random();
    int numDocs = atLeast(random, 2);
    FacetsConfig config = getConfig();
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      addFacets(doc, config, true);
      addField(doc);
      indexWriter.addDocument(config.build(taxoWriter, doc));
    }
    indexWriter.commit(); // flush a segment
  }
 
  private static void indexDocsWithFacetsAndSomeTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter,
                                                      Map<String,Integer> expectedCounts) throws IOException {
    Random random = random();
    int numDocs = atLeast(random, 2);
    FacetsConfig config = getConfig();
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      boolean hasContent = random.nextBoolean();
      if (hasContent) {
        addField(doc);
      }
      addFacets(doc, config, hasContent);
      indexWriter.addDocument(config.build(taxoWriter, doc));
    }
    indexWriter.commit(); // flush a segment
  }
 
  // initialize expectedCounts w/ 0 for all categories
  private static Map<String,Integer> newCounts() {
    Map<String,Integer> counts = new HashMap<>();
    counts.put(CP_A, 0);
    counts.put(CP_B, 0);
    counts.put(CP_C, 0);
    counts.put(CP_D, 0);
    for (FacetField ff : CATEGORIES_A) {
      counts.put(ff.dim + "/" + ff.path[0], 0);
    }
    for (FacetField ff : CATEGORIES_B) {
      counts.put(ff.dim + "/" + ff.path[0], 0);
    }
    for (FacetField ff : CATEGORIES_C) {
      counts.put(ff.dim + "/" + ff.path[0], 0);
    }
    for (FacetField ff : CATEGORIES_D) {
      counts.put(ff.dim + "/" + ff.path[0], 0);
    }
    return counts;
  }
 
  @BeforeClass
  public static void beforeClassCountingFacetsAggregatorTest() throws Exception {
    indexDir = newDirectory();
    taxoDir = newDirectory();
   
    // create an index which has:
    // 1. Segment with no categories, but matching results
    // 2. Segment w/ categories, but no results
    // 3. Segment w/ categories and results
    // 4. Segment w/ categories, but only some results
   
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    conf.setMergePolicy(NoMergePolicy.INSTANCE); // prevent merges, so we can control the index segments
    IndexWriter indexWriter = new IndexWriter(indexDir, conf);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    allExpectedCounts = newCounts();
    termExpectedCounts = newCounts();
   
    // segment w/ no categories
    indexDocsNoFacets(indexWriter);

    // segment w/ categories, no content
    indexDocsWithFacetsNoTerms(indexWriter, taxoWriter, allExpectedCounts);

    // segment w/ categories and content
    indexDocsWithFacetsAndTerms(indexWriter, taxoWriter, allExpectedCounts);
   
    // segment w/ categories and some content
    indexDocsWithFacetsAndSomeTerms(indexWriter, taxoWriter, allExpectedCounts);
   
    IOUtils.close(indexWriter, taxoWriter);
  }
 
  @Test
  public void testDifferentNumResults() throws Exception {
    // test the collector w/ FacetRequests and different numResults
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = newSearcher(indexReader);
   
    FacetsCollector sfc = new FacetsCollector();
    TermQuery q = new TermQuery(A);
    searcher.search(q, sfc);
    Facets facets = getTaxonomyFacetCounts(taxoReader, getConfig(), sfc);
    FacetResult result = facets.getTopChildren(NUM_CHILDREN_CP_A, CP_A);
    assertEquals(-1, result.value.intValue());
    for(LabelAndValue labelValue : result.labelValues) {
      assertEquals(termExpectedCounts.get(CP_A + "/" + labelValue.label), labelValue.value);
    }
    result = facets.getTopChildren(NUM_CHILDREN_CP_B, CP_B);
    assertEquals(termExpectedCounts.get(CP_B), result.value);
    for(LabelAndValue labelValue : result.labelValues) {
      assertEquals(termExpectedCounts.get(CP_B + "/" + labelValue.label), labelValue.value);
    }
   
    IOUtils.close(indexReader, taxoReader);
  }
 
  @Test
  public void testAllCounts() throws Exception {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = newSearcher(indexReader);
   
    FacetsCollector sfc = new FacetsCollector();
    searcher.search(new MatchAllDocsQuery(), sfc);

    Facets facets = getTaxonomyFacetCounts(taxoReader, getConfig(), sfc);
   
    FacetResult result = facets.getTopChildren(NUM_CHILDREN_CP_A, CP_A);
    assertEquals(-1, result.value.intValue());
    int prevValue = Integer.MAX_VALUE;
    for(LabelAndValue labelValue : result.labelValues) {
      assertEquals(allExpectedCounts.get(CP_A + "/" + labelValue.label), labelValue.value);
      assertTrue("wrong sort order of sub results: labelValue.value=" + labelValue.value + " prevValue=" + prevValue, labelValue.value.intValue() <= prevValue);
      prevValue = labelValue.value.intValue();
    }

    result = facets.getTopChildren(NUM_CHILDREN_CP_B, CP_B);
    assertEquals(allExpectedCounts.get(CP_B), result.value);
    prevValue = Integer.MAX_VALUE;
    for(LabelAndValue labelValue : result.labelValues) {
      assertEquals(allExpectedCounts.get(CP_B + "/" + labelValue.label), labelValue.value);
      assertTrue("wrong sort order of sub results: labelValue.value=" + labelValue.value + " prevValue=" + prevValue, labelValue.value.intValue() <= prevValue);
      prevValue = labelValue.value.intValue();
    }

    IOUtils.close(indexReader, taxoReader);
  }
 
  @Test
  public void testBigNumResults() throws Exception {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = newSearcher(indexReader);
   
    FacetsCollector sfc = new FacetsCollector();
    searcher.search(new MatchAllDocsQuery(), sfc);

    Facets facets = getTaxonomyFacetCounts(taxoReader, getConfig(), sfc);

    FacetResult result = facets.getTopChildren(Integer.MAX_VALUE, CP_A);
    assertEquals(-1, result.value.intValue());
    for(LabelAndValue labelValue : result.labelValues) {
      assertEquals(allExpectedCounts.get(CP_A + "/" + labelValue.label), labelValue.value);
    }
    result = facets.getTopChildren(Integer.MAX_VALUE, CP_B);
    assertEquals(allExpectedCounts.get(CP_B), result.value);
    for(LabelAndValue labelValue : result.labelValues) {
      assertEquals(allExpectedCounts.get(CP_B + "/" + labelValue.label), labelValue.value);
    }
   
    IOUtils.close(indexReader, taxoReader);
  }
 
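  // C and D are single-valued dimensions, so getTopChildren() reports an accurate
  // dimension count (the sum of the child counts) instead of -1 as for dimension A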
  @Test
  public void testNoParents() throws Exception {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = newSearcher(indexReader);
   
    FacetsCollector sfc = new FacetsCollector();
    searcher.search(new MatchAllDocsQuery(), sfc);

    Facets facets = getTaxonomyFacetCounts(taxoReader, getConfig(), sfc);

    FacetResult result = facets.getTopChildren(NUM_CHILDREN_CP_C, CP_C);
    assertEquals(allExpectedCounts.get(CP_C), result.value);
    for(LabelAndValue labelValue : result.labelValues) {
      assertEquals(allExpectedCounts.get(CP_C + "/" + labelValue.label), labelValue.value);
    }
    result = facets.getTopChildren(NUM_CHILDREN_CP_D, CP_D);
    assertEquals(allExpectedCounts.get(CP_D), result.value);
    for(LabelAndValue labelValue : result.labelValues) {
      assertEquals(allExpectedCounts.get(CP_D + "/" + labelValue.label), labelValue.value);
    }
   
    IOUtils.close(indexReader, taxoReader);
  }
}
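
For reference, the indexing-and-counting pattern exercised by this test can be reproduced outside the test framework. The sketch below is a minimal, self-contained example and makes a few assumptions not present in the test: it targets the Lucene 5.x facet API, uses RAMDirectory and StandardAnalyzer in place of the test framework's newDirectory()/MockAnalyzer, constructs FastTaxonomyFacetCounts directly instead of going through the FacetTestCase.getTaxonomyFacetCounts() helper, and the class name FacetCountsExample is purely illustrative.

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class FacetCountsExample {
  public static void main(String[] args) throws IOException {
    Directory indexDir = new RAMDirectory();
    Directory taxoDir = new RAMDirectory();

    // dim A is multi-valued and stores its dimension count, mirroring dim "B" in the test above
    FacetsConfig config = new FacetsConfig();
    config.setMultiValued("A", true);
    config.setRequireDimCount("A", true);

    IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new StandardAnalyzer()));
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    for (int i = 0; i < 3; i++) {
      Document doc = new Document();
      doc.add(new FacetField("A", Integer.toString(i % 2))); // categories A/0, A/1, A/0
      // build() translates FacetFields into doc values + taxonomy ordinals
      indexWriter.addDocument(config.build(taxoWriter, doc));
    }
    indexWriter.close();
    taxoWriter.close();

    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);

    // collect matching docs, then aggregate their ordinals against the taxonomy
    FacetsCollector fc = new FacetsCollector();
    searcher.search(new MatchAllDocsQuery(), fc);
    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);

    FacetResult result = facets.getTopChildren(10, "A");
    System.out.println(result); // child counts A/0=2, A/1=1; dim value 3 (three docs carry an A category)

    indexReader.close();
    taxoReader.close();
    indexDir.close();
    taxoDir.close();
  }
}

As in the test, a dimension configured with both setMultiValued and setRequireDimCount reports a real dimension count in FacetResult.value; a multi-valued dimension without requireDimCount reports -1, which is exactly what testDifferentNumResults, testAllCounts and testBigNumResults assert for dimension A.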