package org.apache.lucene.facet.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.FacetTestUtils;
import org.apache.lucene.facet.codecs.facet45.Facet45Codec;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.util.PrintTaxonomyStats;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util._TestUtil;

public class TestDemoFacets extends FacetTestCase {
private DirectoryTaxonomyWriter taxoWriter;
private RandomIndexWriter writer;
private FacetFields facetFields;
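
/** Indexes a single document carrying the given slash-delimited category paths as facet fields. */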
private void add(String... categoryPaths) throws IOException {
Document doc = new Document();
List<CategoryPath> paths = new ArrayList<CategoryPath>();
for (String categoryPath : categoryPaths) {
paths.add(new CategoryPath(categoryPath, '/'));
}
facetFields.addFields(doc, paths);
writer.addDocument(doc);
}

public void test() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
writer = new RandomIndexWriter(random(), dir);
// Writes facet ords to a separate directory from the
// main index:
taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
// Reused across documents, to add the necessary facet fields
// (drill-down terms plus the encoded category ordinals):
facetFields = new FacetFields(taxoWriter);
add("Author/Bob", "Publish Date/2010/10/15");
add("Author/Lisa", "Publish Date/2010/10/20");
add("Author/Lisa", "Publish Date/2012/1/1");
add("Author/Susan", "Publish Date/2012/1/7");
add("Author/Frank", "Publish Date/1999/5/5");
// NRT open
IndexSearcher searcher = newSearcher(writer.getReader());
writer.close();
// NRT open
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
taxoWriter.close();
// Count both "Publish Date" and "Author" dimensions:
FacetSearchParams fsp = new FacetSearchParams(
new CountFacetRequest(new CategoryPath("Publish Date"), 10),
new CountFacetRequest(new CategoryPath("Author"), 10));
// Aggregates the facet counts:
FacetsCollector c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
// MatchAllDocsQuery is for "browsing" (counts facets
// for all non-deleted docs in the index); normally
// you'd use a "normal" query, and use MultiCollector to
// wrap collecting the "normal" hits and also facets:
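// For example, a sketch (not executed here; `q` stands in for a real
// query and the top-10 count is a placeholder):
//
//   TopScoreDocCollector hits = TopScoreDocCollector.create(10, true);
//   searcher.search(q, MultiCollector.wrap(hits, c));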
searcher.search(new MatchAllDocsQuery(), c);
// Retrieve & verify results:
List<FacetResult> results = c.getFacetResults();
assertEquals(2, results.size());
assertEquals("Publish Date (0)\n 2012 (2)\n 2010 (2)\n 1999 (1)\n",
FacetTestUtils.toSimpleString(results.get(0)));
assertEquals("Author (0)\n Lisa (2)\n Frank (1)\n Susan (1)\n Bob (1)\n",
FacetTestUtils.toSimpleString(results.get(1)));
// Now user drills down on Publish Date/2010:
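// DrillDownQuery adds a required clause for that category to the base
// query, so only the two 2010 documents are counted: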
fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("Author"), 10));
DrillDownQuery q2 = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
q2.add(new CategoryPath("Publish Date/2010", '/'));
c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
searcher.search(q2, c);
results = c.getFacetResults();
assertEquals(1, results.size());
assertEquals("Author (0)\n Lisa (1)\n Bob (1)\n",
FacetTestUtils.toSimpleString(results.get(0)));
// Smoke test PrintTaxonomyStats:
ByteArrayOutputStream bos = new ByteArrayOutputStream();
PrintTaxonomyStats.printStats(taxoReader, new PrintStream(bos, false, "UTF-8"), true);
String result = bos.toString("UTF-8");
assertTrue(result.indexOf("/Author: 4 immediate children; 5 total categories") != -1);
assertTrue(result.indexOf("/Publish Date: 3 immediate children; 12 total categories") != -1);
// Make sure at least a few nodes of the tree came out:
assertTrue(result.indexOf(" /1999") != -1);
assertTrue(result.indexOf(" /2012") != -1);
assertTrue(result.indexOf(" /20") != -1);
taxoReader.close();
searcher.getIndexReader().close();
dir.close();
taxoDir.close();
}

public void testReallyNoNormsForDrillDown() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
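// Drill-down fields must be indexed without norms; the wrapper below
// asserts that Similarity is only ever consulted for the one regular
// field: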
iwc.setSimilarity(new PerFieldSimilarityWrapper() {
final Similarity sim = new DefaultSimilarity();
@Override
public Similarity get(String name) {
assertEquals("field", name);
return sim;
}
});
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
FacetFields facetFields = new FacetFields(taxoWriter);
Document doc = new Document();
doc.add(newTextField("field", "text", Field.Store.NO));
facetFields.addFields(doc, Collections.singletonList(new CategoryPath("a/path", '/')));
writer.addDocument(doc);
writer.close();
taxoWriter.close();
dir.close();
taxoDir.close();
}

public void testAllParents() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
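// ALL_PARENTS: also index the ordinals of each category's parents, so
// intermediate levels of the taxonomy can be counted directly: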
CategoryListParams clp = new CategoryListParams("$facets") {
@Override
public OrdinalPolicy getOrdinalPolicy(String fieldName) {
return OrdinalPolicy.ALL_PARENTS;
}
};
FacetIndexingParams fip = new FacetIndexingParams(clp);
FacetFields facetFields = new FacetFields(taxoWriter, fip);
Document doc = new Document();
doc.add(newTextField("field", "text", Field.Store.NO));
facetFields.addFields(doc, Collections.singletonList(new CategoryPath("a/path", '/')));
writer.addDocument(doc);
// NRT open
IndexSearcher searcher = newSearcher(writer.getReader());
writer.close();
// NRT open
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
taxoWriter.close();
FacetSearchParams fsp = new FacetSearchParams(fip,
new CountFacetRequest(new CategoryPath("a", '/'), 10));
// Aggregate the facet counts:
FacetsCollector c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
// Browse all documents (see the longer note in test() above):
searcher.search(new MatchAllDocsQuery(), c);
List<FacetResult> results = c.getFacetResults();
assertEquals(1, results.size());
assertEquals(1, (int) results.get(0).getFacetResultNode().value);
// LUCENE-4913: sub-results must never contain the taxonomy root ordinal (0):
for (FacetResultNode childNode : results.get(0).getFacetResultNode().subResults) {
assertTrue(childNode.ordinal != 0);
}
searcher.getIndexReader().close();
taxoReader.close();
dir.close();
taxoDir.close();
}

public void testLabelWithDelimiter() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
FacetFields facetFields = new FacetFields(taxoWriter);
Document doc = new Document();
doc.add(newTextField("field", "text", Field.Store.NO));
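// These UTF-8 bytes decode to characters that include the facet label
// delimiter, which is reserved and should be rejected at indexing time: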
BytesRef br = new BytesRef(new byte[] {(byte) 0xee, (byte) 0x92, (byte) 0xaa, (byte) 0xef, (byte) 0x9d, (byte) 0x89});
facetFields.addFields(doc, Collections.singletonList(new CategoryPath("dim/" + br.utf8ToString(), '/')));
try {
writer.addDocument(doc);
fail("did not hit expected exception");
} catch (IllegalArgumentException iae) {
// expected
}
writer.close();
taxoWriter.close();
dir.close();
taxoDir.close();
}

// LUCENE-4583: make sure that if we require > 32 KB of facet data for one
// document, we don't hit an exception when using Facet42DocValuesFormat
public void testManyFacetsInOneDocument() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
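// Facet45Codec stores facet ordinals with Facet42DocValuesFormat, the
// format exercised by this test: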
iwc.setCodec(new Facet45Codec());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
FacetFields facetFields = new FacetFields(taxoWriter);
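// Enough labels that this one document's encoded facet data exceeds 32 KB: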
int numLabels = _TestUtil.nextInt(random(), 40000, 100000);
Document doc = new Document();
doc.add(newTextField("field", "text", Field.Store.NO));
List<CategoryPath> paths = new ArrayList<CategoryPath>();
for (int i = 0; i < numLabels; i++) {
paths.add(new CategoryPath("dim", "" + i));
}
facetFields.addFields(doc, paths);
writer.addDocument(doc);
// NRT open
IndexSearcher searcher = newSearcher(writer.getReader());
writer.close();
// NRT open
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
taxoWriter.close();
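// Request all children of "dim":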
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("dim"), Integer.MAX_VALUE));
// Aggregate the facet counts:
FacetsCollector c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
// Browse all documents (see the longer note in test() above):
searcher.search(new MatchAllDocsQuery(), c);
List<FacetResult> results = c.getFacetResults();
assertEquals(1, results.size());
FacetResultNode root = results.get(0).getFacetResultNode();
assertEquals(numLabels, root.subResults.size());
Set<String> allLabels = new HashSet<String>();
for (FacetResultNode childNode : root.subResults) {
assertEquals(2, childNode.label.length);
allLabels.add(childNode.label.components[1]);
assertEquals(1, (int) childNode.value);
}
assertEquals(numLabels, allLabels.size());
IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir);
}
}