Package org.apache.lucene.facet

Source Code of org.apache.lucene.facet.FacetTestBase

package org.apache.lucene.facet;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;

import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.junit.AfterClass;
import org.junit.BeforeClass;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/** Base faceted search test. */
public abstract class FacetTestBase extends LuceneTestCase {
 
  /** Holds a search and taxonomy Directories pair. */
  private static final class SearchTaxoDirPair {
    Directory searchDir, taxoDir;
    SearchTaxoDirPair() {}
  }
 
  private static HashMap<Integer, SearchTaxoDirPair> dirsPerPartitionSize;
  private static File TEST_DIR;
 
  /** Documents text field. */
  protected static final String CONTENT_FIELD = "content";
 
  /** taxonomy Reader for the test. */
  protected TaxonomyReader taxoReader;
 
  /** Index Reader for the test. */
  protected IndexReader indexReader;
 
  /** Searcher for the test. */
  protected IndexSearcher searcher;
 
  @BeforeClass
  public static void beforeClassFacetTestBase() throws Exception {
    TEST_DIR = _TestUtil.getTempDir("facets");
    dirsPerPartitionSize = new HashMap<Integer, FacetTestBase.SearchTaxoDirPair>();
  }
 
  @AfterClass
  public static void afterClassFacetTestBase() throws Exception {
    for (SearchTaxoDirPair pair : dirsPerPartitionSize.values()) {
      IOUtils.close(pair.searchDir, pair.taxoDir);
    }
  }
 
  /** documents text (for the text field). */
  private static final String[] DEFAULT_CONTENT = {
      "the white car is the one I want.",
      "the white dog does not belong to anyone.",
  };
 
  /** Facets: facets[D][F] == category-path no. F for document no. D. */
  private static final CategoryPath[][] DEFAULT_CATEGORIES = {
      { new CategoryPath("root","a","f1"), new CategoryPath("root","a","f2") },
      { new CategoryPath("root","a","f1"), new CategoryPath("root","a","f3") },
  };
 
  /** categories to be added to specified doc */
  protected List<CategoryPath> getCategories(int doc) {
    return Arrays.asList(DEFAULT_CATEGORIES[doc]);
  }
 
  /** Number of documents to index */
  protected int numDocsToIndex() {
    return DEFAULT_CONTENT.length;
  }
 
  /** content to be added to specified doc */
  protected String getContent(int doc) {
    return DEFAULT_CONTENT[doc];
  }
 
  /** Prepare index (in RAM) with single partition */
  protected final void initIndex() throws Exception {
    initIndex(Integer.MAX_VALUE);
  }
 
  /** Prepare index (in RAM) with some documents and some facets */
  protected final void initIndex(int partitionSize) throws Exception {
    initIndex(partitionSize, false);
  }

  /** Prepare index (in RAM/Disk) with some documents and some facets */
  protected final void initIndex(int partitionSize, boolean forceDisk) throws Exception {
    if (VERBOSE) {
      System.out.println("Partition Size: " + partitionSize+"  forceDisk: "+forceDisk);
    }

    SearchTaxoDirPair pair = dirsPerPartitionSize.get(Integer.valueOf(partitionSize));
    if (pair == null) {
      pair = new SearchTaxoDirPair();
      if (forceDisk) {
        pair.searchDir = newFSDirectory(new File(TEST_DIR, "index"));
        pair.taxoDir = newFSDirectory(new File(TEST_DIR, "taxo"));
      } else {
        pair.searchDir = newDirectory();
        pair.taxoDir = newDirectory();
      }
     
      RandomIndexWriter iw = new RandomIndexWriter(random, pair.searchDir, getIndexWriterConfig(getAnalyzer()));
      TaxonomyWriter taxo = new DirectoryTaxonomyWriter(pair.taxoDir, OpenMode.CREATE);
     
      populateIndex(iw, taxo, getFacetIndexingParams(partitionSize));
     
      // commit changes (taxonomy prior to search index for consistency)
      taxo.commit();
      iw.commit();
      taxo.close();
      iw.close();
     
      dirsPerPartitionSize.put(Integer.valueOf(partitionSize), pair);
    }
   
    // prepare for searching
    taxoReader = new DirectoryTaxonomyReader(pair.taxoDir);
    indexReader = IndexReader.open(pair.searchDir);
    searcher = newSearcher(indexReader);
  }
 
  /** Returns indexing params for the main index */
  protected IndexWriterConfig getIndexWriterConfig(Analyzer analyzer) {
    return newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
  }

  /** Returns a default facet indexing params */
  protected FacetIndexingParams getFacetIndexingParams(final int partSize) {
    return new DefaultFacetIndexingParams() {
      @Override
      protected int fixedPartitionSize() {
        return partSize;
      }
    };
  }
 
  /**
   * Faceted Search Params for the test.
   * Sub classes should override in order to test with different faceted search params.
   */
  protected FacetSearchParams getFacetedSearchParams() {
    return getFacetedSearchParams(Integer.MAX_VALUE);
  }

  /**
   * Faceted Search Params with specified partition size.
   * @see #getFacetedSearchParams()
   */
  protected FacetSearchParams getFacetedSearchParams(int partitionSize) {
    FacetSearchParams res = new FacetSearchParams(getFacetIndexingParams(partitionSize));
    return res;
  }

  /**
   * Populate the test index+taxonomy for this test.
   * <p>Subclasses can override this to test different scenarios
   */
  protected void populateIndex(RandomIndexWriter iw, TaxonomyWriter taxo, FacetIndexingParams iParams)
      throws IOException, CorruptIndexException {
    // add test documents
    int numDocsToIndex = numDocsToIndex();
    for (int doc=0; doc<numDocsToIndex; doc++) {
      indexDoc(iParams, iw, taxo, getContent(doc), getCategories(doc));
    }
   
    // also add a document that would be deleted, so that all tests are also working against deletions in the index
    String content4del = "ContentOfDocToDelete";
    indexDoc(iParams, iw, taxo, content4del, getCategories(0));
    iw.commit(); // commit it
    iw.deleteDocuments(new Term(CONTENT_FIELD,content4del)); // now delete the committed doc
  }
 
  /** Close all indexes */
  protected void closeAll() throws Exception {
    // close and nullify everything
    IOUtils.close(taxoReader, indexReader, searcher);
    taxoReader = null;
    indexReader = null;
    searcher = null;
  }
 
  /**
   * Analyzer to use for the test.
   * Sub classes should override in order to test with different analyzer.
   */
  protected Analyzer getAnalyzer() {
    return new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
  }
 
  /** convenience method: convert sub results to an array */ 
  protected static FacetResultNode[] resultNodesAsArray(FacetResultNode parentRes) {
    ArrayList<FacetResultNode> a = new ArrayList<FacetResultNode>();
    for (FacetResultNode frn : parentRes.getSubResults()) {
      a.add(frn);
    }
    return a.toArray(new FacetResultNode[0]);
  }
 
  /** utility Create a dummy document with specified categories and content */
  protected final void indexDoc(FacetIndexingParams iParams, RandomIndexWriter iw,
      TaxonomyWriter tw, String content, List<CategoryPath> categories) throws IOException,
      CorruptIndexException {
    Document d = new Document();
    CategoryDocumentBuilder builder = new CategoryDocumentBuilder(tw, iParams);
    builder.setCategoryPaths(categories);
    builder.build(d);
    d.add(new Field("content", content, Store.YES, Index.ANALYZED, TermVector.NO));
    iw.addDocument(d);
  }
 
  /** Build the "truth" with ALL the facets enumerating indexes content. */
  protected Map<CategoryPath, Integer> facetCountsTruth() throws IOException {
    FacetIndexingParams iParams = getFacetIndexingParams(Integer.MAX_VALUE);
    String delim = String.valueOf(iParams.getFacetDelimChar());
    Map<CategoryPath, Integer> res = new HashMap<CategoryPath, Integer>();
    HashSet<Term> handledTerms = new HashSet<Term>();
    for (CategoryListParams clp : iParams.getAllCategoryListParams()) {
      Term baseTerm = clp.getTerm().createTerm("");
      if (!handledTerms.add(baseTerm)) {
        continue; // already handled this term (for another list)
      }
      TermEnum te = indexReader.terms(baseTerm);
      while (te.next()) {
        Term t = te.term();
        if (!t.field().equals(baseTerm.field())) {
          break; // hit a different field
        }
        TermDocs tp = indexReader.termDocs(t);
        int cnt = 0;
        while (tp.next()) {
          if (!indexReader.isDeleted(tp.doc())) { // ignore deleted docs
            cnt++;
          }
        }
        res.put(new CategoryPath(t.text().split(delim)), cnt);
      }
    }
    return res;
  }
 
  /** Validate counts for returned facets, and that there are not too many results */
  protected static void assertCountsAndCardinality(Map<CategoryPath, Integer> facetCountsTruth, List<FacetResult> facetResults) throws Exception {
    for (FacetResult fr : facetResults) {
      FacetResultNode topResNode = fr.getFacetResultNode();
      FacetRequest freq = fr.getFacetRequest();
      if (VERBOSE) {
        System.out.println(freq.getCategoryPath().toString()+ "\t\t" + topResNode);
      }
      assertCountsAndCardinality(facetCountsTruth, topResNode, freq.getNumResults());
    }
  }
   
  /** Validate counts for returned facets, and that there are not too many results */
  private static void assertCountsAndCardinality(Map<CategoryPath,Integer> facetCountsTruth,  FacetResultNode resNode, int reqNumResults) throws Exception {
    int actualNumResults = resNode.getNumSubResults();
    if (VERBOSE) {
      System.out.println("NumResults: " + actualNumResults);
    }
    assertTrue("Too many results!", actualNumResults <= reqNumResults);
    for (FacetResultNode subRes : resNode.getSubResults()) {
      assertEquals("wrong count for: "+subRes, facetCountsTruth.get(subRes.getLabel()).intValue(), (int)subRes.getValue());
      assertCountsAndCardinality(facetCountsTruth, subRes, reqNumResults); // recurse into child results
    }
  }
 
  /** Validate results equality */
  protected static void assertSameResults(List<FacetResult> expected,
                                          List<FacetResult> actual) {
    String expectedResults = resStringValueOnly(expected);
    String actualResults = resStringValueOnly(actual);
    if (!expectedResults.equals(actualResults)) {
      System.err.println("Results are not the same!");
      System.err.println("Expected:\n" + expectedResults);
      System.err.println("Actual" + actualResults);
      throw new NotSameResultError();
    }
  }
 
  /** exclude the residue and numDecendants because it is incorrect in sampling */
  private static final String resStringValueOnly(List<FacetResult> results) {
    StringBuilder sb = new StringBuilder();
    for (FacetResult facetRes : results) {
      sb.append(facetRes.toString()).append('\n');
    }
    return sb.toString().replaceAll("Residue:.*.0", "").replaceAll("Num valid Descendants.*", "");
  }
 
  /** Special Error class for ability to ignore only this error and retry... */
  public static class NotSameResultError extends Error {
    public NotSameResultError() {
      super("Results are not the same!");
    }
  }
 
}
TOP

Related Classes of org.apache.lucene.facet.FacetTestBase

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.