Package org.apache.lucene.facet.taxonomy

Examples of org.apache.lucene.facet.taxonomy.TaxonomyWriter
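
A TaxonomyWriter maintains the taxonomy of facet category paths in its own index, next to the regular search index: addCategory() adds a CategoryPath (and any missing parent paths) and returns the ordinal assigned to it, while commit() and close() persist the taxonomy. The snippets below come from the Lucene 3.x facet module, where LuceneTaxonomyWriter is the Directory-based implementation. The following minimal sketch illustrates just that core API; the import paths and the use of RAMDirectory are assumptions based on the Lucene 3.x module layout, not something taken from the snippets themselves.

import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class TaxonomyWriterSketch {
  public static void main(String[] args) throws Exception {
    // the taxonomy lives in its own Directory, separate from the search index
    Directory taxoDir = new RAMDirectory();
    TaxonomyWriter taxo = new LuceneTaxonomyWriter(taxoDir);

    // addCategory() adds the path and any missing parents to the taxonomy and
    // returns the path's ordinal; adding the same path again returns the same ordinal
    int ordinal = taxo.addCategory(new CategoryPath("Author", "Mark Twain"));
    int again = taxo.addCategory(new CategoryPath("Author", "Mark Twain"));
    System.out.println("ordinal " + ordinal + " == " + again);

    // commit the taxonomy before committing the search index, so that every
    // facet referenced by an indexed document exists in the taxonomy
    taxo.commit();
    taxo.close();
    taxoDir.close();
  }
}

In the full examples below the ordinals are not used directly; instead a CategoryDocumentBuilder or EnhancementsDocumentBuilder is given the TaxonomyWriter and adds the category information to each Document as it is built.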


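    // Index one document whose category ("a/b") is added through an
    // EnhancementsDocumentBuilder backed by a TaxonomyWriter, then read the
    // per-category enhancement data back with an EnhancementsPayloadIterator.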
    List<CategoryPath> categoryPaths = new ArrayList<CategoryPath>();
    categoryPaths.add(new CategoryPath("a", "b"));

    RandomIndexWriter indexWriter = new RandomIndexWriter(random, indexDir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
    TaxonomyWriter taxo = new LuceneTaxonomyWriter(taxoDir);

    // a category document builder will add the categories to a document
    // once build() is called
    Document doc = new Document();
    indexWriter.addDocument(new EnhancementsDocumentBuilder(taxo,
        indexingParams).setCategoryPaths(categoryPaths).build(doc));

    IndexReader indexReader = indexWriter.getReader();
    indexWriter.close();

    Term term = DrillDown.term(indexingParams, new CategoryPath("a","b"));
    EnhancementsPayloadIterator iterator = new EnhancementsPayloadIterator(
        indexingParams.getCategoryEnhancements(), indexReader, term);

    assertTrue("EnhancementsPayloadIterator failure", iterator.init());
    assertTrue("Missing document 0", iterator.setdoc(0));
    assertNull("Unexpected data for CategoryEnhancementDummy1", iterator
        .getCategoryData(new CategoryEnhancementDummy1()));
    byte[] dummy3 = (byte[]) iterator
        .getCategoryData(new CategoryEnhancementDummy3());
    assertTrue("Bad array returned for CategoryEnhancementDummy3", Arrays
        .equals(dummy3, CategoryEnhancementDummy3.CATEGORY_TOKEN_BYTES));
    indexReader.close();
    indexDir.close();
    taxo.close();
    taxoDir.close();
  }


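    // Same setup as above, but verifies that the payload data recorded for two
    // different enhancements (Dummy2 and Dummy3) can both be read back.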
    List<CategoryPath> categoryPaths = new ArrayList<CategoryPath>();
    categoryPaths.add(new CategoryPath("a", "b"));

    RandomIndexWriter indexWriter = new RandomIndexWriter(random, indexDir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
    TaxonomyWriter taxo = new LuceneTaxonomyWriter(taxoDir);

    // a category document builder will add the categories to a document
    // once build() is called
    Document doc = new Document();
    indexWriter.addDocument(new EnhancementsDocumentBuilder(taxo,
        indexingParams).setCategoryPaths(categoryPaths).build(doc));

    IndexReader indexReader = indexWriter.getReader();
    indexWriter.close();

    Term term = DrillDown.term(indexingParams, new CategoryPath("a","b"));
    EnhancementsPayloadIterator iterator = new EnhancementsPayloadIterator(
        indexingParams.getCategoryEnhancements(), indexReader, term);

    assertTrue("EnhancementsPayloadIterator failure", iterator.init());
    assertTrue("Missing document 0", iterator.setdoc(0));
    byte[] dummy2 = (byte[]) iterator
        .getCategoryData(new CategoryEnhancementDummy2());
    assertTrue("Bad array returned for CategoryEnhancementDummy2", Arrays
        .equals(dummy2, CategoryEnhancementDummy2.CATEGORY_TOKEN_BYTES));
    byte[] dummy3 = (byte[]) iterator
        .getCategoryData(new CategoryEnhancementDummy3());
    assertTrue("Bad array returned for CategoryEnhancementDummy3", Arrays
        .equals(dummy3, CategoryEnhancementDummy3.CATEGORY_TOKEN_BYTES));
    indexReader.close();
    taxo.close();
    indexDir.close();
    taxoDir.close();
  }

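    // Index the sample documents with plain categories plus categories carrying
    // association values, collected in a CategoryContainer and added through an
    // EnhancementsDocumentBuilder; the taxonomy is committed before the search index.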
    // create and open an index writer
    IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig(ExampleUtils.EXAMPLE_VER, SimpleUtils.analyzer));

    // create and open a taxonomy writer
    TaxonomyWriter taxo = new LuceneTaxonomyWriter(taxoDir, OpenMode.CREATE);

    // loop over sample documents
    int nDocsAdded = 0;
    int nFacetsAdded = 0;
    for (int docNum = 0; docNum < SimpleUtils.docTexts.length; docNum++) {
      ExampleUtils.log(" ++++ DOC ID: " + docNum);
      // obtain the sample categories for current document
      CategoryContainer categoryContainer = new CategoryContainer();
      for (CategoryPath path : SimpleUtils.categories[docNum]) {
        categoryContainer.addCategory(path);
        ExampleUtils.log("\t ++++ PATH: " + path);
      }
      // and also those with associations
      CategoryPath[] associationsPaths = AssociationUtils.categories[docNum];
      AssociationProperty[] associationProps = AssociationUtils.associations[docNum];
      for (int i = 0; i < associationsPaths.length; i++) {
        categoryContainer.addCategory(associationsPaths[i], associationProps[i]);
        ExampleUtils.log("\t $$$$ Association: ("
            + associationsPaths[i] + "," + associationProps[i]
            + ")");
      }

      // a category document builder, configured with the association indexing
      // parameters, will add the categories to a document once build() is called
      CategoryDocumentBuilder categoryDocBuilder = new EnhancementsDocumentBuilder(
          taxo, AssociationUtils.assocIndexingParams);
      categoryDocBuilder.setCategories(categoryContainer);

      // create a plain Lucene document and add some regular Lucene fields
      // to it
      Document doc = new Document();
      doc.add(new Field(SimpleUtils.TITLE, SimpleUtils.docTitles[docNum],
          Store.YES, Index.ANALYZED));
      doc.add(new Field(SimpleUtils.TEXT, SimpleUtils.docTexts[docNum],
          Store.NO, Index.ANALYZED));

      // invoke the category document builder to add the categories to the
      // document and, as required, to the taxonomy index
      categoryDocBuilder.build(doc);

      // finally add the document to the index
      iw.addDocument(doc);

      nDocsAdded++;
      nFacetsAdded += categoryContainer.size();
    }

    // commit changes.
    // we commit the taxonomy index before the search index, so that every
    // facet referred to by a document in the search index is guaranteed to
    // exist in the taxonomy index.
    taxo.commit();
    iw.commit();

    // close the taxonomy index and the index - all modifications are
    // now safely in the provided directories: indexDir and taxoDir.
    taxo.close();
    iw.close();

    ExampleUtils.log("Indexed " + nDocsAdded + " documents with overall "
        + nFacetsAdded + " facets.");
  }

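    // Index the sample documents with plain facets only, using a
    // CategoryDocumentBuilder to add each document's category paths.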
    // create and open an index writer
    IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig(ExampleUtils.EXAMPLE_VER, SimpleUtils.analyzer));

    // create and open a taxonomy writer
    TaxonomyWriter taxo = new LuceneTaxonomyWriter(taxoDir, OpenMode.CREATE);

    // loop over sample documents
    int nDocsAdded = 0;
    int nFacetsAdded = 0;
    for (int docNum=0; docNum<SimpleUtils.docTexts.length; docNum++) {

      // obtain the sample facets for current document
      List<CategoryPath> facetList = SimpleUtils.categoryPathArrayToList(SimpleUtils.categories[docNum]);

      // we do not alter indexing parameters! 
      // a category document builder will add the categories to a document once build() is called
      DocumentBuilder categoryDocBuilder = new CategoryDocumentBuilder(taxo).setCategoryPaths(facetList);

      // create a plain Lucene document and add some regular Lucene fields to it
      Document doc = new Document();
      doc.add(new Field(SimpleUtils.TITLE, SimpleUtils.docTitles[docNum], Store.YES, Index.ANALYZED));
      doc.add(new Field(SimpleUtils.TEXT, SimpleUtils.docTexts[docNum], Store.NO, Index.ANALYZED));

      // invoke the category document builder for adding categories to the document and,
      // as required, to the taxonomy index
      categoryDocBuilder.build(doc);

      // finally add the document to the index
      iw.addDocument(doc);

      nDocsAdded ++;
      nFacetsAdded += facetList.size();
    }

    // commit changes.
    // we commit changes to the taxonomy index prior to committing them to the search index.
    // this is important, so that all facets referred to by documents in the search index
    // will indeed exist in the taxonomy index.
    taxo.commit();
    iw.commit();

    // close the taxonomy index and the index - all modifications are
    // now safely in the provided directories: indexDir and taxoDir.
    taxo.close();
    iw.close();

    ExampleUtils.log("Indexed "+nDocsAdded+" documents with overall "+nFacetsAdded+" facets.");
  }


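  // NonTopLevelOrdinalPolicy should reject the root ordinal and all top-level
  // ordinals, and accept every deeper (non-top-level) ordinal.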
  @Test
  public void testNonTopLevelOrdinalPolicy() throws Exception {
    Directory dir = newDirectory();
    TaxonomyWriter taxonomy = new LuceneTaxonomyWriter(dir);

    int[] topLevelOrdinals = new int[10];
    String[] topLevelStrings = new String[10];
    for (int i = 0; i < 10; i++) {
      topLevelStrings[i] = Integer.valueOf(random.nextInt(30)).toString();
      topLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(
          topLevelStrings[i]));
    }
    int[] nonTopLevelOrdinals = new int[300];
    for (int i = 0; i < 300; i++) {
      int nComponents = 2 + random.nextInt(10);
      String[] components = new String[nComponents];
      components[0] = topLevelStrings[i % 10];
      for (int j = 1; j < components.length; j++) {
        components[j] = (Integer.valueOf(random.nextInt(30))).toString();
      }
      nonTopLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(
          components));
    }
    // check ordinal policy
    OrdinalPolicy ordinalPolicy = new NonTopLevelOrdinalPolicy();
    ordinalPolicy.init(taxonomy);
    assertFalse("non-top-level ordinal policy should not match root", ordinalPolicy
        .shouldAdd(TaxonomyReader.ROOT_ORDINAL));
    for (int i = 0; i < 10; i++) {
      assertFalse("non-top-level ordinal policy should not match "
          + topLevelOrdinals[i],
          ordinalPolicy.shouldAdd(topLevelOrdinals[i]));
    }
    for (int i = 0; i < 300; i++) {
      assertTrue("non-top-level ordinal policy should match "
          + nonTopLevelOrdinals[i],
          ordinalPolicy.shouldAdd(nonTopLevelOrdinals[i]));
    }

    // check illegal ordinal
    assertFalse("Should not add illegal ordinal", ordinalPolicy.shouldAdd(100000));
    taxonomy.close();
    dir.close();
  }


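  // NonTopLevelPathPolicy should reject the root path and all top-level paths,
  // and accept every deeper (non-top-level) path.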
  @Test
  public void testNonTopLevelPathPolicy() throws Exception {
    Directory dir = newDirectory();
    TaxonomyWriter taxonomy = new LuceneTaxonomyWriter(dir);

    CategoryPath[] topLevelPaths = new CategoryPath[10];
    String[] topLevelStrings = new String[10];
    for (int i = 0; i < 10; i++) {
      topLevelStrings[i] = Integer.valueOf(random.nextInt(30)).toString();

      topLevelPaths[i] = new CategoryPath(topLevelStrings[i]);
      taxonomy.addCategory(topLevelPaths[i]);
    }
    CategoryPath[] nonTopLevelPaths = new CategoryPath[300];
    for (int i = 0; i < 300; i++) {
      int nComponents = 2 + random.nextInt(10);
      String[] components = new String[nComponents];
      components[0] = topLevelStrings[i % 10];
      for (int j = 1; j < components.length; j++) {
        components[j] = (Integer.valueOf(random.nextInt(30))).toString();
      }
      nonTopLevelPaths[i] = new CategoryPath(components);
      taxonomy.addCategory(nonTopLevelPaths[i]);
    }
    // check path policy
    PathPolicy pathPolicy = new NonTopLevelPathPolicy();
    assertFalse("non-top-level path policy should not match root",
        pathPolicy.shouldAdd(new CategoryPath()));
    for (int i = 0; i < 10; i++) {
      assertFalse("non-top-level path policy should not match "
          + topLevelPaths[i],
          pathPolicy.shouldAdd(topLevelPaths[i]));
    }
    for (int i = 0; i < 300; i++) {
      assertTrue("non-top-level path policy should match "
          + nonTopLevelPaths[i],
          pathPolicy.shouldAdd(nonTopLevelPaths[i]));
    }
    taxonomy.close();
    dir.close();
  }

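  // CategoryTokenizer with default indexing parameters: expect one term per
  // category, with the path components joined by the facet delimiter character.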
  /**
   * @throws IOException
   */
  @Test
  public void testTokensDefaultParams() throws IOException {
    Directory directory = newDirectory();
    TaxonomyWriter taxonomyWriter = new LuceneTaxonomyWriter(
        directory);
    DefaultFacetIndexingParams indexingParams = new DefaultFacetIndexingParams();
    CategoryTokenizer tokenizer = new CategoryTokenizer(
        new CategoryAttributesStream(categoryContainer),
        indexingParams);

    // count the number of tokens
    Set<String> categoryTerms = new HashSet<String>();
    for (int i = 0; i < initialCatgeories.length; i++) {
      categoryTerms.add(initialCatgeories[i]
          .toString(indexingParams.getFacetDelimChar()));
    }

    int nTokens;
    for (nTokens = 0; tokenizer.incrementToken(); nTokens++) {
      if (!categoryTerms.remove(tokenizer.termAttribute.toString())) {
        fail("Unexpected term: " + tokenizer.termAttribute.toString());
      }
    }
    assertTrue("all category terms should have been found", categoryTerms
        .isEmpty());

    // should be 3 - one token per category (no parent categories are added here)
    assertEquals("Wrong number of tokens", 3, nTokens);

    taxonomyWriter.close();
    directory.close();
  }

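  // A long category path (seven components) should still be emitted as a single term.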
  /**
   * @throws IOException
   */
  @Test
  public void testLongCategoryPath() throws IOException {
    Directory directory = newDirectory();
    TaxonomyWriter taxonomyWriter = new LuceneTaxonomyWriter(
        directory);

    List<CategoryPath> longCategory = new ArrayList<CategoryPath>();
    longCategory.add(new CategoryPath("one", "two", "three", "four",
        "five", "six", "seven"));

    DefaultFacetIndexingParams indexingParams = new DefaultFacetIndexingParams();
    CategoryTokenizer tokenizer = new CategoryTokenizer(
        new CategoryAttributesStream(new CategoryAttributesIterable(
            longCategory)), indexingParams);

    // count the number of tokens
    String categoryTerm = longCategory.get(0).toString(
        indexingParams.getFacetDelimChar());

    assertTrue("Missing token", tokenizer.incrementToken());
    if (!categoryTerm.equals(tokenizer.termAttribute.toString())) {
      fail("Unexpected term: " + tokenizer.termAttribute.toString());
    }

    assertFalse("Unexpected token", tokenizer.incrementToken());

    taxonomyWriter.close();
    directory.close();
  }

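    // Set up an index of 100 documents whose categories carry int and float
    // association values, built through an EnhancementsDocumentBuilder.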
    taxoDir = newDirectory();
    // preparations - index, taxonomy, content
    RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT,
        new MockAnalyzer(random, MockTokenizer.KEYWORD, false)));
   
    TaxonomyWriter taxoWriter = new LuceneTaxonomyWriter(taxoDir);
   
    EnhancementsDocumentBuilder builder = new EnhancementsDocumentBuilder(
        taxoWriter, new DefaultEnhancementsIndexingParams(
            new AssociationEnhancement()));
   
    // index documents: all have the 'a' categories, and 50% also have the 'b' categories
    for (int i = 0; i < 100; i++) {
      Document doc = new Document();
      CategoryContainer container = new CategoryContainer();
      container.addCategory(aint, new AssociationIntProperty(2));
      container.addCategory(afloat, new AssociationFloatProperty(0.5f));
      if (i % 2 == 0) { // 50
        container.addCategory(bint, new AssociationIntProperty(3));
        container.addCategory(bfloat, new AssociationFloatProperty(0.2f));
      }
      builder.setCategories(container).build(doc);
      writer.addDocument(doc);
    }
   
    taxoWriter.close();
    reader = writer.getReader();
    writer.close();
  }

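  // Helper that writes one document per entry of perDocCategories, adding each
  // document's categories with a CategoryDocumentBuilder, and commits the
  // taxonomy before the search index.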
  private void populateIndex(FacetIndexingParams iParams, Directory indexDir,
      Directory taxoDir) throws Exception {
    RandomIndexWriter writer = new RandomIndexWriter(random, indexDir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false)));
    TaxonomyWriter taxoWriter = new LuceneTaxonomyWriter(taxoDir);

    for (CategoryPath[] categories : perDocCategories) {
      writer.addDocument(new CategoryDocumentBuilder(taxoWriter, iParams)
          .setCategoryPaths(Arrays.asList(categories)).build(
              new Document()));

    }
    taxoWriter.commit();
    writer.commit();
    taxoWriter.close();
    writer.close();
  }
