Package net.sf.katta.integrationTest.lib.lucene

Source Code of net.sf.katta.integrationTest.lib.lucene.LuceneClientTest

/**
* Copyright 2008 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.sf.katta.integrationTest.lib.lucene;

import java.io.File;
import java.util.List;
import java.util.Set;

import net.sf.katta.client.DeployClient;
import net.sf.katta.client.IDeployClient;
import net.sf.katta.client.IndexState;
import net.sf.katta.integrationTest.support.AbstractIntegrationTest;
import net.sf.katta.lib.lucene.DocumentFrequencyWritable;
import net.sf.katta.lib.lucene.Hit;
import net.sf.katta.lib.lucene.Hits;
import net.sf.katta.lib.lucene.ILuceneClient;
import net.sf.katta.lib.lucene.LuceneClient;
import net.sf.katta.node.Node;
import net.sf.katta.testutil.TestResources;
import net.sf.katta.testutil.TestUtil;
import net.sf.katta.util.KattaException;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import static org.hamcrest.CoreMatchers.instanceOf;

/**
* Test for {@link LuceneClient}.
*/
public class LuceneClientTest extends AbstractIntegrationTest {

  private static Logger LOG = Logger.getLogger(LuceneClientTest.class);

  private static final String INDEX1 = "index1";
  private static final String INDEX2 = "index2";
  private static final String INDEX3 = "index3";

  public LuceneClientTest() {
    super(2);
  }

  @Test
  public void testAddRemoveIndices() throws Exception {
    ILuceneClient client = new LuceneClient(_protocol);
    IDeployClient deployClient = new DeployClient(_protocol);

    int listenerCountBeforeDeploys = _protocol.getRegisteredListenerCount();
    deployClient.addIndex("newIndex1", INDEX_FILE.getAbsolutePath(), 1).joinDeployment();
    deployClient.addIndex("newIndex2", INDEX_FILE.getAbsolutePath(), 1).joinDeployment();
    deployClient.addIndex("newIndex3", INDEX_FILE.getAbsolutePath(), 1).joinDeployment();
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("content: the");
    client.search(query, new String[] { "newIndex1" }, 10);

    deployClient.removeIndex("newIndex1");
    deployClient.removeIndex("newIndex2");
    deployClient.removeIndex("newIndex3");
    Thread.sleep(2000);
    assertEquals(listenerCountBeforeDeploys, _protocol.getRegisteredListenerCount());
  }

  @Test
  public void testInstantiateClientBeforeIndex() throws Exception {
    ILuceneClient client = new LuceneClient(_miniCluster.getZkConfiguration());
    deployTestIndices(1, getNodeCount());
    List<Node> nodes = _miniCluster.getNodes();
    for (Node node : nodes) {
      TestUtil.waitUntilNodeServesShards(_protocol, node.getName(), SHARD_COUNT);
    }
    Thread.sleep(2000);

    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("content: the");
    client.count(query, new String[] { INDEX_NAME });
    client.close();
  }

  @Test
  public void testCount() throws Exception {
    deployTestIndices(1, 1);
    ILuceneClient client = new LuceneClient(_miniCluster.getZkConfiguration());
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("content: the");
    final int count = client.count(query, new String[] { INDEX_NAME });
    assertEquals(937, count);
    client.close();
  }

  @Test
  public void testGetDetails() throws Exception {
    deployTestIndices(1, 1);
    ILuceneClient client = new LuceneClient(_miniCluster.getZkConfiguration());
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("content: the");
    final Hits hits = client.search(query, new String[] { INDEX_NAME }, 10);
    assertNotNull(hits);
    assertEquals(10, hits.getHits().size());
    for (final Hit hit : hits.getHits()) {
      final MapWritable details = client.getDetails(hit);
      final Set<Writable> keySet = details.keySet();
      assertFalse(keySet.isEmpty());
      assertNotNull(details.get(new Text("path")));
      assertNotNull(details.get(new Text("category")));
    }
    client.close();
  }

  @Test
  public void testGetDetailsWithFieldNames() throws Exception {
    deployTestIndices(1, 1);
    ILuceneClient client = new LuceneClient(_miniCluster.getZkConfiguration());
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("content: the");
    final Hits hits = client.search(query, new String[] { INDEX_NAME }, 10);
    assertNotNull(hits);
    assertEquals(10, hits.getHits().size());
    for (final Hit hit : hits.getHits()) {
      final MapWritable details = client.getDetails(hit, new String[] { "path" });
      final Set<Writable> keySet = details.keySet();
      assertFalse(keySet.isEmpty());
      assertNotNull(details.get(new Text("path")));
      assertNull(details.get(new Text("category")));
    }
    client.close();
  }

  @Test
  public void testGetBinaryDetails() throws Exception {
    File index = _temporaryFolder.newFolder("indexWithBinaryData");
    String textFieldName = "textField";
    String binaryFieldName = "binaryField";
    String textFieldContent = "sample text";
    byte[] bytesFieldContent = new byte[] { 1, 2, 3 };

    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(index), new StandardAnalyzer(Version.LUCENE_30), true,
            MaxFieldLength.UNLIMITED);
    Document document = new Document();
    document.add(new Field(binaryFieldName, bytesFieldContent, Store.YES));
    document.add(new Field(textFieldName, textFieldContent, Store.NO, Index.ANALYZED));
    indexWriter.addDocument(document);
    indexWriter.optimize();
    indexWriter.close();
    DeployClient deployClient = new DeployClient(_miniCluster.getProtocol());
    IndexState indexState = deployClient.addIndex(index.getName(), index.getParentFile().getAbsolutePath(), 1)
            .joinDeployment();
    assertEquals(IndexState.DEPLOYED, indexState);

    ILuceneClient client = new LuceneClient(_miniCluster.getZkConfiguration());
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse(textFieldName + ": "
            + textFieldContent);
    final Hits hits = client.search(query, new String[] { index.getName() }, 10);
    assertNotNull(hits);
    assertEquals(1, hits.getHits().size());
    final Hit hit = hits.getHits().get(0);
    final MapWritable details = client.getDetails(hit);
    final Set<Writable> keySet = details.keySet();
    assertEquals(1, keySet.size());
    final Writable writable = details.get(new Text(binaryFieldName));
    assertNotNull(writable);
    assertThat(writable, instanceOf(BytesWritable.class));
    BytesWritable bytesWritable = (BytesWritable) writable;
    bytesWritable.setCapacity(bytesWritable.getLength());// getBytes() returns
    // the full array
    assertArrayEquals(bytesFieldContent, bytesWritable.getBytes());
    client.close();
  }

  @Test
  public void testGetDetailsConcurrently() throws KattaException, ParseException, InterruptedException {
    deployTestIndices(1, 1);
    ILuceneClient client = new LuceneClient(_miniCluster.getZkConfiguration());
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("content: the");
    final Hits hits = client.search(query, new String[] { INDEX_NAME }, 10);
    assertNotNull(hits);
    assertEquals(10, hits.getHits().size());
    List<MapWritable> detailList = client.getDetails(hits.getHits());
    assertEquals(hits.getHits().size(), detailList.size());
    for (int i = 0; i < detailList.size(); i++) {
      final MapWritable details1 = client.getDetails(hits.getHits().get(i));
      final MapWritable details2 = detailList.get(i);
      assertEquals(details1.entrySet(), details2.entrySet());
      final Set<Writable> keySet = details2.keySet();
      assertFalse(keySet.isEmpty());
      final Writable writable = details2.get(new Text("path"));
      assertNotNull(writable);
    }
    client.close();
  }

  @Test
  public void testSearch() throws Exception {
    deploy3Indices();
    ILuceneClient client = new LuceneClient(_miniCluster.getZkConfiguration());
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("foo: bar");
    final Hits hits = client.search(query, new String[] { INDEX3, INDEX2 });
    assertNotNull(hits);
    for (final Hit hit : hits.getHits()) {
      writeToLog(hit);
    }
    assertEquals(8, hits.size());
    assertEquals(8, hits.getHits().size());
    client.close();
  }

  @Test
  public void testFieldSortWithNoResultShard() throws Exception {
    File sortIndex1 = _temporaryFolder.newFolder("sortIndex1");
    File sortIndex2 = _temporaryFolder.newFolder("sortIndex2");
    IndexWriter indexWriter1 = new IndexWriter(FSDirectory.open(sortIndex1), new StandardAnalyzer(Version.LUCENE_30),
            true, MaxFieldLength.UNLIMITED);
    IndexWriter indexWriter2 = new IndexWriter(FSDirectory.open(sortIndex2), new StandardAnalyzer(Version.LUCENE_30),
            true, MaxFieldLength.UNLIMITED);

    Document document = new Document();
    document.add(new Field("text", "abc", Field.Store.YES, Index.NOT_ANALYZED));
    document.add(new NumericField("timesort", Field.Store.YES, false).setLongValue(1234567890123l));
    indexWriter1.addDocument(document);
    indexWriter1.close();

    document = new Document();
    document.add(new Field("text", "abc2", Field.Store.YES, Index.NOT_ANALYZED));
    document.add(new NumericField("timesort", Field.Store.YES, false).setLongValue(1234567890123l));
    indexWriter2.addDocument(document);
    indexWriter2.close();

    DeployClient deployClient = new DeployClient(_miniCluster.getProtocol());
    String indexName = "sortIndex";
    IndexState indexState = deployClient.addIndex(indexName, sortIndex1.getParentFile().getAbsolutePath(), 1)
            .joinDeployment();
    assertEquals(IndexState.DEPLOYED, indexState);

    // query and compare results
    ILuceneClient client = new LuceneClient(_miniCluster.getZkConfiguration());
    Sort sort = new Sort(new SortField[] { new SortField("timesort", SortField.LONG) });

    // query both documents
    Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("text:ab*");
    Hits hits = client.search(query, new String[] { indexName }, 20, sort);
    assertEquals(2, hits.size());

    // query only one document
    query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("text:abc2");
    hits = client.search(query, new String[] { indexName }, 20, sort);
    assertEquals(1, hits.size());

    // query only one document on one node
    _miniCluster.shutdownNode(0);
    TestUtil.waitUntilIndexBalanced(_protocol, indexName);
    query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("text:abc2");
    hits = client.search(query, new String[] { indexName }, 20, sort);
    assertEquals(1, hits.size());
    client.close();
  }

  @SuppressWarnings("unchecked")
  @Test
  public void testSortedSearch() throws Exception {
    // write and deploy test index
    File sortIndex = _temporaryFolder.newFolder("sortIndex2");
    String queryTerm = "2";
    String sortFieldName = "sortField";
    String textFieldName = "textField";
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(sortIndex), new StandardAnalyzer(Version.LUCENE_30),
            true, MaxFieldLength.UNLIMITED);
    for (int i = 0; i < 20; i++) {
      Document document = new Document();
      document.add(new Field(sortFieldName, "" + i, Store.NO, Index.NOT_ANALYZED));
      String textField = "sample text";
      if (i % 2 == 0) {// produce some different scores
        for (int j = 0; j < i; j++) {
          textField += " " + queryTerm;
        }
      }
      document.add(new Field(textFieldName, textField, Store.NO, Index.ANALYZED));
      indexWriter.addDocument(document);
    }
    indexWriter.optimize();
    indexWriter.close();
    DeployClient deployClient = new DeployClient(_miniCluster.getProtocol());
    IndexState indexState = deployClient.addIndex(sortIndex.getName(), sortIndex.getParentFile().getAbsolutePath(), 1)
            .joinDeployment();
    assertEquals(IndexState.DEPLOYED, indexState);

    // query and compare results
    ILuceneClient client = new LuceneClient(_miniCluster.getZkConfiguration());
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse(textFieldName + ": "
            + queryTerm);
    Sort sort = new Sort(new SortField[] { new SortField("sortField", SortField.INT) });
    final Hits hits = client.search(query, new String[] { sortIndex.getName() }, 20, sort);
    assertNotNull(hits);
    List<Hit> hitsList = hits.getHits();
    for (final Hit hit : hitsList) {
      writeToLog(hit);
    }
    assertEquals(9, hits.size());
    assertEquals(9, hitsList.size());
    assertEquals(1, hitsList.get(0).getSortFields().length);
    for (int i = 0; i < hitsList.size() - 1; i++) {
      int compareTo = hitsList.get(i).getSortFields()[0].compareTo(hitsList.get(i + 1).getSortFields()[0]);
      assertTrue("results not after field", compareTo == 0 || compareTo == -1);
    }
    client.close();
  }

  @Test
  public void testSearchLimit() throws Exception {
    deploy3Indices();
    ILuceneClient client = new LuceneClient(_miniCluster.getZkConfiguration());
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("foo: bar");
    final Hits hits = client.search(query, new String[] { INDEX3, INDEX2 }, 1);
    assertNotNull(hits);
    for (final Hit hit : hits.getHits()) {
      writeToLog(hit);
    }
    assertEquals(8, hits.size());
    assertEquals(1, hits.getHits().size());
    for (final Hit hit : hits.getHits()) {
      LOG.info(hit.getNode() + " -- " + hit.getScore() + " -- " + hit.getDocId());
    }
    client.close();
  }

  @Test
  public void testKatta20SearchLimitMaxNumberOfHits() throws Exception {
    deployTestIndices(1, getNodeCount());
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("foo: bar");
    ILuceneClient client = new LuceneClient(_miniCluster.getZkConfiguration());
    final Hits expectedHits = client.search(query, new String[] { INDEX_NAME }, 4);
    assertNotNull(expectedHits);
    LOG.info("Expected hits:");
    for (final Hit hit : expectedHits.getHits()) {
      writeToLog(hit);
    }
    assertEquals(4, expectedHits.getHits().size());

    for (int i = 0; i < 100; i++) {
      // Now we redo the search, but limit the max number of hits. We expect the
      // same ordering of hits.
      for (int maxHits = 1; maxHits < expectedHits.size() + 1; maxHits++) {
        final Hits hits = client.search(query, new String[] { INDEX_NAME }, maxHits);
        assertNotNull(hits);
        assertEquals(maxHits, hits.getHits().size());
        for (int j = 0; j < hits.getHits().size(); j++) {
          // writeToLog("expected: ", expectedHits.getHits().get(j));
          // writeToLog("actual : ", hits.getHits().get(j));
          assertEquals(expectedHits.getHits().get(j).getScore(), hits.getHits().get(j).getScore(), 0.0);
        }
      }
    }
    client.close();
  }

  @Test
  public void testSearchSimiliarity() throws Exception {
    deploy3Indices();
    ILuceneClient client = new LuceneClient(_miniCluster.getZkConfiguration());
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("foo: bar");
    final Hits hits = client.search(query, new String[] { INDEX2 });
    assertNotNull(hits);
    assertEquals(4, hits.getHits().size());
    for (final Hit hit : hits.getHits()) {
      LOG.info(hit.getNode() + " -- " + hit.getScore() + " -- " + hit.getDocId());
    }
    client.close();
  }

  @Test
  public void testNonExistentShard() throws Exception {
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("foo: bar");
    ILuceneClient client = new LuceneClient(_miniCluster.getZkConfiguration());
    try {
      client.search(query, new String[] { "doesNotExist" });
      fail("Should have failed.");
    } catch (KattaException e) {
      assertEquals("Index [pattern(s)] '[doesNotExist]' do not match to any deployed index: []", e.getMessage());
    }
    client.close();
  }

  @Test
  public void testIndexPattern() throws Exception {
    deploy3Indices();
    ILuceneClient client = new LuceneClient(_miniCluster.getZkConfiguration());
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("foo: bar");
    final Hits hits = client.search(query, new String[] { "index[2-3]+" });
    assertNotNull(hits);
    for (final Hit hit : hits.getHits()) {
      writeToLog(hit);
    }
    assertEquals(8, hits.size());
    assertEquals(8, hits.getHits().size());
    client.close();
  }

  @Test
  public void testNumDocGreaterMaxInteger_KATTA_140() throws Exception {
    deployTestIndices(1, 1);
    LuceneClient client = new LuceneClient(_miniCluster.getZkConfiguration()) {
      @Override
      protected DocumentFrequencyWritable getDocFrequencies(Query q, String[] indexNames) throws KattaException {
        DocumentFrequencyWritable docFreq = new DocumentFrequencyWritable();
        docFreq.put("foo", "bar", 23);
        docFreq.addNumDocs(Integer.MAX_VALUE);
        docFreq.addNumDocs(23);
        // docFreq.
        return docFreq;
      }
    };
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("foo: bar");
    client.search(query, new String[] { INDEX_NAME }, 10, null);
    // client.search(query, new String[] { INDEX_NAME }, 10, new Sort(new
    // SortField("foo", SortField.STRING)));
    client.close();
  }

  private void writeToLog(Hit hit) {
    LOG.info(hit.getNode() + " -- " + hit.getShard() + " -- " + hit.getScore() + " -- " + hit.getDocId());
  }

  private void deploy3Indices() throws Exception {
    DeployClient deployClient = new DeployClient(_miniCluster.getProtocol());
    deployClient.addIndex(INDEX1, TestResources.INDEX1.getAbsolutePath(), 1).joinDeployment();
    deployClient.addIndex(INDEX2, TestResources.INDEX1.getAbsolutePath(), 1).joinDeployment();
    deployClient.addIndex(INDEX3, TestResources.INDEX1.getAbsolutePath(), 1).joinDeployment();
  }

}
TOP

Related Classes of net.sf.katta.integrationTest.lib.lucene.LuceneClientTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.