/*
* Copyright 2012 NGDATA nv
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lilyproject.indexer.batchbuild.test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
import com.google.common.base.Charsets;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.ByteStreams;
import com.ngdata.hbaseindexer.SolrConnectionParams;
import com.ngdata.hbaseindexer.model.api.IndexerDefinition;
import com.ngdata.hbaseindexer.model.api.IndexerDefinitionBuilder;
import com.ngdata.hbaseindexer.model.api.WriteableIndexerModel;
import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.ORDER;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.lilyproject.client.LilyClient;
import org.lilyproject.indexer.derefmap.DependantRecordIdsIterator;
import org.lilyproject.indexer.derefmap.DerefMap;
import org.lilyproject.indexer.derefmap.DerefMapHbaseImpl;
import org.lilyproject.indexer.hbase.mapper.LilyIndexerComponentFactory;
import org.lilyproject.indexer.model.api.LResultToSolrMapper;
import org.lilyproject.lilyservertestfw.LilyProxy;
import org.lilyproject.lilyservertestfw.LilyServerProxy;
import org.lilyproject.repository.api.AbsoluteRecordId;
import org.lilyproject.repository.api.FieldType;
import org.lilyproject.repository.api.LRepository;
import org.lilyproject.repository.api.LTable;
import org.lilyproject.repository.api.Link;
import org.lilyproject.repository.api.QName;
import org.lilyproject.repository.api.Record;
import org.lilyproject.repository.api.RecordId;
import org.lilyproject.repository.api.RecordType;
import org.lilyproject.repository.api.SchemaId;
import org.lilyproject.repository.api.Scope;
import org.lilyproject.repository.api.TypeManager;
import org.lilyproject.solrtestfw.SolrProxy;
import org.lilyproject.util.hbase.LilyHBaseSchema.Table;
import org.lilyproject.util.io.Closer;
import org.lilyproject.util.repo.VersionTag;
public class BatchBuildTest {
/** Name of the repository created for this test class. */
private static final String REPO_NAME = "batchtestrepo";
/** Name of the indexer registered in {@link #setUpBeforeClass()}. */
private static final String INDEX_NAME = "batchtest";
/**
 * Counter key for failed records. Currently only referenced from a commented-out
 * line in {@code waitForIndexAndCommit}; kept so that check can be restored.
 */
private static final String COUNTER_NUM_FAILED_RECORDS =
        "org.lilyproject.indexer.batchbuild.IndexBatchBuildCounters:NUM_FAILED_RECORDS";
/** Maximum time, in milliseconds, to wait for a batch build to finish. */
private static final int BUILD_TIMEOUT = 240000;

// Shared test environment, created once in setUpBeforeClass() and released in tearDownAfterClass().
private static LilyProxy lilyProxy;
private static LilyClient lilyClient;
private static LRepository repository;
private static LTable table;
private static TypeManager typeManager;
private static SolrServer solrServer;
private static SolrProxy solrProxy;
private static LilyServerProxy lilyServerProxy;
private static WriteableIndexerModel model;

// Schema objects looked up again before every test in setup().
private FieldType ft1;
private FieldType ft2;
private RecordType rt1;
/**
 * Starts the Lily test environment with the Solr schema from {@code solrschema.xml},
 * creates the test repository with two field types and the record type {@code rt1},
 * and registers the indexer {@link #INDEX_NAME} configured from {@code indexerconf.xml}.
 * The indexer is registered with incremental indexing state {@code DO_NOT_SUBSCRIBE}.
 *
 * @throws Exception if the environment cannot be started or the schema/indexer cannot be created
 */
@BeforeClass
public static void setUpBeforeClass() throws Exception {
    lilyProxy = new LilyProxy(null, null, null, true);

    // Read the Solr schema; the stream is closed even when reading fails.
    byte[] solrSchema;
    InputStream is = BatchBuildTest.class.getResourceAsStream("solrschema.xml");
    try {
        solrSchema = IOUtils.toByteArray(is);
    } finally {
        IOUtils.closeQuietly(is);
    }
    lilyProxy.start(solrSchema);

    solrProxy = lilyProxy.getSolrProxy();
    solrServer = solrProxy.getSolrServer();
    lilyServerProxy = lilyProxy.getLilyServerProxy();
    lilyServerProxy.createRepository(REPO_NAME);
    lilyClient = lilyServerProxy.getClient();
    repository = lilyClient.getRepository(REPO_NAME);
    table = repository.getDefaultTable();
    typeManager = repository.getTypeManager();

    // Schema used by all tests: a string field, a link field and a record type holding both.
    FieldType stringField = typeManager.createFieldType("STRING", new QName("batchindex-test", "field1"),
            Scope.NON_VERSIONED);
    FieldType linkField =
            typeManager.createFieldType("LINK", new QName("batchindex-test", "linkField"), Scope.NON_VERSIONED);
    typeManager.recordTypeBuilder()
            .defaultNamespace("batchindex-test")
            .name("rt1")
            .fieldEntry().use(stringField).add()
            .fieldEntry().use(linkField).add()
            .create();

    model = lilyServerProxy.getIndexerModel();

    // Read the indexer configuration; previously this stream was never closed.
    byte[] indexerConfiguration;
    is = BatchBuildTest.class.getResourceAsStream("indexerconf.xml");
    try {
        indexerConfiguration = ByteStreams.toByteArray(is);
    } finally {
        IOUtils.closeQuietly(is);
    }

    Map<String, String> connectionParams = Maps.newHashMap();
    connectionParams.put(SolrConnectionParams.ZOOKEEPER, "localhost:2181/solr");
    connectionParams.put(SolrConnectionParams.COLLECTION, "core0");
    connectionParams.put(LResultToSolrMapper.REPO_KEY, REPO_NAME);
    connectionParams.put(LResultToSolrMapper.ZOOKEEPER_KEY, "localhost:2181");

    IndexerDefinition index = new IndexerDefinitionBuilder()
            .name(INDEX_NAME)
            .connectionType("solr")
            .connectionParams(connectionParams)
            .indexerComponentFactory(LilyIndexerComponentFactory.class.getName())
            .configuration(indexerConfiguration)
            .incrementalIndexingState(IndexerDefinition.IncrementalIndexingState.DO_NOT_SUBSCRIBE)
            .build();
    model.addIndexer(index);
}
/**
 * Releases the clients and proxies obtained in {@link #setUpBeforeClass()} and stops the
 * Lily test environment.
 *
 * @throws Exception if closing a resource or stopping the environment fails
 */
@AfterClass
public static void tearDownAfterClass() throws Exception {
    try {
        Closer.close(lilyClient);
        Closer.close(solrServer);
        Closer.close(solrProxy);
        Closer.close(lilyServerProxy);
    } finally {
        // Always attempt to stop the environment, even if closing a client failed.
        // lilyProxy is still null when its construction in setUpBeforeClass() threw.
        if (lilyProxy != null) {
            lilyProxy.stop();
        }
    }
}
/**
 * Re-reads the field types and the record type created in {@link #setUpBeforeClass()}
 * from the type manager before each test.
 *
 * @throws Exception if one of the types cannot be looked up
 */
@Before
public void setup() throws Exception {
    QName field1Name = new QName("batchindex-test", "field1");
    ft1 = typeManager.getFieldTypeByName(field1Name);

    QName linkFieldName = new QName("batchindex-test", "linkField");
    ft2 = typeManager.getFieldTypeByName(linkFieldName);

    QName recordTypeName = new QName("batchindex-test", "rt1");
    rt1 = typeManager.getRecordTypeByName(recordTypeName, null);
}
/**
 * Creates one record, runs a batch build and verifies that exactly that record
 * is found in Solr.
 */
@Test
public void testBatchIndex() throws Exception {
    final String assertId = "batch-index-test";

    // Create the content that should end up in the index.
    QName recordTypeName = rt1.getName();
    QName fieldName = ft1.getName();
    table.recordBuilder()
            .id(assertId)
            .recordType(recordTypeName)
            .field(fieldName, "test1")
            .create();

    buildAndCommit();

    SolrDocumentList hits = solrServer.query(new SolrQuery("field1:test1*")).getResults();
    assertEquals(1, hits.size());
    assertEquals("USER." + assertId, hits.get(0).getFieldValue("lily.id"));
}
/**
 * Runs the clear-index scenario with the CLI arguments that request clearing.
 */
@Test
public void testClearIndex() throws Exception {
    final String recordId = "clearIndex";
    final boolean clearBeforeBuild = true;
    doTestClearIndex(recordId, clearBeforeBuild);
}
/**
 * Runs the clear-index scenario with the CLI arguments that do not request clearing.
 */
@Test
public void testNoClearIndex() throws Exception {
    final String recordId = "dontClearIndex";
    final boolean clearBeforeBuild = false;
    doTestClearIndex(recordId, clearBeforeBuild);
}
/**
 * Shared scenario for the clear/no-clear tests: puts an extra document straight into Solr,
 * creates one record, runs a batch build and checks how many documents match afterwards.
 *
 * @param assertId record id, also used as the indexed field value and query prefix
 * @param clear    selects the CLI argument resource
 *                 ({@code batchIndexCliArgs-testClearIndex-<clear>.txt}) and the expectation:
 *                 one hit when {@code true}, two hits when {@code false}
 */
public void doTestClearIndex(String assertId, boolean clear) throws Exception {
    String resourceName = String.format("batchIndexCliArgs-testClearIndex-%s.txt", clear);
    String[] cliArgs = getBatchCliArgs(resourceName);
    setBatchIndexConf(cliArgs, null, false);

    // A document added directly to Solr, without a corresponding record.
    SolrInputDocument strayDoc = new SolrInputDocument();
    strayDoc.addField("field1", assertId + "extra");
    strayDoc.addField("lily.id", "doesnotmatter");
    strayDoc.addField("lily.key", "doesnotmatter2");
    strayDoc.addField("lily.table", "record");
    solrServer.add(strayDoc);
    solrServer.commit();

    // The record that the batch build is expected to index.
    QName recordTypeName = rt1.getName();
    QName fieldName = ft1.getName();
    table.recordBuilder()
            .id(assertId)
            .recordType(recordTypeName)
            .field(fieldName, assertId)
            .create();

    buildAndCommit();

    SolrDocumentList hits = solrServer.query(new SolrQuery("field1:" + assertId + "*")).getResults();
    if (!clear) {
        assertEquals(2, hits.size());
        return;
    }
    assertEquals(1, hits.size());
    assertEquals("USER." + assertId, hits.get(0).getFieldValue("lily.id"));
}
/**
 * Loads a classpath resource as UTF-8 text and splits it on spaces into CLI arguments.
 * Tokens are trimmed and empty tokens are dropped.
 *
 * @param name resource name, resolved relative to this class
 * @return the arguments in file order
 * @throws IOException if the resource cannot be read
 */
private String[] getBatchCliArgs(String name) throws IOException {
    byte[] raw = getResourceAsByteArray(name);
    String argString = new String(raw, Charsets.UTF_8);
    Splitter bySpace = Splitter.on(" ").trimResults().omitEmptyStrings();
    Iterable<String> tokens = bySpace.split(argString);
    return Iterables.toArray(tokens, String.class);
}
/**
 * Verifies that the default batch index CLI arguments are applied: of two records created,
 * only one is expected in the index, and the last batch build must report the default arguments.
 */
@Test
public void testDefaultBatchIndexConf() throws Exception {
    String[] defaultConf = getBatchCliArgs("defaultBatchIndexCliArgs-test2.txt");
    setBatchIndexConf(defaultConf, null, false);

    final String assertId = "batch-index-test2";

    // Create two records; only the first one is expected to be indexed.
    table.recordBuilder()
            .id(assertId)
            .recordType(rt1.getName())
            .field(ft1.getName(), "test2 index")
            .create();
    table.recordBuilder()
            .id("batch-noindex-test2")
            .recordType(rt1.getName())
            .field(ft1.getName(), "test2 noindex")
            .create();

    // Run the batch build.
    buildAndCommit();

    // Exactly one of the two records must be in the index.
    SolrDocumentList hits = solrServer.query(new SolrQuery("field1:test2*")).getResults();
    assertEquals(1, hits.size());
    assertEquals("USER." + assertId, hits.get(0).getFieldValue("lily.id"));

    // The arguments recorded for the last build must include all default arguments.
    IndexerDefinition indexer = model.getIndexer(INDEX_NAME);
    boolean usedDefaultConf = Lists.newArrayList(indexer.getLastBatchBuildInfo().getBatchIndexCliArguments())
            .containsAll(Lists.newArrayList(defaultConf));
    assertTrue(usedDefaultConf);
}
/**
 * Verifies that custom batch index CLI arguments apply to a single requested build and
 * that the default arguments are used again for the build after it.
 *
 * Run 1 uses the default arguments, run 2 the custom arguments (build requested through the
 * indexer model), run 3 the default arguments again.
 */
@Test
public void testCustomBatchIndexConf() throws Exception {
    String[] defaultConf = getBatchCliArgs("defaultBatchIndexCliArgs-test2.txt");
    setBatchIndexConf(defaultConf, null, false);

    final String customId = "batch-index-custom-test3";
    final String plainId = "batch-index-test3";

    // Run 1 content: two records expected in the index, one that is not.
    Record plainRecord = table.recordBuilder()
            .id(plainId)
            .recordType(rt1.getName())
            .field(ft1.getName(), "test3 index run1")
            .create();
    Record customRecord = table.recordBuilder()
            .id(customId)
            .recordType(rt1.getName())
            .field(ft1.getName(), "test3 index run1")
            .create();
    table.recordBuilder()
            .id("batch-noindex-test3")
            .recordType(rt1.getName())
            .field(ft1.getName(), "test3 noindex run1")
            .create();

    // Run 1: build with the default arguments.
    buildAndCommit();

    SolrQuery run1Query = new SolrQuery("field1:test3*").addSortField("lily.id", ORDER.asc);
    SolrDocumentList run1Hits = solrServer.query(run1Query).getResults();
    assertEquals(2, run1Hits.size());
    assertEquals("USER." + customId, run1Hits.get(0).getFieldValue("lily.id"));
    assertEquals("USER." + plainId, run1Hits.get(1).getFieldValue("lily.id"));

    // Run 2: change both records, then rebuild with the custom arguments.
    // Only one of the two changes should be picked up.
    plainRecord.setField(ft1.getName(), "test3 index run2");
    customRecord.setField(ft1.getName(), "test3 index run2");
    table.update(plainRecord);
    table.update(customRecord);

    String[] batchConf = getBatchCliArgs("batchIndexCliArgs-test3.txt");
    setBatchIndexConf(defaultConf, batchConf, true);
    waitForIndexAndCommit(BUILD_TIMEOUT);

    // Exactly one record must carry the run2 value.
    SolrDocumentList run2Hits = solrServer.query(new SolrQuery("field1:test3\\ index\\ run2")).getResults();
    assertEquals(1, run2Hits.size());
    assertEquals("USER." + customId, run2Hits.get(0).getFieldValue("lily.id"));

    // The arguments recorded for the last build must include all custom arguments.
    boolean usedCustomConf =
            Lists.newArrayList(model.getIndexer(INDEX_NAME).getLastBatchBuildInfo().getBatchIndexCliArguments())
                    .containsAll(Lists.newArrayList(batchConf));
    assertTrue(usedCustomConf);

    // Run 3: change both records again; the default arguments should apply again.
    plainRecord.setField(ft1.getName(), "test3 index run3");
    customRecord.setField(ft1.getName(), "test3 index run3");
    table.update(plainRecord);
    table.update(customRecord);

    buildAndCommit();

    SolrQuery run3Query = new SolrQuery("field1:test3\\ index\\ run3").addSortField("lily.id", ORDER.asc);
    SolrDocumentList run3Hits = solrServer.query(run3Query).getResults();
    assertEquals(2, run3Hits.size());
    assertEquals("USER." + customId, run3Hits.get(0).getFieldValue("lily.id"));
    assertEquals("USER." + plainId, run3Hits.get(1).getFieldValue("lily.id"));

    // The arguments recorded for the last build must include all default arguments.
    boolean usedDefaultConf =
            Lists.newArrayList(model.getIndexer(INDEX_NAME).getLastBatchBuildInfo().getBatchIndexCliArguments())
                    .containsAll(Lists.newArrayList(defaultConf));
    assertTrue(usedDefaultConf);
}
/**
 * Setting custom batch index CLI arguments without requesting a build is expected to be
 * rejected with an {@code IndexerValidityException}.
 */
@Test(expected = com.ngdata.hbaseindexer.model.api.IndexerValidityException.class)
public void testCustomBatchIndexConf_NoBuild() throws Exception {
    String[] defaultArgs = getBatchCliArgs("defaultBatchIndexCliArgs-test2.txt");
    String[] customArgs = getBatchCliArgs("batchIndexCliArgs-test3.txt");
    setBatchIndexConf(defaultArgs, customArgs, false);

    // Originally: waitForIndexAndCommit(BUILD_TIMEOUT);
    // The direct build below is to be removed when this can be done with hbase-indexer.
    buildAndCommit();
}
/**
 * Reads a classpath resource, resolved relative to this class, fully into memory.
 *
 * @param name resource name
 * @return the resource content
 * @throws IOException if the resource does not exist or cannot be read
 */
private byte[] getResourceAsByteArray(String name) throws IOException {
    InputStream is = BatchBuildTest.class.getResourceAsStream(name);
    if (is == null) {
        // getResourceAsStream signals a missing resource with null; report it by name
        // instead of failing later with an uninformative NullPointerException.
        throw new IOException("Test resource not found on classpath: " + name);
    }
    try {
        return IOUtils.toByteArray(is);
    } finally {
        IOUtils.closeQuietly(is);
    }
}
/**
 * Checks the deref map around batch builds: no dependants before any build, dependants
 * present after a build with the "testClearDerefmap-false" arguments, and no dependants
 * after a build with the "testClearDerefmap-true" arguments. Currently ignored.
 */
@Test
@Ignore
public void testClearDerefMap() throws Exception {
    DerefMap derefMap = DerefMapHbaseImpl
            .create(REPO_NAME, INDEX_NAME, lilyProxy.getHBaseProxy().getConf(), null, repository.getIdGenerator());

    Record linkedRecord = table.recordBuilder()
            .id("deref-test-linkedrecord")
            .recordType(rt1.getName())
            .field(ft1.getName(), "deref test linkedrecord")
            .create();

    // Main record pointing to the linked record through the link field.
    table.recordBuilder()
            .id("deref-test-main")
            .recordType(rt1.getName())
            .field(ft1.getName(), "deref test main")
            .field(ft2.getName(), new Link(linkedRecord.getId()))
            .create();

    SchemaId vtag = typeManager.getFieldTypeByName(VersionTag.LAST).getId();

    assertTrue("expected no dependants before the first build",
            !hasDependants(derefMap, linkedRecord.getId(), vtag));

    setBatchIndexConf(getBatchCliArgs("batchIndexCliArgs-testClearDerefmap-false.txt"), null, false);
    buildAndCommit();

    QueryResponse response = solrServer.query(new SolrQuery("field1:deref\\ test\\ main"));
    assertEquals(1, response.getResults().size());

    assertTrue("expected dependants after the build",
            hasDependants(derefMap, linkedRecord.getId(), vtag));

    setBatchIndexConf(null, getBatchCliArgs("batchIndexCliArgs-testClearDerefmap-true.txt"), true);
    // Originally: waitForIndexAndCommit(BUILD_TIMEOUT);
    // The direct build below is to be removed when this can be done with hbase-indexer.
    buildAndCommit();

    assertTrue("expected no dependants after the second build",
            !hasDependants(derefMap, linkedRecord.getId(), vtag));
}

/**
 * Tells whether the deref map lists at least one dependant of the given record for field
 * {@code ft1} and the given version tag.
 *
 * The iterator is obtained before entering the try block, so a failing lookup surfaces as
 * its own exception and the iterator is closed exactly once.
 */
private boolean hasDependants(DerefMap derefMap, RecordId recordId, SchemaId vtag) throws Exception {
    DependantRecordIdsIterator dependants = derefMap.findDependantsOf(absId(recordId), ft1.getId(), vtag);
    try {
        return dependants.hasNext();
    } finally {
        dependants.close();
    }
}
/**
 * Runs a batch build of the test index through the Lily server proxy, passing
 * {@link #BUILD_TIMEOUT}, and then commits on the Solr server.
 */
private void buildAndCommit() throws Exception {
    final String indexName = INDEX_NAME;
    final int timeout = BUILD_TIMEOUT;
    lilyServerProxy.batchBuildIndex(indexName, timeout);
    solrServer.commit();
}
/**
 * Polls the indexer model every 100 ms until the batch indexing state of the test index is
 * {@code INACTIVE}, fails the test if the last build was not successful or if the timeout
 * expires, and otherwise commits on the Solr server.
 *
 * @param timeout maximum time to wait, in milliseconds
 * @throws Exception if polling the indexer model fails or the thread is interrupted
 */
private void waitForIndexAndCommit(long timeout) throws Exception {
    boolean indexSuccess = false;
    try {
        long tryUntil = System.currentTimeMillis() + timeout;
        while (System.currentTimeMillis() < tryUntil) {
            Thread.sleep(100);
            IndexerDefinition definition = model.getIndexer(INDEX_NAME);
            if (definition.getBatchIndexingState() != IndexerDefinition.BatchIndexingState.INACTIVE) {
                continue;
            }

            // The failed-record counter lookup is disabled, so amountFailed stays null:
            //amountFailed = definition.getLastBatchBuildInfo().getCounters().get(COUNTER_NUM_FAILED_RECORDS);
            Long amountFailed = null;
            boolean successFlag = definition.getLastBatchBuildInfo().isFinishedSuccessful();
            indexSuccess = successFlag && (amountFailed == null || amountFailed == 0L);
            if (!indexSuccess) {
                fail("Batch index build did not finish successfully: success flag = " +
                        successFlag + ", amount failed records = " + amountFailed + ", job url = " +
                        definition.getLastBatchBuildInfo().getMapReduceJobTrackingUrls());
            }
            break;
        }
    } catch (InterruptedException e) {
        // Restore the interrupt flag so the test runner can still observe the interruption.
        Thread.currentThread().interrupt();
        throw new Exception("Error checking if batch index job ended.", e);
    } catch (Exception e) {
        throw new Exception("Error checking if batch index job ended.", e);
    }

    if (!indexSuccess) {
        // Report the timeout that was actually used, not the class-level default.
        fail("Batch build did not end after " + timeout + " millis");
    }
    solrServer.commit();
}
/**
 * Updates the test indexer definition under its lock.
 *
 * @param defaultConf default batch index CLI arguments to set, or {@code null} to leave them unchanged
 * @param customConf  batch index CLI arguments to set, or {@code null} to leave them unchanged
 * @param buildNow    when {@code true}, the batch indexing state is set to {@code BUILD_REQUESTED}
 * @throws Exception if locking, updating or unlocking the indexer fails
 */
private static void setBatchIndexConf(String[] defaultConf, String[] customConf, boolean buildNow) throws Exception {
    final String lockToken = model.lockIndexer(INDEX_NAME);
    try {
        IndexerDefinition current = model.getIndexer(INDEX_NAME);
        IndexerDefinitionBuilder builder = new IndexerDefinitionBuilder().startFrom(current);

        if (null != defaultConf) {
            builder.defaultBatchIndexCliArguments(defaultConf);
        }
        if (null != customConf) {
            builder.batchIndexCliArguments(customConf);
        }
        if (buildNow) {
            builder.batchIndexingState(IndexerDefinition.BatchIndexingState.BUILD_REQUESTED);
        }

        IndexerDefinition updated = builder.build();
        model.updateIndexer(updated, lockToken);
    } finally {
        model.unlockIndexer(lockToken);
    }
}
/**
 * Builds an absolute record id for the given record id, using the name of
 * {@code Table.RECORD} and the id generator of the test repository.
 */
private static AbsoluteRecordId absId(RecordId recordId) {
    AbsoluteRecordId absoluteId =
            repository.getIdGenerator().newAbsoluteRecordId(Table.RECORD.name, recordId);
    return absoluteId;
}
}