/*
* Copyright 2008-2009 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package voldemort.store.readonly.mr;

import static org.junit.Assert.fail;

import java.io.File;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.TextInputFormat;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import voldemort.ServerTestUtils;
import voldemort.TestUtils;
import voldemort.client.RoutingTier;
import voldemort.cluster.Cluster;
import voldemort.routing.RoutingStrategyFactory;
import voldemort.routing.RoutingStrategyType;
import voldemort.serialization.DefaultSerializerFactory;
import voldemort.serialization.Serializer;
import voldemort.serialization.SerializerDefinition;
import voldemort.store.Store;
import voldemort.store.StoreDefinition;
import voldemort.store.StoreDefinitionBuilder;
import voldemort.store.readonly.BinarySearchStrategy;
import voldemort.store.readonly.InterpolationSearchStrategy;
import voldemort.store.readonly.ReadOnlyStorageConfiguration;
import voldemort.store.readonly.ReadOnlyStorageEngine;
import voldemort.store.readonly.ReadOnlyStorageFormat;
import voldemort.store.readonly.ReadOnlyStorageMetadata;
import voldemort.store.readonly.SearchStrategy;
import voldemort.store.readonly.checksum.CheckSum;
import voldemort.store.readonly.checksum.CheckSumTests;
import voldemort.store.readonly.checksum.CheckSum.CheckSumType;
import voldemort.store.readonly.fetcher.HdfsFetcher;
import voldemort.store.serialized.SerializingStore;
import voldemort.utils.ByteArray;
import voldemort.utils.ByteUtils;
import voldemort.utils.ClosableIterator;
import voldemort.utils.Pair;
import voldemort.versioning.Versioned;

/**
* Unit test to check the read-only batch indexer. <strong>In local mode the
* number of reducers will be one, hence we will see files for only one node,
* irrespective of the cluster details.</strong>
*/
@RunWith(Parameterized.class)
@SuppressWarnings("deprecation")
public class HadoopStoreBuilderTest {
private SearchStrategy searchStrategy;
private boolean saveKeys;
@Parameters
public static Collection<Object[]> configs() {
return Arrays.asList(new Object[][] { { new BinarySearchStrategy(), true },
{ new InterpolationSearchStrategy(), true }, { new BinarySearchStrategy(), false },
{ new InterpolationSearchStrategy(), false } });
}
public HadoopStoreBuilderTest(SearchStrategy searchStrategy, boolean saveKeys) {
this.saveKeys = saveKeys;
this.searchStrategy = searchStrategy;
}
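/**
* A mapper that splits each input line on whitespace and uses the first
* token as the key and the second token as the value (e.g. "42\t101010").
*/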
public static class TextStoreMapper extends
AbstractHadoopStoreBuilderMapper<LongWritable, Text> {
@Override
public Object makeKey(LongWritable key, Text value) {
String[] tokens = value.toString().split("\\s+");
return tokens[0];
}
@Override
public Object makeValue(LongWritable key, Text value) {
String[] tokens = value.toString().split("\\s+");
return tokens[1];
}
}
/**
* Issue 258: a 'node--1' directory is produced during store building if some
* reducer does not receive any data.
*
* @throws Exception
*/
@Test
public void testRowsLessThanNodes() throws Exception {
Map<String, String> values = new HashMap<String, String>();
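// Note: 'values' is intentionally left empty, so the input file contains
// fewer rows than the cluster has nodes and some reducers receive no data
// (the condition behind issue 258).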
File testDir = TestUtils.createTempDir();
File tempDir = new File(testDir, "temp");
File outputDir = new File(testDir, "output");
// write test data to text file
File inputFile = File.createTempFile("input", ".txt", testDir);
inputFile.deleteOnExit();
StringBuilder contents = new StringBuilder();
for(Map.Entry<String, String> entry: values.entrySet())
contents.append(entry.getKey() + "\t" + entry.getValue() + "\n");
FileUtils.writeStringToFile(inputFile, contents.toString());
String storeName = "test";
SerializerDefinition serDef = new SerializerDefinition("string");
Cluster cluster = ServerTestUtils.getLocalCluster(10);
// Test backwards compatibility
StoreDefinition def = new StoreDefinitionBuilder().setName(storeName)
.setType(ReadOnlyStorageConfiguration.TYPE_NAME)
.setKeySerializer(serDef)
.setValueSerializer(serDef)
.setRoutingPolicy(RoutingTier.CLIENT)
.setRoutingStrategyType(RoutingStrategyType.CONSISTENT_STRATEGY)
.setReplicationFactor(1)
.setPreferredReads(1)
.setRequiredReads(1)
.setPreferredWrites(1)
.setRequiredWrites(1)
.build();
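// The HadoopStoreBuilder arguments after the store definition are, in
// order: chunk size in bytes, temp dir, output dir, input path, checksum
// type, the save-keys flag and (presumably) the reducer-per-bucket flag.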
HadoopStoreBuilder builder = new HadoopStoreBuilder(new Configuration(),
TextStoreMapper.class,
TextInputFormat.class,
cluster,
def,
64 * 1024,
new Path(tempDir.getAbsolutePath()),
new Path(outputDir.getAbsolutePath()),
new Path(inputFile.getAbsolutePath()),
CheckSumType.MD5,
saveKeys,
false);
builder.build();
// Should not produce a node--1 directory and should have one folder for
// every node
Assert.assertEquals(cluster.getNumberOfNodes(), outputDir.listFiles().length);
for(File f: outputDir.listFiles()) {
Assert.assertFalse(f.toString().contains("node--1"));
}
// Check that each node directory exists, along with its metadata file
for(int nodeId = 0; nodeId < cluster.getNumberOfNodes(); nodeId++) {
File nodeFile = new File(outputDir, "node-" + Integer.toString(nodeId));
Assert.assertTrue(nodeFile.exists());
Assert.assertTrue(new File(nodeFile, ".metadata").exists());
}
}
@Test
public void testHadoopBuild() throws Exception {
// create test data
Map<String, String> values = new HashMap<String, String>();
File testDir = TestUtils.createTempDir();
File tempDir = new File(testDir, "temp"), tempDir2 = new File(testDir, "temp2");
File outputDir = new File(testDir, "output"), outputDir2 = new File(testDir, "output2");
File storeDir = TestUtils.createTempDir(testDir);
for(int i = 0; i < 200; i++)
values.put(Integer.toString(i), Integer.toBinaryString(i));
// write test data to text file
File inputFile = File.createTempFile("input", ".txt", testDir);
inputFile.deleteOnExit();
StringBuilder contents = new StringBuilder();
for(Map.Entry<String, String> entry: values.entrySet())
contents.append(entry.getKey() + "\t" + entry.getValue() + "\n");
FileUtils.writeStringToFile(inputFile, contents.toString());
String storeName = "test";
SerializerDefinition serDef = new SerializerDefinition("string");
Cluster cluster = ServerTestUtils.getLocalCluster(1);
// Test backwards compatibility
StoreDefinition def = new StoreDefinitionBuilder().setName(storeName)
.setType(ReadOnlyStorageConfiguration.TYPE_NAME)
.setKeySerializer(serDef)
.setValueSerializer(serDef)
.setRoutingPolicy(RoutingTier.CLIENT)
.setRoutingStrategyType(RoutingStrategyType.CONSISTENT_STRATEGY)
.setReplicationFactor(1)
.setPreferredReads(1)
.setRequiredReads(1)
.setPreferredWrites(1)
.setRequiredWrites(1)
.build();
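// Build the store twice, into separate temp/output directories; only the
// second build's output is verified below.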
HadoopStoreBuilder builder = new HadoopStoreBuilder(new Configuration(),
TextStoreMapper.class,
TextInputFormat.class,
cluster,
def,
64 * 1024,
new Path(tempDir2.getAbsolutePath()),
new Path(outputDir2.getAbsolutePath()),
new Path(inputFile.getAbsolutePath()),
CheckSumType.MD5,
saveKeys,
false);
builder.build();
builder = new HadoopStoreBuilder(new Configuration(),
TextStoreMapper.class,
TextInputFormat.class,
cluster,
def,
64 * 1024,
new Path(tempDir.getAbsolutePath()),
new Path(outputDir.getAbsolutePath()),
new Path(inputFile.getAbsolutePath()),
CheckSumType.MD5,
saveKeys,
false);
builder.build();
// Check if checkSum is generated in outputDir
File nodeFile = new File(outputDir, "node-0");
// Check if metadata file exists
File metadataFile = new File(nodeFile, ".metadata");
Assert.assertTrue(metadataFile.exists());
ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata(metadataFile);
if(saveKeys)
Assert.assertEquals(ReadOnlyStorageFormat.READONLY_V2.getCode(),
metadata.get(ReadOnlyStorageMetadata.FORMAT));
else
Assert.assertEquals(ReadOnlyStorageFormat.READONLY_V1.getCode(),
metadata.get(ReadOnlyStorageMetadata.FORMAT));
Assert.assertEquals(CheckSum.toString(CheckSumType.MD5),
metadata.get(ReadOnlyStorageMetadata.CHECKSUM_TYPE));
// Verify the checksum recorded in the metadata against one computed from
// the node's data files
byte[] md5 = Hex.decodeHex(((String) metadata.get(ReadOnlyStorageMetadata.CHECKSUM)).toCharArray());
byte[] checkSumBytes = CheckSumTests.calculateCheckSum(nodeFile.listFiles(),
CheckSumType.MD5);
Assert.assertEquals(0, ByteUtils.compare(checkSumBytes, md5));
// check if fetching works
HdfsFetcher fetcher = new HdfsFetcher();
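// HdfsFetcher goes through Hadoop's FileSystem abstraction, so fetching
// from a local path works here just as an HDFS URL would.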
// Fetch to version directory
File versionDir = new File(storeDir, "version-0");
fetcher.fetch(nodeFile.getAbsolutePath(), versionDir.getAbsolutePath());
Assert.assertTrue(versionDir.exists());
// open store
@SuppressWarnings("unchecked")
Serializer<Object> serializer = (Serializer<Object>) new DefaultSerializerFactory().getSerializer(serDef);
ReadOnlyStorageEngine engine = new ReadOnlyStorageEngine(storeName,
searchStrategy,
new RoutingStrategyFactory().updateRoutingStrategy(def,
cluster),
0,
storeDir,
1);
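// Engine arguments after the routing strategy: node id 0, the directory
// the store was fetched into and (presumably) the number of backup
// versions to keep.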
Store<Object, Object, Object> store = SerializingStore.wrap(engine,
serializer,
serializer,
serializer);
// check values
for(Map.Entry<String, String> entry: values.entrySet()) {
List<Versioned<Object>> found = store.get(entry.getKey(), null);
Assert.assertEquals("Incorrect number of results", 1, found.size());
Assert.assertEquals(entry.getValue(), found.get(0).getValue());
}
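// READONLY_V2 (saveKeys == true) stores the keys on disk and hence
// supports key and entry iteration; READONLY_V1 is expected to throw
// UnsupportedOperationException instead.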
// also check the iterator - first key iterator...
try {
ClosableIterator<ByteArray> keyIterator = engine.keys();
if(!saveKeys) {
fail("Should have thrown an exception since this RO format does not support iterators");
}
int numElements = 0;
while(keyIterator.hasNext()) {
Assert.assertTrue(values.containsKey(serializer.toObject(keyIterator.next().get())));
numElements++;
}
Assert.assertEquals(values.size(), numElements);
keyIterator.close();
} catch(UnsupportedOperationException e) {
if(saveKeys) {
fail("Should not have thrown an exception since this RO format does support iterators");
}
}
// ... and entry iterator
try {
ClosableIterator<Pair<ByteArray, Versioned<byte[]>>> entryIterator = engine.entries();
if(!saveKeys) {
fail("Should have thrown an exception since this RO format does not support iterators");
}
int numElements = 0;
while(entryIterator.hasNext()) {
Pair<ByteArray, Versioned<byte[]>> entry = entryIterator.next();
Assert.assertEquals(values.get(serializer.toObject(entry.getFirst().get())),
serializer.toObject(entry.getSecond().getValue()));
numElements++;
}
Assert.assertEquals(values.size(), numElements);
entryIterator.close();
} catch(UnsupportedOperationException e) {
if(saveKeys) {
fail("Should not have thrown an exception since this RO format does support iterators");
}
}
}
}