Package voldemort.store.readonly.mr

Source Code of voldemort.store.readonly.mr.HadoopStoreBuilderTest$TextStoreMapper

/*
* Copyright 2008-2009 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package voldemort.store.readonly.mr;

import static org.junit.Assert.fail;

import java.io.File;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.TextInputFormat;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import voldemort.ServerTestUtils;
import voldemort.TestUtils;
import voldemort.client.RoutingTier;
import voldemort.cluster.Cluster;
import voldemort.routing.RoutingStrategyFactory;
import voldemort.routing.RoutingStrategyType;
import voldemort.serialization.DefaultSerializerFactory;
import voldemort.serialization.Serializer;
import voldemort.serialization.SerializerDefinition;
import voldemort.store.Store;
import voldemort.store.StoreDefinition;
import voldemort.store.StoreDefinitionBuilder;
import voldemort.store.readonly.BinarySearchStrategy;
import voldemort.store.readonly.InterpolationSearchStrategy;
import voldemort.store.readonly.ReadOnlyStorageConfiguration;
import voldemort.store.readonly.ReadOnlyStorageEngine;
import voldemort.store.readonly.ReadOnlyStorageFormat;
import voldemort.store.readonly.ReadOnlyStorageMetadata;
import voldemort.store.readonly.SearchStrategy;
import voldemort.store.readonly.checksum.CheckSum;
import voldemort.store.readonly.checksum.CheckSumTests;
import voldemort.store.readonly.checksum.CheckSum.CheckSumType;
import voldemort.store.readonly.fetcher.HdfsFetcher;
import voldemort.store.serialized.SerializingStore;
import voldemort.utils.ByteArray;
import voldemort.utils.ByteUtils;
import voldemort.utils.ClosableIterator;
import voldemort.utils.Pair;
import voldemort.versioning.Versioned;

/**
* Unit test to check Read-Only Batch Indexer <strong>in Local mode numReduce
* will be only one hence we will see only one node files irrespective of
* cluster details.</strong>
*
*
*/
@RunWith(Parameterized.class)
@SuppressWarnings("deprecation")
public class HadoopStoreBuilderTest {

    private SearchStrategy searchStrategy;
    private boolean saveKeys;

    @Parameters
    public static Collection<Object[]> configs() {
        return Arrays.asList(new Object[][] { { new BinarySearchStrategy(), true },
                { new InterpolationSearchStrategy(), true }, { new BinarySearchStrategy(), false },
                { new InterpolationSearchStrategy(), false } });
    }

    public HadoopStoreBuilderTest(SearchStrategy searchStrategy, boolean saveKeys) {
        this.saveKeys = saveKeys;
        this.searchStrategy = searchStrategy;
    }

    public static class TextStoreMapper extends
            AbstractHadoopStoreBuilderMapper<LongWritable, Text> {

        @Override
        public Object makeKey(LongWritable key, Text value) {
            String[] tokens = value.toString().split("\\s+");
            return tokens[0];
        }

        @Override
        public Object makeValue(LongWritable key, Text value) {
            String[] tokens = value.toString().split("\\s+");
            return tokens[1];
        }

    }

    /**
     * Issue 258 : 'node--1' produced during store building if some reducer does
     * not get any data.
     *
     * @throws Exception
     */
    @Test
    public void testRowsLessThanNodes() throws Exception {
        Map<String, String> values = new HashMap<String, String>();
        File testDir = TestUtils.createTempDir();
        File tempDir = new File(testDir, "temp");
        File outputDir = new File(testDir, "output");

        // write test data to text file
        File inputFile = File.createTempFile("input", ".txt", testDir);
        inputFile.deleteOnExit();
        StringBuilder contents = new StringBuilder();
        for(Map.Entry<String, String> entry: values.entrySet())
            contents.append(entry.getKey() + "\t" + entry.getValue() + "\n");
        FileUtils.writeStringToFile(inputFile, contents.toString());

        String storeName = "test";
        SerializerDefinition serDef = new SerializerDefinition("string");
        Cluster cluster = ServerTestUtils.getLocalCluster(10);

        // Test backwards compatibility
        StoreDefinition def = new StoreDefinitionBuilder().setName(storeName)
                                                          .setType(ReadOnlyStorageConfiguration.TYPE_NAME)
                                                          .setKeySerializer(serDef)
                                                          .setValueSerializer(serDef)
                                                          .setRoutingPolicy(RoutingTier.CLIENT)
                                                          .setRoutingStrategyType(RoutingStrategyType.CONSISTENT_STRATEGY)
                                                          .setReplicationFactor(1)
                                                          .setPreferredReads(1)
                                                          .setRequiredReads(1)
                                                          .setPreferredWrites(1)
                                                          .setRequiredWrites(1)
                                                          .build();
        HadoopStoreBuilder builder = new HadoopStoreBuilder(new Configuration(),
                                                            TextStoreMapper.class,
                                                            TextInputFormat.class,
                                                            cluster,
                                                            def,
                                                            64 * 1024,
                                                            new Path(tempDir.getAbsolutePath()),
                                                            new Path(outputDir.getAbsolutePath()),
                                                            new Path(inputFile.getAbsolutePath()),
                                                            CheckSumType.MD5,
                                                            saveKeys,
                                                            false);
        builder.build();

        // Should not produce node--1 directory + have one folder for every node
        Assert.assertEquals(cluster.getNumberOfNodes(), outputDir.listFiles().length);
        for(File f: outputDir.listFiles()) {
            Assert.assertFalse(f.toString().contains("node--1"));
        }

        // Check if individual nodes exist, along with their metadata file
        for(int nodeId = 0; nodeId < 10; nodeId++) {
            File nodeFile = new File(outputDir, "node-" + Integer.toString(nodeId));
            Assert.assertTrue(nodeFile.exists());
            Assert.assertTrue(new File(nodeFile, ".metadata").exists());
        }
    }

    @Test
    public void testHadoopBuild() throws Exception {
        // create test data
        Map<String, String> values = new HashMap<String, String>();
        File testDir = TestUtils.createTempDir();
        File tempDir = new File(testDir, "temp"), tempDir2 = new File(testDir, "temp2");
        File outputDir = new File(testDir, "output"), outputDir2 = new File(testDir, "output2");
        File storeDir = TestUtils.createTempDir(testDir);
        for(int i = 0; i < 200; i++)
            values.put(Integer.toString(i), Integer.toBinaryString(i));

        // write test data to text file
        File inputFile = File.createTempFile("input", ".txt", testDir);
        inputFile.deleteOnExit();
        StringBuilder contents = new StringBuilder();
        for(Map.Entry<String, String> entry: values.entrySet())
            contents.append(entry.getKey() + "\t" + entry.getValue() + "\n");
        FileUtils.writeStringToFile(inputFile, contents.toString());

        String storeName = "test";
        SerializerDefinition serDef = new SerializerDefinition("string");
        Cluster cluster = ServerTestUtils.getLocalCluster(1);

        // Test backwards compatibility
        StoreDefinition def = new StoreDefinitionBuilder().setName(storeName)
                                                          .setType(ReadOnlyStorageConfiguration.TYPE_NAME)
                                                          .setKeySerializer(serDef)
                                                          .setValueSerializer(serDef)
                                                          .setRoutingPolicy(RoutingTier.CLIENT)
                                                          .setRoutingStrategyType(RoutingStrategyType.CONSISTENT_STRATEGY)
                                                          .setReplicationFactor(1)
                                                          .setPreferredReads(1)
                                                          .setRequiredReads(1)
                                                          .setPreferredWrites(1)
                                                          .setRequiredWrites(1)
                                                          .build();
        HadoopStoreBuilder builder = new HadoopStoreBuilder(new Configuration(),
                                                            TextStoreMapper.class,
                                                            TextInputFormat.class,
                                                            cluster,
                                                            def,
                                                            64 * 1024,
                                                            new Path(tempDir2.getAbsolutePath()),
                                                            new Path(outputDir2.getAbsolutePath()),
                                                            new Path(inputFile.getAbsolutePath()),
                                                            CheckSumType.MD5,
                                                            saveKeys,
                                                            false);
        builder.build();

        builder = new HadoopStoreBuilder(new Configuration(),
                                         TextStoreMapper.class,
                                         TextInputFormat.class,
                                         cluster,
                                         def,
                                         64 * 1024,
                                         new Path(tempDir.getAbsolutePath()),
                                         new Path(outputDir.getAbsolutePath()),
                                         new Path(inputFile.getAbsolutePath()),
                                         CheckSumType.MD5,
                                         saveKeys,
                                         false);
        builder.build();

        // Check if checkSum is generated in outputDir
        File nodeFile = new File(outputDir, "node-0");

        // Check if metadata file exists
        File metadataFile = new File(nodeFile, ".metadata");
        Assert.assertTrue(metadataFile.exists());

        ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata(metadataFile);
        if(saveKeys)
            Assert.assertEquals(metadata.get(ReadOnlyStorageMetadata.FORMAT),
                                ReadOnlyStorageFormat.READONLY_V2.getCode());
        else
            Assert.assertEquals(metadata.get(ReadOnlyStorageMetadata.FORMAT),
                                ReadOnlyStorageFormat.READONLY_V1.getCode());

        Assert.assertEquals(metadata.get(ReadOnlyStorageMetadata.CHECKSUM_TYPE),
                            CheckSum.toString(CheckSumType.MD5));

        // Check contents of checkSum file
        byte[] md5 = Hex.decodeHex(((String) metadata.get(ReadOnlyStorageMetadata.CHECKSUM)).toCharArray());
        byte[] checkSumBytes = CheckSumTests.calculateCheckSum(nodeFile.listFiles(),
                                                               CheckSumType.MD5);
        Assert.assertEquals(0, ByteUtils.compare(checkSumBytes, md5));

        // check if fetching works
        HdfsFetcher fetcher = new HdfsFetcher();

        // Fetch to version directory
        File versionDir = new File(storeDir, "version-0");
        fetcher.fetch(nodeFile.getAbsolutePath(), versionDir.getAbsolutePath());
        Assert.assertTrue(versionDir.exists());

        // open store
        @SuppressWarnings("unchecked")
        Serializer<Object> serializer = (Serializer<Object>) new DefaultSerializerFactory().getSerializer(serDef);
        ReadOnlyStorageEngine engine = new ReadOnlyStorageEngine(storeName,
                                                                 searchStrategy,
                                                                 new RoutingStrategyFactory().updateRoutingStrategy(def,
                                                                                                                    cluster),
                                                                 0,
                                                                 storeDir,
                                                                 1);
        Store<Object, Object, Object> store = SerializingStore.wrap(engine,
                                                                    serializer,
                                                                    serializer,
                                                                    serializer);

        // check values
        for(Map.Entry<String, String> entry: values.entrySet()) {
            List<Versioned<Object>> found = store.get(entry.getKey(), null);
            Assert.assertEquals("Incorrect number of results", 1, found.size());
            Assert.assertEquals(entry.getValue(), found.get(0).getValue());
        }

        // also check the iterator - first key iterator...
        try {
            ClosableIterator<ByteArray> keyIterator = engine.keys();
            if(!saveKeys) {
                fail("Should have thrown an exception since this RO format does not support iterators");
            }
            int numElements = 0;
            while(keyIterator.hasNext()) {
                Assert.assertTrue(values.containsKey(serializer.toObject(keyIterator.next().get())));
                numElements++;
            }

            Assert.assertEquals(numElements, values.size());
        } catch(UnsupportedOperationException e) {
            if(saveKeys) {
                fail("Should not have thrown an exception since this RO format does support iterators");
            }
        }

        // ... and entry iterator
        try {
            ClosableIterator<Pair<ByteArray, Versioned<byte[]>>> entryIterator = engine.entries();
            if(!saveKeys) {
                fail("Should have thrown an exception since this RO format does not support iterators");
            }
            int numElements = 0;
            while(entryIterator.hasNext()) {
                Pair<ByteArray, Versioned<byte[]>> entry = entryIterator.next();
                Assert.assertEquals(values.get(serializer.toObject(entry.getFirst().get())),
                                    serializer.toObject(entry.getSecond().getValue()));
                numElements++;
            }

            Assert.assertEquals(numElements, values.size());
        } catch(UnsupportedOperationException e) {
            if(saveKeys) {
                fail("Should not have thrown an exception since this RO format does support iterators");
            }
        }
    }
}
TOP

Related Classes of voldemort.store.readonly.mr.HadoopStoreBuilderTest$TextStoreMapper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.