/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.accumulo.core.client.mapreduce;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Instance;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.mapreduce.InputFormatBase.AccumuloIterator;
import org.apache.accumulo.core.client.mapreduce.InputFormatBase.AccumuloIteratorOption;
import org.apache.accumulo.core.client.mock.MockInstance;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.user.WholeRowIterator;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.core.util.ContextFactory;
import org.apache.accumulo.core.util.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.log4j.Level;
import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
public class AccumuloInputFormatTest {
@After
public void tearDown() throws Exception {}
/**
* Test basic setting & getting of max versions.
*
* @throws IOException
* Signals that an I/O exception has occurred.
*/
@Test
public void testMaxVersions() throws IOException {
JobContext job = ContextFactory.createJobContext();
AccumuloInputFormat.setMaxVersions(job.getConfiguration(), 1);
int version = AccumuloInputFormat.getMaxVersions(job.getConfiguration());
assertEquals(1, version);
}
/**
* Test max versions with an invalid value.
*
* @throws IOException
* Signals that an I/O exception has occurred.
*/
@Test(expected = IOException.class)
public void testMaxVersionsLessThan1() throws IOException {
JobContext job = ContextFactory.createJobContext();
AccumuloInputFormat.setMaxVersions(job.getConfiguration(), 0);
}
/**
* Test no max version configured.
*/
@Test
public void testNoMaxVersion() {
JobContext job = ContextFactory.createJobContext();
assertEquals(-1, AccumuloInputFormat.getMaxVersions(job.getConfiguration()));
}
/**
* Check that the iterator configuration is getting stored in the Job conf correctly.
*/
@SuppressWarnings("deprecation")
@Test
public void testSetIterator() {
JobContext job = ContextFactory.createJobContext();
AccumuloInputFormat.setIterator(job, 1, "org.apache.accumulo.core.iterators.WholeRowIterator", "WholeRow");
Configuration conf = job.getConfiguration();
String iterators = conf.get("AccumuloInputFormat.iterators");
assertEquals("1:org.apache.accumulo.core.iterators.WholeRowIterator:WholeRow", iterators);
}
@Test
public void testAddIterator() {
JobContext job = ContextFactory.createJobContext();
AccumuloInputFormat.addIterator(job.getConfiguration(), new IteratorSetting(1, "WholeRow", WholeRowIterator.class));
AccumuloInputFormat.addIterator(job.getConfiguration(), new IteratorSetting(2, "Versions", "org.apache.accumulo.core.iterators.VersioningIterator"));
IteratorSetting iter = new IteratorSetting(3, "Count", "org.apache.accumulo.core.iterators.CountingIterator");
iter.addOption("v1", "1");
iter.addOption("junk", "\0omg:!\\xyzzy");
AccumuloInputFormat.addIterator(job.getConfiguration(), iter);
List<AccumuloIterator> list = AccumuloInputFormat.getIterators(job.getConfiguration());
// Check the list size
assertTrue(list.size() == 3);
// Walk the list and make sure our settings are correct
AccumuloIterator setting = list.get(0);
assertEquals(1, setting.getPriority());
assertEquals("org.apache.accumulo.core.iterators.user.WholeRowIterator", setting.getIteratorClass());
assertEquals("WholeRow", setting.getIteratorName());
setting = list.get(1);
assertEquals(2, setting.getPriority());
assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", setting.getIteratorClass());
assertEquals("Versions", setting.getIteratorName());
setting = list.get(2);
assertEquals(3, setting.getPriority());
assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass());
assertEquals("Count", setting.getIteratorName());
List<AccumuloIteratorOption> iteratorOptions = AccumuloInputFormat.getIteratorOptions(job.getConfiguration());
assertEquals(2, iteratorOptions.size());
assertEquals("Count", iteratorOptions.get(0).getIteratorName());
assertEquals("Count", iteratorOptions.get(1).getIteratorName());
assertEquals("v1", iteratorOptions.get(0).getKey());
assertEquals("1", iteratorOptions.get(0).getValue());
assertEquals("junk", iteratorOptions.get(1).getKey());
assertEquals("\0omg:!\\xyzzy", iteratorOptions.get(1).getValue());
}
/**
* Test adding iterator options where the keys and values contain both the FIELD_SEPARATOR character (':') and ITERATOR_SEPARATOR (',') characters. There
* should be no exceptions thrown when trying to parse these types of option entries.
*
* This test makes sure that the expected raw values, as appears in the Job, are equal to what's expected.
*/
@Test
public void testIteratorOptionEncoding() throws Throwable {
String key = "colon:delimited:key";
String value = "comma,delimited,value";
IteratorSetting someSetting = new IteratorSetting(1, "iterator", "Iterator.class");
someSetting.addOption(key, value);
Job job = new Job();
AccumuloInputFormat.addIterator(job.getConfiguration(), someSetting);
final String rawConfigOpt = new AccumuloIteratorOption("iterator", key, value).toString();
assertEquals(rawConfigOpt, job.getConfiguration().get("AccumuloInputFormat.iterators.options"));
List<AccumuloIteratorOption> opts = AccumuloInputFormat.getIteratorOptions(job.getConfiguration());
assertEquals(1, opts.size());
assertEquals(opts.get(0).getKey(), key);
assertEquals(opts.get(0).getValue(), value);
someSetting.addOption(key + "2", value);
someSetting.setPriority(2);
someSetting.setName("it2");
AccumuloInputFormat.addIterator(job.getConfiguration(), someSetting);
opts = AccumuloInputFormat.getIteratorOptions(job.getConfiguration());
assertEquals(3, opts.size());
for (AccumuloIteratorOption opt : opts) {
assertEquals(opt.getKey().substring(0, key.length()), key);
assertEquals(opt.getValue(), value);
}
}
/**
* Test getting iterator settings for multiple iterators set
*/
@SuppressWarnings("deprecation")
@Test
public void testGetIteratorSettings() {
JobContext job = ContextFactory.createJobContext();
AccumuloInputFormat.setIterator(job, 1, "org.apache.accumulo.core.iterators.WholeRowIterator", "WholeRow");
AccumuloInputFormat.setIterator(job, 2, "org.apache.accumulo.core.iterators.VersioningIterator", "Versions");
AccumuloInputFormat.setIterator(job, 3, "org.apache.accumulo.core.iterators.CountingIterator", "Count");
List<AccumuloIterator> list = AccumuloInputFormat.getIterators(job);
// Check the list size
assertTrue(list.size() == 3);
// Walk the list and make sure our settings are correct
AccumuloIterator setting = list.get(0);
assertEquals(1, setting.getPriority());
assertEquals("org.apache.accumulo.core.iterators.WholeRowIterator", setting.getIteratorClass());
assertEquals("WholeRow", setting.getIteratorName());
setting = list.get(1);
assertEquals(2, setting.getPriority());
assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", setting.getIteratorClass());
assertEquals("Versions", setting.getIteratorName());
setting = list.get(2);
assertEquals(3, setting.getPriority());
assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass());
assertEquals("Count", setting.getIteratorName());
}
/**
* Check that the iterator options are getting stored in the Job conf correctly.
*/
@SuppressWarnings("deprecation")
@Test
public void testSetIteratorOption() {
JobContext job = ContextFactory.createJobContext();
AccumuloInputFormat.setIteratorOption(job, "someIterator", "aKey", "aValue");
Configuration conf = job.getConfiguration();
String options = conf.get("AccumuloInputFormat.iterators.options");
assertEquals(new String("someIterator:aKey:aValue"), options);
}
/**
* Test getting iterator options for multiple options set
*/
@SuppressWarnings("deprecation")
@Test
public void testGetIteratorOption() {
JobContext job = ContextFactory.createJobContext();
AccumuloInputFormat.setIteratorOption(job, "iterator1", "key1", "value1");
AccumuloInputFormat.setIteratorOption(job, "iterator2", "key2", "value2");
AccumuloInputFormat.setIteratorOption(job, "iterator3", "key3", "value3");
List<AccumuloIteratorOption> list = AccumuloInputFormat.getIteratorOptions(job);
// Check the list size
assertEquals(3, list.size());
// Walk the list and make sure all the options are correct
AccumuloIteratorOption option = list.get(0);
assertEquals("iterator1", option.getIteratorName());
assertEquals("key1", option.getKey());
assertEquals("value1", option.getValue());
option = list.get(1);
assertEquals("iterator2", option.getIteratorName());
assertEquals("key2", option.getKey());
assertEquals("value2", option.getValue());
option = list.get(2);
assertEquals("iterator3", option.getIteratorName());
assertEquals("key3", option.getKey());
assertEquals("value3", option.getValue());
}
@SuppressWarnings("deprecation")
@Test
public void testSetRegex() {
JobContext job = ContextFactory.createJobContext();
String regex = ">\"*%<>\'\\";
AccumuloInputFormat.setRegex(job, org.apache.accumulo.core.client.mapreduce.InputFormatBase.RegexType.ROW, regex);
assertTrue(regex.equals(AccumuloInputFormat.getRegex(job, org.apache.accumulo.core.client.mapreduce.InputFormatBase.RegexType.ROW)));
}
static class TestMapper extends Mapper<Key,Value,Key,Value> {
Key key = null;
int count = 0;
@Override
protected void map(Key k, Value v, Context context) throws IOException, InterruptedException {
if (key != null)
assertEquals(key.getRow().toString(), new String(v.get()));
assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
assertEquals(new String(v.get()), String.format("%09x", count));
key = new Key(k);
count++;
}
}
@Test
public void testMap() throws Exception {
MockInstance mockInstance = new MockInstance("testmapinstance");
Connector c = mockInstance.getConnector("root", new byte[] {});
c.tableOperations().create("testtable");
BatchWriter bw = c.createBatchWriter("testtable", 10000L, 1000L, 4);
for (int i = 0; i < 100; i++) {
Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
bw.addMutation(m);
}
bw.close();
Job job = new Job(new Configuration());
job.setInputFormatClass(AccumuloInputFormat.class);
job.setMapperClass(TestMapper.class);
job.setNumReduceTasks(0);
AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable", new Authorizations());
AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testmapinstance");
AccumuloInputFormat input = new AccumuloInputFormat();
List<InputSplit> splits = input.getSplits(job);
assertEquals(splits.size(), 1);
TestMapper mapper = (TestMapper) job.getMapperClass().newInstance();
for (InputSplit split : splits) {
RangeInputSplit risplit = (RangeInputSplit) split;
Assert.assertEquals("root", risplit.getUsername());
Assert.assertArrayEquals(new byte[0], risplit.getPassword());
Assert.assertEquals("testtable", risplit.getTable());
Assert.assertEquals(new Authorizations(), risplit.getAuths());
Assert.assertEquals("testmapinstance", risplit.getInstanceName());
TaskAttemptContext tac = ContextFactory.createTaskAttemptContext(job);
RecordReader<Key,Value> reader = input.createRecordReader(split, tac);
Mapper<Key,Value,Key,Value>.Context context = ContextFactory.createMapContext(mapper, tac, reader, null, split);
reader.initialize(split, context);
mapper.run(context);
}
}
@Test
public void testSimple() throws Exception {
MockInstance mockInstance = new MockInstance("testmapinstance");
Connector c = mockInstance.getConnector("root", new byte[] {});
c.tableOperations().create("testtable2");
BatchWriter bw = c.createBatchWriter("testtable2", 10000L, 1000L, 4);
for (int i = 0; i < 100; i++) {
Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
bw.addMutation(m);
}
bw.close();
JobContext job = ContextFactory.createJobContext();
AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable2", new Authorizations());
AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testmapinstance");
AccumuloInputFormat input = new AccumuloInputFormat();
RangeInputSplit ris = new RangeInputSplit();
TaskAttemptContext tac = ContextFactory.createTaskAttemptContext(job);
RecordReader<Key,Value> rr = input.createRecordReader(ris, tac);
rr.initialize(ris, tac);
TestMapper mapper = new TestMapper();
Mapper<Key,Value,Key,Value>.Context context = ContextFactory.createMapContext(mapper, tac, rr, null, ris);
rr.initialize(ris, tac);
while (rr.nextKeyValue()) {
mapper.map(rr.getCurrentKey(), rr.getCurrentValue(), context);
}
}
@SuppressWarnings("deprecation")
@Test
public void testRegex() throws Exception {
MockInstance mockInstance = new MockInstance("testmapinstance");
Connector c = mockInstance.getConnector("root", new byte[] {});
c.tableOperations().create("testtable3");
BatchWriter bw = c.createBatchWriter("testtable3", 10000L, 1000L, 4);
for (int i = 0; i < 100; i++) {
Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
bw.addMutation(m);
}
bw.close();
JobContext job = ContextFactory.createJobContext();
AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable3", new Authorizations());
AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testmapinstance");
final String regex = ".*1.*";
AccumuloInputFormat.setRegex(job, org.apache.accumulo.core.client.mapreduce.InputFormatBase.RegexType.ROW, regex);
AccumuloInputFormat input = new AccumuloInputFormat();
RangeInputSplit ris = new RangeInputSplit();
TaskAttemptContext tac = ContextFactory.createTaskAttemptContext(job);
RecordReader<Key,Value> rr = input.createRecordReader(ris, tac);
rr.initialize(ris, tac);
Pattern p = Pattern.compile(regex);
while (rr.nextKeyValue()) {
Assert.assertTrue(p.matcher(rr.getCurrentKey().getRow().toString()).matches());
}
}
@SuppressWarnings("deprecation")
@Test
public void testCorrectRangeInputSplits() throws Exception {
JobContext job = ContextFactory.createJobContext();
String username = "user", table = "table", rowRegex = "row.*", colfRegex = "colf.*", colqRegex = "colq.*";
String valRegex = "val.*", instance = "instance";
byte[] password = "password".getBytes();
Authorizations auths = new Authorizations("foo");
Collection<Pair<Text,Text>> fetchColumns = Collections.singleton(new Pair<Text,Text>(new Text("foo"), new Text("bar")));
boolean isolated = true, localIters = true;
int maxVersions = 5;
Level level = Level.WARN;
Instance inst = new MockInstance(instance);
Connector connector = inst.getConnector(username, password);
connector.tableOperations().create(table);
AccumuloInputFormat.setInputInfo(job, username, password, table, auths);
AccumuloInputFormat.setMockInstance(job, instance);
AccumuloInputFormat.setRegex(job, org.apache.accumulo.core.client.mapreduce.InputFormatBase.RegexType.ROW, rowRegex);
AccumuloInputFormat.setRegex(job, org.apache.accumulo.core.client.mapreduce.InputFormatBase.RegexType.COLUMN_FAMILY, colfRegex);
AccumuloInputFormat.setRegex(job, org.apache.accumulo.core.client.mapreduce.InputFormatBase.RegexType.COLUMN_QUALIFIER, colqRegex);
AccumuloInputFormat.setRegex(job, org.apache.accumulo.core.client.mapreduce.InputFormatBase.RegexType.VALUE, valRegex);
AccumuloInputFormat.setIsolated(job, isolated);
AccumuloInputFormat.setLocalIterators(job, localIters);
AccumuloInputFormat.setMaxVersions(job, maxVersions);
AccumuloInputFormat.fetchColumns(job, fetchColumns);
AccumuloInputFormat.setLogLevel(job, level);
AccumuloInputFormat aif = new AccumuloInputFormat();
List<InputSplit> splits = aif.getSplits(job);
Assert.assertEquals(1, splits.size());
InputSplit split = splits.get(0);
Assert.assertEquals(RangeInputSplit.class, split.getClass());
RangeInputSplit risplit = (RangeInputSplit) split;
Assert.assertEquals(username, risplit.getUsername());
Assert.assertEquals(table, risplit.getTable());
Assert.assertArrayEquals(password, risplit.getPassword());
Assert.assertEquals(auths, risplit.getAuths());
Assert.assertEquals(instance, risplit.getInstanceName());
Assert.assertEquals(rowRegex, risplit.getRowRegex());
Assert.assertEquals(colfRegex, risplit.getColfamRegex());
Assert.assertEquals(colqRegex, risplit.getColqualRegex());
Assert.assertEquals(valRegex, risplit.getValueRegex());
Assert.assertEquals(isolated, risplit.isIsolatedScan());
Assert.assertEquals(localIters, risplit.usesLocalIterators());
Assert.assertEquals(maxVersions, risplit.getMaxVersions().intValue());
Assert.assertEquals(fetchColumns, risplit.getFetchedColumns());
Assert.assertEquals(level, risplit.getLogLevel());
}
@Test
public void testPartialInputSplitDelegationToConfiguration() throws Exception {
MockInstance mockInstance = new MockInstance("testPartialInputSplitDelegationToConfiguration");
Connector c = mockInstance.getConnector("root", new byte[] {});
c.tableOperations().create("testtable");
BatchWriter bw = c.createBatchWriter("testtable", 10000L, 1000L, 4);
for (int i = 0; i < 100; i++) {
Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
bw.addMutation(m);
}
bw.close();
Job job = new Job(new Configuration());
job.setInputFormatClass(AccumuloInputFormat.class);
job.setMapperClass(TestMapper.class);
job.setNumReduceTasks(0);
AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable", new Authorizations());
AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testPartialInputSplitDelegationToConfiguration");
AccumuloInputFormat input = new AccumuloInputFormat();
List<InputSplit> splits = input.getSplits(job);
assertEquals(splits.size(), 1);
TestMapper mapper = (TestMapper) job.getMapperClass().newInstance();
RangeInputSplit emptySplit = new RangeInputSplit();
// Using an empty split should fall back to the information in the Job's Configuration
TaskAttemptContext tac = ContextFactory.createTaskAttemptContext(job);
RecordReader<Key,Value> reader = input.createRecordReader(emptySplit, tac);
Mapper<Key,Value,Key,Value>.Context context = ContextFactory.createMapContext(mapper, tac, reader, null, emptySplit);
reader.initialize(emptySplit, context);
mapper.run(context);
}
@Test(expected = IOException.class)
public void testPartialFailedInputSplitDelegationToConfiguration() throws Exception {
MockInstance mockInstance = new MockInstance("testPartialFailedInputSplitDelegationToConfiguration");
Connector c = mockInstance.getConnector("root", new byte[] {});
c.tableOperations().create("testtable");
BatchWriter bw = c.createBatchWriter("testtable", 10000L, 1000L, 4);
for (int i = 0; i < 100; i++) {
Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
bw.addMutation(m);
}
bw.close();
Job job = new Job(new Configuration());
job.setInputFormatClass(AccumuloInputFormat.class);
job.setMapperClass(TestMapper.class);
job.setNumReduceTasks(0);
AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable", new Authorizations());
AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testPartialFailedInputSplitDelegationToConfiguration");
AccumuloInputFormat input = new AccumuloInputFormat();
List<InputSplit> splits = input.getSplits(job);
assertEquals(splits.size(), 1);
TestMapper mapper = (TestMapper) job.getMapperClass().newInstance();
RangeInputSplit emptySplit = new RangeInputSplit();
emptySplit.setUsername("root");
emptySplit.setPassword("anythingelse".getBytes());
// Using an empty split should fall back to the information in the Job's Configuration
TaskAttemptContext tac = ContextFactory.createTaskAttemptContext(job);
RecordReader<Key,Value> reader = input.createRecordReader(emptySplit, tac);
Mapper<Key,Value,Key,Value>.Context context = ContextFactory.createMapContext(mapper, tac, reader, null, emptySplit);
reader.initialize(emptySplit, context);
mapper.run(context);
}
}