Source code of org.apache.accumulo.core.client.mapreduce.AccumuloInputFormatTest (including the nested class TestMapper)

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.accumulo.core.client.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Instance;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.mapreduce.InputFormatBase.AccumuloIterator;
import org.apache.accumulo.core.client.mapreduce.InputFormatBase.AccumuloIteratorOption;
import org.apache.accumulo.core.client.mock.MockInstance;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.user.WholeRowIterator;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.core.util.ContextFactory;
import org.apache.accumulo.core.util.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.log4j.Level;
import org.junit.After;
import org.junit.Assert;
import org.junit.Test;

public class AccumuloInputFormatTest {

  @After
  public void tearDown() throws Exception {}

  /**
   * Test basic setting & getting of max versions.
   *
   * @throws IOException
   *           Signals that an I/O exception has occurred.
   */
  @Test
  public void testMaxVersions() throws IOException {
    JobContext job = ContextFactory.createJobContext();
    AccumuloInputFormat.setMaxVersions(job.getConfiguration(), 1);
    int version = AccumuloInputFormat.getMaxVersions(job.getConfiguration());
    assertEquals(1, version);
  }

  /**
   * Test max versions with an invalid value.
   *
   * @throws IOException
   *           Signals that an I/O exception has occurred.
   */
  @Test(expected = IOException.class)
  public void testMaxVersionsLessThan1() throws IOException {
    JobContext job = ContextFactory.createJobContext();
    AccumuloInputFormat.setMaxVersions(job.getConfiguration(), 0);
  }

  /**
   * Test no max version configured.
   */
  @Test
  public void testNoMaxVersion() {
    JobContext job = ContextFactory.createJobContext();
    assertEquals(-1, AccumuloInputFormat.getMaxVersions(job.getConfiguration()));
  }

  /**
   * Check that the iterator configuration is getting stored in the Job conf correctly.
   */
  @SuppressWarnings("deprecation")
  @Test
  public void testSetIterator() {
    JobContext job = ContextFactory.createJobContext();

    AccumuloInputFormat.setIterator(job, 1, "org.apache.accumulo.core.iterators.WholeRowIterator", "WholeRow");
    Configuration conf = job.getConfiguration();
    String iterators = conf.get("AccumuloInputFormat.iterators");
    assertEquals("1:org.apache.accumulo.core.iterators.WholeRowIterator:WholeRow", iterators);
  }

  @Test
  public void testAddIterator() {
    JobContext job = ContextFactory.createJobContext();

    AccumuloInputFormat.addIterator(job.getConfiguration(), new IteratorSetting(1, "WholeRow", WholeRowIterator.class));
    AccumuloInputFormat.addIterator(job.getConfiguration(), new IteratorSetting(2, "Versions", "org.apache.accumulo.core.iterators.VersioningIterator"));
    IteratorSetting iter = new IteratorSetting(3, "Count", "org.apache.accumulo.core.iterators.CountingIterator");
    iter.addOption("v1", "1");
    iter.addOption("junk", "\0omg:!\\xyzzy");
    AccumuloInputFormat.addIterator(job.getConfiguration(), iter);

    List<AccumuloIterator> list = AccumuloInputFormat.getIterators(job.getConfiguration());

    // Check the list size
    assertEquals(3, list.size());

    // Walk the list and make sure our settings are correct
    AccumuloIterator setting = list.get(0);
    assertEquals(1, setting.getPriority());
    assertEquals("org.apache.accumulo.core.iterators.user.WholeRowIterator", setting.getIteratorClass());
    assertEquals("WholeRow", setting.getIteratorName());

    setting = list.get(1);
    assertEquals(2, setting.getPriority());
    assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", setting.getIteratorClass());
    assertEquals("Versions", setting.getIteratorName());

    setting = list.get(2);
    assertEquals(3, setting.getPriority());
    assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass());
    assertEquals("Count", setting.getIteratorName());

    List<AccumuloIteratorOption> iteratorOptions = AccumuloInputFormat.getIteratorOptions(job.getConfiguration());
    assertEquals(2, iteratorOptions.size());
    assertEquals("Count", iteratorOptions.get(0).getIteratorName());
    assertEquals("Count", iteratorOptions.get(1).getIteratorName());
    assertEquals("v1", iteratorOptions.get(0).getKey());
    assertEquals("1", iteratorOptions.get(0).getValue());
    assertEquals("junk", iteratorOptions.get(1).getKey());
    assertEquals("\0omg:!\\xyzzy", iteratorOptions.get(1).getValue());
  }

  /**
   * Test adding iterator options where the keys and values contain both the FIELD_SEPARATOR character (':') and ITERATOR_SEPARATOR (',') characters. No
   * exception should be thrown when parsing these option entries.
   *
   * This test also verifies that the raw value stored in the Job configuration matches the expected encoded form.
   */
  @Test
  public void testIteratorOptionEncoding() throws Throwable {
    String key = "colon:delimited:key";
    String value = "comma,delimited,value";
    IteratorSetting someSetting = new IteratorSetting(1, "iterator", "Iterator.class");
    someSetting.addOption(key, value);
    Job job = new Job();
    AccumuloInputFormat.addIterator(job.getConfiguration(), someSetting);

    final String rawConfigOpt = new AccumuloIteratorOption("iterator", key, value).toString();

    assertEquals(rawConfigOpt, job.getConfiguration().get("AccumuloInputFormat.iterators.options"));

    List<AccumuloIteratorOption> opts = AccumuloInputFormat.getIteratorOptions(job.getConfiguration());
    assertEquals(1, opts.size());
    assertEquals(key, opts.get(0).getKey());
    assertEquals(value, opts.get(0).getValue());

    someSetting.addOption(key + "2", value);
    someSetting.setPriority(2);
    someSetting.setName("it2");
    AccumuloInputFormat.addIterator(job.getConfiguration(), someSetting);
    opts = AccumuloInputFormat.getIteratorOptions(job.getConfiguration());
    assertEquals(3, opts.size());
    for (AccumuloIteratorOption opt : opts) {
      assertEquals(key, opt.getKey().substring(0, key.length()));
      assertEquals(value, opt.getValue());
    }
  }

  /**
   * Test getting iterator settings for multiple iterators set
   */
  @SuppressWarnings("deprecation")
  @Test
  public void testGetIteratorSettings() {
    JobContext job = ContextFactory.createJobContext();

    AccumuloInputFormat.setIterator(job, 1, "org.apache.accumulo.core.iterators.WholeRowIterator", "WholeRow");
    AccumuloInputFormat.setIterator(job, 2, "org.apache.accumulo.core.iterators.VersioningIterator", "Versions");
    AccumuloInputFormat.setIterator(job, 3, "org.apache.accumulo.core.iterators.CountingIterator", "Count");

    List<AccumuloIterator> list = AccumuloInputFormat.getIterators(job);

    // Check the list size
    assertEquals(3, list.size());

    // Walk the list and make sure our settings are correct
    AccumuloIterator setting = list.get(0);
    assertEquals(1, setting.getPriority());
    assertEquals("org.apache.accumulo.core.iterators.WholeRowIterator", setting.getIteratorClass());
    assertEquals("WholeRow", setting.getIteratorName());

    setting = list.get(1);
    assertEquals(2, setting.getPriority());
    assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", setting.getIteratorClass());
    assertEquals("Versions", setting.getIteratorName());

    setting = list.get(2);
    assertEquals(3, setting.getPriority());
    assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass());
    assertEquals("Count", setting.getIteratorName());

  }

  /**
   * Check that the iterator options are getting stored in the Job conf correctly.
   */
  @SuppressWarnings("deprecation")
  @Test
  public void testSetIteratorOption() {
    JobContext job = ContextFactory.createJobContext();
    AccumuloInputFormat.setIteratorOption(job, "someIterator", "aKey", "aValue");

    Configuration conf = job.getConfiguration();
    String options = conf.get("AccumuloInputFormat.iterators.options");
    assertEquals("someIterator:aKey:aValue", options);
  }

  /**
   * Test getting iterator options for multiple options set
   */
  @SuppressWarnings("deprecation")
  @Test
  public void testGetIteratorOption() {
    JobContext job = ContextFactory.createJobContext();

    AccumuloInputFormat.setIteratorOption(job, "iterator1", "key1", "value1");
    AccumuloInputFormat.setIteratorOption(job, "iterator2", "key2", "value2");
    AccumuloInputFormat.setIteratorOption(job, "iterator3", "key3", "value3");

    List<AccumuloIteratorOption> list = AccumuloInputFormat.getIteratorOptions(job);

    // Check the list size
    assertEquals(3, list.size());

    // Walk the list and make sure all the options are correct
    AccumuloIteratorOption option = list.get(0);
    assertEquals("iterator1", option.getIteratorName());
    assertEquals("key1", option.getKey());
    assertEquals("value1", option.getValue());

    option = list.get(1);
    assertEquals("iterator2", option.getIteratorName());
    assertEquals("key2", option.getKey());
    assertEquals("value2", option.getValue());

    option = list.get(2);
    assertEquals("iterator3", option.getIteratorName());
    assertEquals("key3", option.getKey());
    assertEquals("value3", option.getValue());
  }

  @SuppressWarnings("deprecation")
  @Test
  public void testSetRegex() {
    JobContext job = ContextFactory.createJobContext();

    String regex = ">\"*%<>\'\\";

    AccumuloInputFormat.setRegex(job, org.apache.accumulo.core.client.mapreduce.InputFormatBase.RegexType.ROW, regex);

    assertEquals(regex, AccumuloInputFormat.getRegex(job, org.apache.accumulo.core.client.mapreduce.InputFormatBase.RegexType.ROW));
  }

  static class TestMapper extends Mapper<Key,Value,Key,Value> {
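    // Verifies records arrive in row order: each key's row is "%09x" of (count + 1) and each value is "%09x" of count,
    // matching the data written by the tests below.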
    Key key = null;
    int count = 0;

    @Override
    protected void map(Key k, Value v, Context context) throws IOException, InterruptedException {
      if (key != null)
        assertEquals(key.getRow().toString(), new String(v.get()));
      assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
      assertEquals(new String(v.get()), String.format("%09x", count));
      key = new Key(k);
      count++;
    }
  }

  @Test
  public void testMap() throws Exception {
    MockInstance mockInstance = new MockInstance("testmapinstance");
    Connector c = mockInstance.getConnector("root", new byte[] {});
    c.tableOperations().create("testtable");
    BatchWriter bw = c.createBatchWriter("testtable", 10000L, 1000L, 4);
    for (int i = 0; i < 100; i++) {
      Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
      m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
      bw.addMutation(m);
    }
    bw.close();

    Job job = new Job(new Configuration());
    job.setInputFormatClass(AccumuloInputFormat.class);
    job.setMapperClass(TestMapper.class);
    job.setNumReduceTasks(0);
    AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable", new Authorizations());
    AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testmapinstance");

    AccumuloInputFormat input = new AccumuloInputFormat();
    List<InputSplit> splits = input.getSplits(job);
    assertEquals(1, splits.size());

    TestMapper mapper = (TestMapper) job.getMapperClass().newInstance();
    for (InputSplit split : splits) {
      RangeInputSplit risplit = (RangeInputSplit) split;
      Assert.assertEquals("root", risplit.getUsername());
      Assert.assertArrayEquals(new byte[0], risplit.getPassword());
      Assert.assertEquals("testtable", risplit.getTable());
      Assert.assertEquals(new Authorizations(), risplit.getAuths());
      Assert.assertEquals("testmapinstance", risplit.getInstanceName());

      TaskAttemptContext tac = ContextFactory.createTaskAttemptContext(job);
      RecordReader<Key,Value> reader = input.createRecordReader(split, tac);
      Mapper<Key,Value,Key,Value>.Context context = ContextFactory.createMapContext(mapper, tac, reader, null, split);
      reader.initialize(split, context);
      mapper.run(context);
    }
  }

  @Test
  public void testSimple() throws Exception {
    MockInstance mockInstance = new MockInstance("testmapinstance");
    Connector c = mockInstance.getConnector("root", new byte[] {});
    c.tableOperations().create("testtable2");
    BatchWriter bw = c.createBatchWriter("testtable2", 10000L, 1000L, 4);
    for (int i = 0; i < 100; i++) {
      Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
      m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
      bw.addMutation(m);
    }
    bw.close();

    JobContext job = ContextFactory.createJobContext();
    AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable2", new Authorizations());
    AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testmapinstance");
    AccumuloInputFormat input = new AccumuloInputFormat();
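    // An empty split carries no connection info, so the record reader falls back to what is stored in the Job's Configuration.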
    RangeInputSplit ris = new RangeInputSplit();
    TaskAttemptContext tac = ContextFactory.createTaskAttemptContext(job);
    RecordReader<Key,Value> rr = input.createRecordReader(ris, tac);
    rr.initialize(ris, tac);

    TestMapper mapper = new TestMapper();
    Mapper<Key,Value,Key,Value>.Context context = ContextFactory.createMapContext(mapper, tac, rr, null, ris);
    rr.initialize(ris, tac);
    while (rr.nextKeyValue()) {
      mapper.map(rr.getCurrentKey(), rr.getCurrentValue(), context);
    }
  }

  @SuppressWarnings("deprecation")
  @Test
  public void testRegex() throws Exception {
    MockInstance mockInstance = new MockInstance("testmapinstance");
    Connector c = mockInstance.getConnector("root", new byte[] {});
    c.tableOperations().create("testtable3");
    BatchWriter bw = c.createBatchWriter("testtable3", 10000L, 1000L, 4);
    for (int i = 0; i < 100; i++) {
      Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
      m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
      bw.addMutation(m);
    }
    bw.close();

    JobContext job = ContextFactory.createJobContext();
    AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable3", new Authorizations());
    AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testmapinstance");
    final String regex = ".*1.*";
    AccumuloInputFormat.setRegex(job, org.apache.accumulo.core.client.mapreduce.InputFormatBase.RegexType.ROW, regex);
    AccumuloInputFormat input = new AccumuloInputFormat();
    RangeInputSplit ris = new RangeInputSplit();
    TaskAttemptContext tac = ContextFactory.createTaskAttemptContext(job);
    RecordReader<Key,Value> rr = input.createRecordReader(ris, tac);
    rr.initialize(ris, tac);

    Pattern p = Pattern.compile(regex);
    while (rr.nextKeyValue()) {
      Assert.assertTrue(p.matcher(rr.getCurrentKey().getRow().toString()).matches());
    }
  }

  @SuppressWarnings("deprecation")
  @Test
  public void testCorrectRangeInputSplits() throws Exception {
    JobContext job = ContextFactory.createJobContext();

    String username = "user", table = "table", rowRegex = "row.*", colfRegex = "colf.*", colqRegex = "colq.*";
    String valRegex = "val.*", instance = "instance";
    byte[] password = "password".getBytes();
    Authorizations auths = new Authorizations("foo");
    Collection<Pair<Text,Text>> fetchColumns = Collections.singleton(new Pair<Text,Text>(new Text("foo"), new Text("bar")));
    boolean isolated = true, localIters = true;
    int maxVersions = 5;
    Level level = Level.WARN;

    Instance inst = new MockInstance(instance);
    Connector connector = inst.getConnector(username, password);
    connector.tableOperations().create(table);

    AccumuloInputFormat.setInputInfo(job, username, password, table, auths);
    AccumuloInputFormat.setMockInstance(job, instance);
    AccumuloInputFormat.setRegex(job, org.apache.accumulo.core.client.mapreduce.InputFormatBase.RegexType.ROW, rowRegex);
    AccumuloInputFormat.setRegex(job, org.apache.accumulo.core.client.mapreduce.InputFormatBase.RegexType.COLUMN_FAMILY, colfRegex);
    AccumuloInputFormat.setRegex(job, org.apache.accumulo.core.client.mapreduce.InputFormatBase.RegexType.COLUMN_QUALIFIER, colqRegex);
    AccumuloInputFormat.setRegex(job, org.apache.accumulo.core.client.mapreduce.InputFormatBase.RegexType.VALUE, valRegex);
    AccumuloInputFormat.setIsolated(job, isolated);
    AccumuloInputFormat.setLocalIterators(job, localIters);
    AccumuloInputFormat.setMaxVersions(job, maxVersions);
    AccumuloInputFormat.fetchColumns(job, fetchColumns);
    AccumuloInputFormat.setLogLevel(job, level);
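    // All of the settings above should be reflected on the single RangeInputSplit returned by getSplits() below.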

    AccumuloInputFormat aif = new AccumuloInputFormat();

    List<InputSplit> splits = aif.getSplits(job);

    Assert.assertEquals(1, splits.size());

    InputSplit split = splits.get(0);

    Assert.assertEquals(RangeInputSplit.class, split.getClass());

    RangeInputSplit risplit = (RangeInputSplit) split;

    Assert.assertEquals(username, risplit.getUsername());
    Assert.assertEquals(table, risplit.getTable());
    Assert.assertArrayEquals(password, risplit.getPassword());
    Assert.assertEquals(auths, risplit.getAuths());
    Assert.assertEquals(instance, risplit.getInstanceName());
    Assert.assertEquals(rowRegex, risplit.getRowRegex());
    Assert.assertEquals(colfRegex, risplit.getColfamRegex());
    Assert.assertEquals(colqRegex, risplit.getColqualRegex());
    Assert.assertEquals(valRegex, risplit.getValueRegex());
    Assert.assertEquals(isolated, risplit.isIsolatedScan());
    Assert.assertEquals(localIters, risplit.usesLocalIterators());
    Assert.assertEquals(maxVersions, risplit.getMaxVersions().intValue());
    Assert.assertEquals(fetchColumns, risplit.getFetchedColumns());
    Assert.assertEquals(level, risplit.getLogLevel());
  }

  @Test
  public void testPartialInputSplitDelegationToConfiguration() throws Exception {
    MockInstance mockInstance = new MockInstance("testPartialInputSplitDelegationToConfiguration");
    Connector c = mockInstance.getConnector("root", new byte[] {});
    c.tableOperations().create("testtable");
    BatchWriter bw = c.createBatchWriter("testtable", 10000L, 1000L, 4);
    for (int i = 0; i < 100; i++) {
      Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
      m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
      bw.addMutation(m);
    }
    bw.close();

    Job job = new Job(new Configuration());
    job.setInputFormatClass(AccumuloInputFormat.class);
    job.setMapperClass(TestMapper.class);
    job.setNumReduceTasks(0);
    AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable", new Authorizations());
    AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testPartialInputSplitDelegationToConfiguration");

    AccumuloInputFormat input = new AccumuloInputFormat();
    List<InputSplit> splits = input.getSplits(job);
    assertEquals(1, splits.size());

    TestMapper mapper = (TestMapper) job.getMapperClass().newInstance();

    RangeInputSplit emptySplit = new RangeInputSplit();

    // Using an empty split should fall back to the information in the Job's Configuration
    TaskAttemptContext tac = ContextFactory.createTaskAttemptContext(job);
    RecordReader<Key,Value> reader = input.createRecordReader(emptySplit, tac);
    Mapper<Key,Value,Key,Value>.Context context = ContextFactory.createMapContext(mapper, tac, reader, null, emptySplit);
    reader.initialize(emptySplit, context);
    mapper.run(context);
  }

  @Test(expected = IOException.class)
  public void testPartialFailedInputSplitDelegationToConfiguration() throws Exception {
    MockInstance mockInstance = new MockInstance("testPartialFailedInputSplitDelegationToConfiguration");
    Connector c = mockInstance.getConnector("root", new byte[] {});
    c.tableOperations().create("testtable");
    BatchWriter bw = c.createBatchWriter("testtable", 10000L, 1000L, 4);
    for (int i = 0; i < 100; i++) {
      Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
      m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
      bw.addMutation(m);
    }
    bw.close();

    Job job = new Job(new Configuration());
    job.setInputFormatClass(AccumuloInputFormat.class);
    job.setMapperClass(TestMapper.class);
    job.setNumReduceTasks(0);
    AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable", new Authorizations());
    AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testPartialFailedInputSplitDelegationToConfiguration");

    AccumuloInputFormat input = new AccumuloInputFormat();
    List<InputSplit> splits = input.getSplits(job);
    assertEquals(1, splits.size());

    TestMapper mapper = (TestMapper) job.getMapperClass().newInstance();

    RangeInputSplit emptySplit = new RangeInputSplit();
    emptySplit.setUsername("root");
    emptySplit.setPassword("anythingelse".getBytes());

    // This split carries its own (incorrect) credentials, so the read should fail rather than fall back to the Job's Configuration
    TaskAttemptContext tac = ContextFactory.createTaskAttemptContext(job);
    RecordReader<Key,Value> reader = input.createRecordReader(emptySplit, tac);
    Mapper<Key,Value,Key,Value>.Context context = ContextFactory.createMapContext(mapper, tac, reader, null, emptySplit);
    reader.initialize(emptySplit, context);
    mapper.run(context);
  }
}