Source Code of org.apache.hadoop.zebra.io.TestColumnGroupReaders

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.hadoop.zebra.io;


import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;


import junit.framework.Assert;
import junit.framework.TestCase;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.zebra.io.BlockDistribution;
import org.apache.hadoop.zebra.io.ColumnGroup;
import org.apache.hadoop.zebra.io.TableInserter;
import org.apache.hadoop.zebra.io.TableScanner;
import org.apache.hadoop.zebra.io.ColumnGroup.Reader.CGRangeSplit;
import org.apache.hadoop.zebra.types.ParseException;
import org.apache.hadoop.zebra.types.Schema;
import org.apache.hadoop.zebra.types.TypesUtils;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;


public class TestColumnGroupReaders {
  final static String outputFile = "TestColumnGroupReaders";
  final static private Configuration conf = new Configuration();
  private static FileSystem fs;
  private static Path path;
  private static ColumnGroup.Writer writer;


  @BeforeClass
  public static void setUpOnce() throws IOException {
    // set default file system to local file system
    conf.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");


    // must set a conf here to the underlying FS, or it barks
    RawLocalFileSystem rawLFS = new RawLocalFileSystem();
    rawLFS.setConf(conf);
    fs = new LocalFileSystem(rawLFS);
    path = new Path(fs.getWorkingDirectory(), outputFile);
    System.out.println("output file: " + path);
  }


  @AfterClass
  public static void tearDownOnce() throws IOException {
    finish();
  }


  @SuppressWarnings("unchecked")
  @Test
  public void testInsert2Inserters() throws ExecException, IOException,
      ParseException {
    System.out.println("testInsert2Inserters");
    boolean sorted = false; // true;
    writer = new ColumnGroup.Writer(path, "col1, colTWO", sorted, "pig",
        "lzo2", true, conf);
    TableInserter ins1 = writer.getInserter("part1", false);
    TableInserter ins2 = writer.getInserter("part2", false);


    // row 1
    Tuple row = TypesUtils.createTuple(writer.getSchema());
    row.set(0, "val1");


    SortedMap<String, String> map = new TreeMap<String, String>();
    map.put("john", "boy");
    row.set(1, map);


    ins1.insert(new BytesWritable("key11".getBytes()), row);
    ins2.insert(new BytesWritable("key21".getBytes()), row);


    // row 2
    TypesUtils.resetTuple(row);
    row.set(0, "val2");
    map.put("joe", "boy");
    map.put("jane", "girl");
    // map should contain 3 k->v pairs
    row.set(1, map);


    ins2.insert(new BytesWritable("key22".getBytes()), row);
    ins2.insert(new BytesWritable("key23".getBytes()), row);
    // ins2.close();
    BytesWritable key12 = new BytesWritable("key12".getBytes());
    ins1.insert(key12, row);


    ins1.close();
    ins2.close();
    finish();


    // test without beginKey/endKey
    ColumnGroup.Reader reader = new ColumnGroup.Reader(path, conf);
    List<CGRangeSplit> listRanges = reader.rangeSplit(2);
    // TableScanner scanner = reader.getScanner(null, null, false);
    TableScanner scanner = reader.getScanner(listRanges.get(0), false);
    BytesWritable key = new BytesWritable();
    scanner.getKey(key);
    Assert
        .assertTrue(key.compareTo(new BytesWritable("key11".getBytes())) == 0);
    TypesUtils.resetTuple(row);
    scanner.getValue(row);
    Assert.assertEquals("val1", row.get(0));
    Map<String, String> mapFld2 = (Map<String, String>) row.get(1);
    Assert.assertEquals("boy", mapFld2.get("john"));
    scanner.close();
    scanner = null;


    // test with beginKey
    BytesWritable key22 = new BytesWritable("key22".getBytes());
    // TableScanner scanner2 = reader.getScanner(key22, null, true); // for
    // sorted
    TableScanner scanner2 = reader.getScanner(listRanges.get(1), true);
    BytesWritable key2 = new BytesWritable();
    scanner2.advance();// not for sorted
    scanner2.getKey(key2);
    Assert.assertEquals(key2, key22);
    TypesUtils.resetTuple(row);
    scanner2.getValue(row);
    Assert.assertEquals("val2", row.get(0));


    // test advance
    scanner2.advance();
    BytesWritable key4 = new BytesWritable();
    scanner2.getKey(key4);
    Assert
        .assertTrue(key4.compareTo(new BytesWritable("key23".getBytes())) == 0);
    TypesUtils.resetTuple(row);
    scanner2.getValue(row);
    Assert.assertEquals("val2", row.get(0));


    Map<String, String> mapFld5 = (Map<String, String>) row.get(1);
    Assert.assertEquals("girl", mapFld5.get("jane"));


    // test seekTo
    // Assert.assertTrue("failed to locate key: " + key22,
    // scanner2.seekTo(key22));
    // BytesWritable key22Read = new BytesWritable();
    // scanner2.getKey(key22Read);
    // Assert.assertEquals(new BytesWritable("key22".getBytes()), key22Read);


    // Map<String, Long> map;
    BlockDistribution dist = reader.getBlockDistribution(listRanges.get(0));
    long n = dist.getLength();


    reader.close();
    close();
  }


  @Test
  public void testMultiWriters() throws ExecException, Exception {
    System.out.println("testMultiWriters");
    ColumnGroup.Writer writer1 = writeOnePart("col1, col2, col3", 1);
    ColumnGroup.Writer writer2 = writeOnePart(null, 2);
    ColumnGroup.Writer writer3 = writeOnePart(null, 3);


    writer1.finish();
    writer2.finish();
    writer3.finish();


    // read in parts
    readOnePart(1);
    readOnePart(2);
    readOnePart(3);


    // read in one big sequence
    readInSequence(3);


    readProjection(1);


    close();


  }


  private static ColumnGroup.Writer writeOnePart(String schemaString, int count)
      throws IOException, ExecException, ParseException {
    ColumnGroup.Writer writer = null;
    if (schemaString != null) {
      writer = new ColumnGroup.Writer(path, schemaString, false, "pig", "lzo2",
          true, conf);
    } else {
      writer = new ColumnGroup.Writer(path, conf);
    }
    TableInserter ins = writer.getInserter(String.format("part-%06d", count),
        false);
    Tuple row = TypesUtils.createTuple(writer.getSchema());
    SortedMap<String, String> map = new TreeMap<String, String>();
    for (int nx = 0; nx < count; nx++) {
      row.set(0, String.format("smpcol%02d%02d", count, nx + 1));


      map.put(String.format("mapcolkey%02d%02d", count, nx + 1), String.format(
          "mapcolvalue%02d%02d", count, nx + 1));
      row.set(1, map);


      row.set(2, String.format("col3_%02d%02d", count, nx + 1));


      ins.insert(new BytesWritable(String.format("key%02d%02d", count, nx + 1)
          .getBytes()), row);
    }
    ins.close();
    return writer;
  }


  private static void readInSequence(int count) throws IOException,
      ExecException, ParseException {
    ColumnGroup.Reader reader = new ColumnGroup.Reader(path, conf);
    TableScanner scanner = reader.getScanner(null, true);


    Tuple row = TypesUtils.createTuple(reader.getSchema());
    BytesWritable key = new BytesWritable();
    for (int partIdx = 0; partIdx < count; ++partIdx) {
      for (int keyIdx = 0; keyIdx < partIdx + 1; keyIdx++) {
        scanner.getKey(key);
        String keyString = String
            .format("key%02d%02d", partIdx + 1, keyIdx + 1);
        Assert.assertTrue(new String(key.get()) + " != " + keyString, key
            .equals(new BytesWritable(keyString.getBytes())));
        scanner.advance();
      }
    }


    scanner.close();
    scanner = null;
    reader.close();
  }


  private static void readOnePart(int count) throws IOException, ExecException,
      ParseException {
    ColumnGroup.Reader reader = new ColumnGroup.Reader(path, conf);
    TableScanner scanner = reader.getScanner(reader.rangeSplit(5)
        .get(count - 1), true);


    Tuple row = TypesUtils.createTuple(reader.getSchema());
    BytesWritable key = new BytesWritable();
    for (int nx = 0; nx < count; ++nx) {
      scanner.getKey(key);
      String keyString = String.format("key%02d%02d", count, nx + 1);
      Assert.assertTrue("Not equal to " + keyString, key
          .equals(new BytesWritable(keyString.getBytes())));
      TypesUtils.resetTuple(row);
      scanner.getValue(row);


      String simpVal = String.format("smpcol%02d%02d", count, nx + 1);
      Assert.assertEquals(simpVal, row.get(0));


      String mapKey = String.format("mapcolkey%02d%02d", count, nx + 1);
      String mapVal = String.format("mapcolvalue%02d%02d", count, nx + 1);
      Map<String, String> mapFld2 = (Map<String, String>) row.get(1);
      Assert.assertEquals(mapVal, mapFld2.get(mapKey));


      String val3 = String.format("col3_%02d%02d", count, nx + 1);
      Assert.assertEquals(val3, row.get(2));


      if (nx < count - 1) {
        scanner.advance();
      }
    }
    scanner.close();
    scanner = null;
    reader.close();
  }


  private static void readProjection(int count) throws IOException,
      ExecException, ParseException {
    ColumnGroup.Reader reader = new ColumnGroup.Reader(path, conf);
    reader.setProjection("col2, col3, foo, col1");
    TableScanner scanner = reader.getScanner(null, true);


    Tuple row = TypesUtils.createTuple(reader.getSchema());
    BytesWritable key = new BytesWritable();
    scanner.getKey(key);


    String keyString = String.format("key%02d%02d", 1, 1);
    Assert.assertTrue("Not equal to " + keyString, key
        .equals(new BytesWritable(keyString.getBytes())));
    TypesUtils.resetTuple(row);
    scanner.getValue(row);


    // physical 3, logical 2
    String val3 = String.format("col3_%02d%02d", 1, 1);
    Assert.assertEquals(val3, row.get(1));


    // physical 2, logical 1
    String mapKey = String.format("mapcolkey%02d%02d", 1, 1);
    String mapVal = String.format("mapcolvalue%02d%02d", 1, 1);
    Map<String, String> mapFld2 = (Map<String, String>) row.get(0);
    Assert.assertEquals(mapVal, mapFld2.get(mapKey));


    // physical NONE, logical 3
    Assert.assertNull(row.get(2));


    String simpVal = String.format("smpcol%02d%02d", 1, 1);
    Assert.assertEquals(simpVal, row.get(3));


    // move to next row
    scanner.advance();
    scanner.getValue(row);


    simpVal = String.format("smpcol%02d%02d", 2, 1);
    Assert.assertEquals(simpVal, row.get(3));


    scanner.close();
    scanner = null;
    reader.close();
  }


  private static void finish() throws IOException {
    if (writer != null) {
      writer.finish();
    }
  }


  private static void close() throws IOException {
    if (writer != null) {
      writer.close();
      writer = null;
    }
    ColumnGroup.drop(path, conf);
  }
}
Source Code of org.apache.hadoop.zebra.io.TestColumnGroupReaders

Related Classes of org.apache.hadoop.zebra.io.TestColumnGroupReaders