Package com.infochimps.hbase.pig

Source Code of com.infochimps.hbase.pig.DynamicFamilyStorage

package com.infochimps.hbase.pig;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.pig.LoadCaster;
import org.apache.pig.StoreFunc;
import org.apache.pig.LoadPushDown;
import org.apache.pig.LoadStoreCaster;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.StoreFuncInterface;
import org.apache.pig.backend.hadoop.hbase.HBaseBinaryConverter;
import org.apache.pig.builtin.Utf8StorageConverter;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.data.BagFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.util.Utils;


import com.google.common.collect.Lists;

/**
* Different from default HBaseStorage in that a field in the data is used as the column family name
* and another field is used as the column name.
*
*/
public class DynamicFamilyStorage extends StoreFunc implements StoreFuncInterface {
   
    private static final Log LOG = LogFactory.getLog(DynamicFamilyStorage.class);

    private final static String STRING_CASTER   = "UTF8StorageConverter";
    private final static String BYTE_CASTER     = "HBaseBinaryConverter";
    private final static String CASTER_PROPERTY = "pig.hbase.caster";
   
    private List<byte[]> columnList_ = Lists.newArrayList();
    private HTable m_table;

    private Configuration m_conf;
    private RecordReader reader;
    private RecordWriter writer;
    private Scan scan;

    private LoadCaster caster_;
    private ResourceSchema schema_;

    public DynamicFamilyStorage() throws IOException {
        m_conf  = HBaseConfiguration.create();
        caster_ = new Utf8StorageConverter();
    }

    @Override
    public OutputFormat getOutputFormat() throws IOException {
        TableOutputFormat outputFormat = new TableOutputFormat();
        return outputFormat;
    }

    @Override
    public void checkSchema(ResourceSchema s) throws IOException {
        if (! (caster_ instanceof LoadStoreCaster)) {
            LOG.error("Caster must implement LoadStoreCaster for writing to HBase.");
            throw new IOException("Bad Caster " + caster_.getClass());
        }
        schema_ = s;
    }

    @Override
    public void prepareToWrite(@SuppressWarnings("rawtypes") RecordWriter writer) throws IOException {
        this.writer = writer;
    }

    @SuppressWarnings("unchecked")
    @Override
    public void putNext(Tuple t) throws IOException {
        byte[] rowKey = objToBytes(t.get(0), DataType.findType(t.get(0))); // Convert row key to byte[]
       
        if(rowKey != null && t.size() >= 4) {
            long ts = System.currentTimeMillis();
            Put put = new Put(rowKey);
            put.setWriteToWAL(false);
           
            byte[] family  = objToBytes(t.get(1), DataType.findType(t.get(1)));
            byte[] colName = objToBytes(t.get(2), DataType.findType(t.get(2)));
            byte[] colVal  = objToBytes(t.get(3), DataType.findType(t.get(3)));
            if (t.size() == 5) {
                ts = Long.parseLong(t.get(4).toString());
            }
            if (colVal!=null) {
                put.add(family, colName, ts, colVal);
            }
            try {
                if (!put.isEmpty()) { // Don't try to write a row with 0 columns
                    writer.write(null, put);
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
        }
    }
   
    @SuppressWarnings("unchecked")
    private byte[] objToBytes(Object o, byte type) throws IOException {
        LoadStoreCaster caster = (LoadStoreCaster) caster_;
        switch (type) {
        case DataType.BYTEARRAY: return ((DataByteArray) o).get();
        case DataType.BAG: return caster.toBytes((DataBag) o);
        case DataType.CHARARRAY: return caster.toBytes((String) o);
        case DataType.DOUBLE: return caster.toBytes((Double) o);
        case DataType.FLOAT: return caster.toBytes((Float) o);
        case DataType.INTEGER: return caster.toBytes((Integer) o);
        case DataType.LONG: return caster.toBytes((Long) o);
       
        // The type conversion here is unchecked.
        // Relying on DataType.findType to do the right thing.
        case DataType.MAP: return caster.toBytes((Map<String, Object>) o);
       
        case DataType.NULL: return null;
        case DataType.TUPLE: return caster.toBytes((Tuple) o);
        case DataType.ERROR: throw new IOException("Unable to determine type of " + o.getClass());
        default: throw new IOException("Unable to find a converter for tuple field " + o);
        }
    }

    @Override
    public String relToAbsPathForStoreLocation(String location, Path curDir)
    throws IOException {
        return location;
    }

    @Override
    public void setStoreFuncUDFContextSignature(String signature) { }

    @Override
    public void setStoreLocation(String location, Job job) throws IOException {
        if (location.startsWith("hbase://")){
            job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, location.substring(8));
        }else{
            job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, location);
        }
    }

    @Override
    public void cleanupOnFailure(String location, Job job) throws IOException {

    }
}
TOP

Related Classes of com.infochimps.hbase.pig.DynamicFamilyStorage

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.