package cc.twittertools.hbase;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Logger;
/**
 * Data-access object for the HBase {@code wordcount} table.
 *
 * <p>Table handles are borrowed from the {@link HTablePool} supplied at
 * construction time. Each row is keyed by a word; each column in the
 * {@code count} family stores {@link #NUM_INTERVALS} 4-byte integers packed
 * back to back into a single cell value.
 */
public class WordCountDAO {
// 60*24 = 1440 -- presumably the number of minutes in a day; TODO confirm the unit.
private final static int DAY = 60*24;
// Length of one counting interval, presumably in the same unit as DAY; TODO confirm.
private final static int INTERVAL = 5;
// Number of intervals per DAY (1440 / 5 = 288 with the values above). It sizes every
// count array and every packed cell value. NOTE(review): public and non-final, so any
// code can reassign it; writers and readers must agree on the value.
public static int NUM_INTERVALS = DAY/INTERVAL;
// HBase table name, as bytes.
public static final byte[] TABLE_NAME = Bytes.toBytes("wordcount");
// The single column family used for all count cells, as bytes.
public static final byte[] COLUMN_FAMILY = Bytes.toBytes("count");
private static final Logger log = Logger.getLogger(WordCountDAO.class);
// Source of table handles for every read, write and delete issued by this DAO.
private HTablePool pool;
/**
 * Creates a DAO that borrows its table handles from the given pool.
 *
 * @param pool pool used to obtain handles for {@code TABLE_NAME}; stored as-is
 */
public WordCountDAO(HTablePool pool) {
    this.pool = pool;
}
public void CreateTable() throws IOException, ZooKeeperConnectionException{
Configuration conf = HBaseConfiguration.create();
HBaseAdmin hbase = new HBaseAdmin(conf);
HTableDescriptor[] wordcounts = hbase.listTables("wordcount");
if(wordcounts.length != 0){ //Drop Table if Exists
HTableDescriptor wordcount = new HTableDescriptor(TABLE_NAME);
// Cannot edit a stucture on an active table.
HColumnDescriptor columnFamily = new HColumnDescriptor(COLUMN_FAMILY);
hbase.addColumn(TABLE_NAME, columnFamily);
private static Get mkGet(String word) throws IOException {
log.debug(String.format("Creating Get for %s", word));
Get g = new Get(Bytes.toBytes(word));
return g;
private static Put mkPut(WordCount w){
log.debug(String.format("Creating Put for %s", w.word));
Put p = new Put(w.word);
// add integer compression here
// convert 2-d byte array to 1-d byte array
byte[] storage = new byte[NUM_INTERVALS*Integer.SIZE/Byte.SIZE];
for(int i=0; i< NUM_INTERVALS; i++){
for(int j=0; j<Integer.SIZE/Byte.SIZE; j++){
storage[i*Integer.SIZE/Byte.SIZE+j] = w.count[i][j];
p.add(COLUMN_FAMILY, w.column_id, storage);
return p;
private static Delete mkDel(String word) {
log.debug(String.format("Creating Delete for %s", word));
Delete d = new Delete(Bytes.toBytes(word));
return d;
private static Scan mkScan() {
Scan s = new Scan();
return s;
public void addWordCount(WordCount w) throws IOException{
HTableInterface words = pool.getTable(TABLE_NAME);
Put p = mkPut(w);
public List<WordCount> getWordCount(String word) throws IOException {
HTableInterface words = pool.getTable(TABLE_NAME);
Get g = mkGet(word);
Result result = words.get(g);
if (result.isEmpty()) {"word %s not found.", word));
return null;
List<WordCount> wordCounts = WordCount.GetWordCountFromResults(result);
return wordCounts;
public void deleteUser(String word) throws IOException {
HTableInterface words = pool.getTable(TABLE_NAME);
Delete d = mkDel(word);
/**
 * Per-interval counts of one word under one column qualifier, i.e. the
 * contents of a single cell of the {@code wordcount} table.
 */
public static class WordCount{
// Row key bytes; mkPut uses this as the row of the Put.
public byte[] word;
// Column qualifier bytes within COLUMN_FAMILY.
public byte[] column_id;
// One entry per interval; the public constructors and setCount fill each entry
// with Bytes.toBytes(int), i.e. a 4-byte encoded integer.
public byte[][] count;
public WordCount(byte[] word, byte[] column_id){
this.word = word;
this.column_id = column_id;
this.count = new byte[NUM_INTERVALS][];
for(int i=0; i < NUM_INTERVALS; i++){
this.count[i] = Bytes.toBytes(0);
/**
 * Creates a word count from string identifiers, with every interval
 * initialised to the encoded integer 0.
 *
 * @param word the word; converted to bytes with {@code Bytes.toBytes}
 * @param column_id the column qualifier; converted to bytes with {@code Bytes.toBytes}
 */
public WordCount(String word, String column_id){
    // Same initialisation as the byte[] constructor, after encoding both strings.
    this(Bytes.toBytes(word), Bytes.toBytes(column_id));
}
/**
 * Wraps already-decoded data; all three arrays are stored by reference,
 * without copying.
 *
 * @param word row key bytes
 * @param column_id column qualifier bytes
 * @param count per-interval encoded counts
 */
private WordCount(byte[] word, byte[] column_id, byte[][] count){
    this.count = count;
    this.column_id = column_id;
    this.word = word;
}
public static List<WordCount> GetWordCountFromResults(Result r){
List<WordCount> wordCounts = new ArrayList<WordCount>();
byte[] word = r.getRow();
// Map from column qualifiers to values
NavigableMap<byte[],byte[]> familyMap = r.getFamilyMap(COLUMN_FAMILY);
for(byte[] column: familyMap.keySet()){
byte[] value = familyMap.get(column);
// decompression
byte[][] count = new byte[NUM_INTERVALS][Integer.SIZE/Byte.SIZE];
for(int i=0; i<NUM_INTERVALS; i++){
for(int j=0; j<Integer.SIZE/Byte.SIZE; j++){
count[i][j] = value[i*Integer.SIZE/Byte.SIZE+j];
WordCount w = new WordCount(word, column, count);
return wordCounts;
public void setCount(int interval, int count){
this.count[interval] = Bytes.toBytes(count);