Package cc.twittertools.hbase

Source Code of cc.twittertools.hbase.WordCountDAO

package cc.twittertools.hbase;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Logger;


public class WordCountDAO {
   private final static int DAY = 60*24;
   private final static int INTERVAL = 5;
   public static int NUM_INTERVALS = DAY/INTERVAL;
   public static final byte[] TABLE_NAME = Bytes.toBytes("wordcount");
   public static final byte[] COLUMN_FAMILY = Bytes.toBytes("count");
  
   private static final Logger log = Logger.getLogger(WordCountDAO.class);

   private HTablePool pool;
  
   public WordCountDAO(HTablePool pool) {
     this.pool = pool;
   }
  
   public void CreateTable() throws IOException, ZooKeeperConnectionException{
     Configuration conf = HBaseConfiguration.create();
     
     HBaseAdmin hbase = new HBaseAdmin(conf);
     HTableDescriptor[] wordcounts = hbase.listTables("wordcount");
     
     if(wordcounts.length != 0){ //Drop Table if Exists
        hbase.disableTable(TABLE_NAME);
       hbase.deleteTable(TABLE_NAME);
     }
    
     HTableDescriptor wordcount = new HTableDescriptor(TABLE_NAME);
     hbase.createTable(wordcount);
     // Cannot edit a stucture on an active table.
     hbase.disableTable(TABLE_NAME);
     HColumnDescriptor columnFamily = new HColumnDescriptor(COLUMN_FAMILY);
     hbase.addColumn(TABLE_NAME, columnFamily);
     hbase.enableTable(TABLE_NAME);
    
     hbase.close();
   }
  
   private static Get mkGet(String word) throws IOException {
     log.debug(String.format("Creating Get for %s", word));

     Get g = new Get(Bytes.toBytes(word));
     g.addFamily(COLUMN_FAMILY);
     return g;
   }
  
   private static Put mkPut(WordCount w){
     log.debug(String.format("Creating Put for %s", w.word));

     Put p = new Put(w.word);
     // add integer compression here
     // convert 2-d byte array to 1-d byte array
     byte[] storage = new byte[NUM_INTERVALS*Integer.SIZE/Byte.SIZE];
     for(int i=0; i< NUM_INTERVALS; i++){
       for(int j=0; j<Integer.SIZE/Byte.SIZE; j++){
        storage[i*Integer.SIZE/Byte.SIZE+j] = w.count[i][j];
       }
     }
     p.add(COLUMN_FAMILY, w.column_id, storage);
    
     return p;
   }
  
   private static Delete mkDel(String word) {
      log.debug(String.format("Creating Delete for %s", word));

      Delete d = new Delete(Bytes.toBytes(word));
      return d;
    }

   private static Scan mkScan() {
     Scan s = new Scan();
     s.addFamily(COLUMN_FAMILY);
     return s;
   }
  
   public void addWordCount(WordCount w) throws IOException{
     HTableInterface words = pool.getTable(TABLE_NAME);
     Put p = mkPut(w);
     words.put(p);
     words.close();
   }
  
   public List<WordCount> getWordCount(String word) throws IOException {
     HTableInterface words = pool.getTable(TABLE_NAME);
     Get g = mkGet(word);
     Result result = words.get(g);
     if (result.isEmpty()) {
          log.info(String.format("word %s not found.", word));
          return null;
     }

     List<WordCount> wordCounts = WordCount.GetWordCountFromResults(result);
     words.close();
     return wordCounts;
   }
  
   public void deleteUser(String word) throws IOException {
     HTableInterface words = pool.getTable(TABLE_NAME);

       Delete d = mkDel(word);
       words.delete(d);

       words.close();
   }
  
   public static class WordCount{
     public byte[] word;
     public byte[] column_id;
     public byte[][] count;
    
     public WordCount(byte[] word, byte[] column_id){
       this.word = word;
       this.column_id = column_id;
       this.count = new byte[NUM_INTERVALS][];
       for(int i=0; i < NUM_INTERVALS; i++){
         this.count[i] = Bytes.toBytes(0);
       }
     }
    
     public WordCount(String word, String column_id){
       this.word = Bytes.toBytes(word);
       this.column_id = Bytes.toBytes(column_id);
       this.count = new byte[NUM_INTERVALS][];
       for(int i=0; i < NUM_INTERVALS; i++){
         this.count[i] = Bytes.toBytes(0);
       }
     }
    
     private WordCount(byte[] word, byte[] column_id, byte[][] count){
       this.word = word;
       this.column_id = column_id;
       this.count = count;
     }
    
     public static List<WordCount> GetWordCountFromResults(Result r){
       List<WordCount> wordCounts = new ArrayList<WordCount>();
       byte[] word = r.getRow();
       // Map from column qualifiers to values
       NavigableMap<byte[],byte[]> familyMap = r.getFamilyMap(COLUMN_FAMILY);
       for(byte[] column: familyMap.keySet()){
         byte[] value = familyMap.get(column);
         // decompression
         byte[][] count = new byte[NUM_INTERVALS][Integer.SIZE/Byte.SIZE];
         for(int i=0; i<NUM_INTERVALS; i++){
           for(int j=0; j<Integer.SIZE/Byte.SIZE; j++){
             count[i][j] = value[i*Integer.SIZE/Byte.SIZE+j];
           }
         }
         WordCount w = new WordCount(word, column, count);
         wordCounts.add(w);
       }
       return wordCounts;
     }
    
     public void setCount(int interval, int count){
       this.count[interval] = Bytes.toBytes(count);
     }
   }
}
TOP

Related Classes of cc.twittertools.hbase.WordCountDAO

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.