Package cc.twittertools.hbase

Source Code of cc.twittertools.hbase.LoadWordCount

package cc.twittertools.hbase;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.util.Bytes;

import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;

public class LoadWordCount {

  public static void main(String[] args) throws IOException {
    // TODO Auto-generated method stub
    if(args.length!=1){
      System.out.println("invalid argument");
    }
    Table<String, String, WordCountDAO.WordCount> wordCountMap = HashBasedTable.create();
    File folder = new File(args[0]);
    if(folder.isDirectory()){
      for (File file : folder.listFiles()) {
        if(!file.getName().startsWith("part"))
          continue;
        System.out.println("Processing "+args[0]+file.getName());
        BufferedReader bf = new BufferedReader(new FileReader(args[0]+file.getName()));
        // each line in wordcount file is like : 1 twitter 100
        String line;
        while((line=bf.readLine())!=null){
          String[] groups = line.split("\\t");
          if(groups.length != 4)
            continue;
          String day = groups[0]; // each day is viewed as a column in underlying HBase
          String interval = groups[1];
          String word = groups[2];
          String count = groups[3];
          if(!wordCountMap.contains(word, day)){
            WordCountDAO.WordCount w = new WordCountDAO.WordCount(word, day);
            wordCountMap.put(word, day, w);
          }
          WordCountDAO.WordCount w = wordCountMap.get(word, day);
          w.setCount(Integer.valueOf(interval), Integer.valueOf(count));
          wordCountMap.put(word, day, w);
         
        }
      }
    }
   
    System.out.println("Total "+wordCountMap.size()+" words");
    HTablePool pool = new HTablePool();
    WordCountDAO DAO = new WordCountDAO(pool);
    DAO.CreateTable();
    int count = 0;
    for(WordCountDAO.WordCount w: wordCountMap.values()){
      DAO.addWordCount(w);
      if(++count % 50000==0){
        System.out.println("Loading "+count+" words");
      }
    }
    pool.closeTablePool(DAO.TABLE_NAME);
  }

}
TOP

Related Classes of cc.twittertools.hbase.LoadWordCount

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.