Package org.solbase

Source Code of org.solbase.SolbaseUtil

package org.solbase;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.net.MalformedURLException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.ResourceBundle;

import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.SolbaseHTablePool;
import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.lucene.index.Term;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.common.SolrInputDocument;

/**
* @author koh
*
* These tables must be created before this class is used:
*
* create 'SI', 'info', {NAME=>'info',REPLICATION_SCOPE=>1,VERSION=>1}
* create 'Docs', 'field', 'allTerms', 'timestamp', {COMPRESSION=>'SNAPPY',NAME=>'field',VERSION=>1,REPLICATION_SCOPE=>1},{COMPRESSION=>'SNAPPY',NAME=>'allTerms',VERSION=>1,REPLICATION_SCOPE=>1},{COMPRESSION=>'SNAPPY',NAME=>'timestamp',VERSION=>1, REPLICATION_SCOPE=>1}
* d => document, t=>term, f=>field
* create 'TV', 'd', {COMPRESSION=>'SNAPPY',NAME=>'d',VERSION=>1, REPLICATION_SCOPE=>1}
* create 'DocKeyIdMap', 'docId',{COMPRESSION=>'SNAPPY',NAME=>'docId',VERSION=>1,REPLICATION_SCOPE=>1}
* create 'Sequence', 'id',{COMPRESSION=>'SNAPPY',NAME=>'id',VERSION=>1,REPLICATION_SCOPE=>1}
* create 'TVVersionId', 'timestamp', {COMPRESSION=>'SNAPPY',NAME=>'timestamp',VERSION=>1,REPLICATION_SCOPE=>1}
* create 'uniq_checksum_user_media', 'userMediaKey', {COMPRESSION=>'SNAPPY',NAME=>'userMediaKey',VERSION=>1,REPLICATION_SCOPE=>1}
*
* To load a Solr schema file into Solbase:
* curl http://localhost:8080/solbase/schema/pbimages --data-binary @image_schema.xml -H 'Content-type:text/xml; charset=utf-8'
*/
public final class SolbaseUtil {

  public static final byte[] delimiter = {Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE };
  //public static final byte[] delimiter = {-17, -65, -65};
 
  // used to save bytes on current time minutes (2005/1/1)
  public static final long SolbaseEpochTime = 18408960l;
 
  public static final byte[] floorBytes = {0, 0, 0, 0 };

  public static final byte[] termVectorTable;
  public static final byte[] docTable;
  public static final byte[] schemaInfoTable;
  public static final byte[] docKeyIdMapTable;
  public static final byte[] sequenceTable;
  public static final byte[] termVectorVersionIDTable;
  public static final byte[] uniqChecksumUserMediaTable;
  public static final byte[] userMediaTable;
 
  static {
    String dbPostfix = System.getProperty("solbase.db.postfix");
    if(dbPostfix == null && ResourceBundle.getBundle("solbase") != null){
      dbPostfix = ResourceBundle.getBundle("solbase").getString("db.postfix");
    }

    dbPostfix = (dbPostfix == null || dbPostfix.isEmpty()) ? "" : "_" + dbPostfix;
    termVectorTable = Bytes.toBytes("TV" + dbPostfix);
    docTable = Bytes.toBytes("Docs" + dbPostfix);
    schemaInfoTable = Bytes.toBytes("SI");
    docKeyIdMapTable = Bytes.toBytes("DocKeyIdMap" + dbPostfix);
    sequenceTable = Bytes.toBytes("Sequence" + dbPostfix);
    termVectorVersionIDTable = Bytes.toBytes("TVVersionId" + dbPostfix);
    uniqChecksumUserMediaTable = Bytes.toBytes("uniq_checksum_user_media" + dbPostfix);
    userMediaTable = Bytes.toBytes("user_media");
  }

  public static final int UNIQ_ID_CHUNK = 10000;
 
  public static final byte[] timestampColumnFamilyName = Bytes.toBytes("timestamp");
  
    public static final byte[] termVectorDocColumnFamilyName = Bytes.toBytes("d");
 
  public static final byte[] docIdColumnFamilyName = Bytes.toBytes("docId");
 
  public static final byte[] idColumnFamilyName = Bytes.toBytes("id");
 
  public static final byte[] allTermsColumnFamilyName = Bytes.toBytes("allTerms");
 
  public static final byte[] fieldColumnFamilyName = Bytes.toBytes("field");
 
  public static final byte[] userMediaKeyColumnFamilyName = Bytes.toBytes("userMediaKey");
 
  public static final byte[] tombstonedColumnFamilyQualifierBytes = Bytes.toBytes("tombstoned");
 
  public static final byte[] emptyColumnFamilyQualifierBytes = Bytes.toBytes("");
 
  public static final byte[] schemaInfoColumnFamilyName = Bytes.toBytes("info")
 
  public static int   cacheInvalidationInterval = 1000;//ms
 
  private static int SOLBASE_HTABLE_POOL = 100; // per table. 3 (docs, tv, tvversionid) * 10 (num of region servers) * 100 = 3000 threads ~ 3.0G at most

  private static SolbaseHTablePool hTablePool;

  private static Configuration conf;
 
  static {
    conf = HBaseConfiguration.create();
    hTablePool = new SolbaseHTablePool(conf, SOLBASE_HTABLE_POOL);
  }

  /**
   * Fetches a handle for the named table from the shared table pool.
   *
   * @param tableName table name, as bytes
   * @return whatever {@code hTablePool.getTable(tableName)} returns
   */
  public static HTableInterface getTable(byte[] tableName) {
    return hTablePool.getTable(tableName);
  }
 
  public static HTable getLocalTable(byte[] tableName){
    HTable table = (HTable)hTablePool.getTable(tableName);
    /*
    try {
      // setting buffer size to 12MB
      table.setWriteBufferSize(1024*1024*12);
    } catch (IOException e) {
      e.printStackTrace();
    }
    */
    table.setAutoFlush(false);
    return table;
  }
 
  /** Returns a pooled handle for the term vector table ({@code termVectorTable}). */
  public static HTableInterface getTermVectorTable() {
    return getTable(termVectorTable);
  }
 
  /** Returns the term vector table via {@code getLocalTable}, i.e. with auto-flush disabled. */
  public static HTable getLocalTermVectorTable(){
    return getLocalTable(termVectorTable);
  }
 
  /** Returns the term vector table name as a String. */
  public static String getTermVectorTableName(){
    return Bytes.toString(termVectorTable);
  }
 
  // TODO: uniqChecksumUserMediaTable is PB specific table
  /** Returns a pooled handle for the {@code uniqChecksumUserMediaTable} table. */
  public static HTableInterface getUniqChecksumUserMediaTable() {
    return getTable(uniqChecksumUserMediaTable);
  }
 
  /**
   * Returns the {@code uniqChecksumUserMediaTable} table via {@code getLocalTable}, i.e. with
   * auto-flush disabled.
   */
  public static HTable getLocalUniqChecksumUserMediaTable(){
    return getLocalTable(uniqChecksumUserMediaTable);
  }
 
  /** Returns the {@code uniqChecksumUserMediaTable} table name as a String. */
  public static String getUniqChecksumUserMediaTableName(){
    return Bytes.toString(uniqChecksumUserMediaTable);
  }
 
  /** Returns a pooled handle for the term vector version id table. */
  public static HTableInterface getTermVectorVersionIDTable() {
    return getTable(termVectorVersionIDTable);
  }
 
  /**
   * Returns the term vector version id table via {@code getLocalTable}, i.e. with auto-flush
   * disabled.
   */
  public static HTable getLocalTermVectorVersionIDTable(){
    return getLocalTable(termVectorVersionIDTable);
  }
 
  /** Returns the term vector version id table name as a String. */
  public static String getTermVectorVersionIDTableName(){
    return Bytes.toString(termVectorVersionIDTable);
  }
 
  /** Returns a pooled handle for the document table ({@code docTable}). */
  public static HTableInterface getDocTable() {
    return getTable(docTable);
  }
 
  /** Returns the document table via {@code getLocalTable}, i.e. with auto-flush disabled. */
  public static HTable getLocalDocTable() {
    return getLocalTable(docTable);
  }
 
  /** Returns the document table name as a String. */
  public static String getDocTableName(){
    return Bytes.toString(SolbaseUtil.docTable);
  }

    /** Returns a pooled handle for the schema info table ({@code schemaInfoTable}). */
    public static HTableInterface getSchemaInfoTable() {
        return getTable(schemaInfoTable);
    }

    /** Returns a pooled handle for the document key to id mapping table. */
    public static HTableInterface getDocKeyIdMapTable() {
        return getTable(docKeyIdMapTable);
    }

    /**
     * Returns the document key to id mapping table via {@code getLocalTable}, i.e. with
     * auto-flush disabled.
     */
    public static HTable getLocalDocKeyIdMapTable() {
      return getLocalTable(docKeyIdMapTable);
    }
   
    /** Returns the document key to id mapping table name as a String. */
    public static String getDocKeyIdMapTableName(){
      return Bytes.toString(SolbaseUtil.docKeyIdMapTable);
    }
   
    /** Returns a pooled handle for the {@code userMediaTable} table. */
    public static HTableInterface getUserMediaTable() {
        return getTable(userMediaTable);
    }

    /**
     * Returns the {@code userMediaTable} table via {@code getLocalTable}, i.e. with auto-flush
     * disabled.
     */
    public static HTable getLocalUserMediaTable() {
      return getLocalTable(userMediaTable);
    }
   
    /** Returns a pooled handle for the sequence table ({@code sequenceTable}). */
    public static HTableInterface getSequenceTable() {
        return getTable(sequenceTable);
    }
   
    /** Returns the sequence table name as a String. */
    public static String getSequenceTableName(){
      return Bytes.toString(SolbaseUtil.sequenceTable);
    }
   
    /**
     * Hands a table back to the shared pool.
     *
     * @param table the table to pass to {@code hTablePool.putTable}
     */
    public static void releaseTable(HTableInterface table) {
    hTablePool.putTable(table);
  }

    public static byte[] generateTermKey(Term term) {
    byte[] fieldBytes = Bytes.toBytes(term.field());
    byte[] termBytes = Bytes.toBytes(term.text());
    byte[] fieldTermKeyBytes = Bytes.add(fieldBytes, SolbaseUtil.delimiter,
        termBytes);
    return fieldTermKeyBytes;
  }

  public static byte[] generateTermBeginKey(Term term) {
    return Bytes.add(SolbaseUtil.generateTermKey(term),
        SolbaseUtil.delimiter, SolbaseUtil.floorBytes);
  }

  public static byte[] generateTermEndKey(Term term) {
    return Bytes.add(SolbaseUtil.generateTermKey(term),
        SolbaseUtil.delimiter, SolbaseUtil.delimiter);
  }
 
  public static byte[] generateTermKey(Term term, int startDocId) {
    return Bytes.add(SolbaseUtil.generateTermKey(term),
        SolbaseUtil.delimiter, Bytes.toBytes(startDocId));
  }
 
  public static byte[] getDocumentId(byte[] termDocKey) {
    int maxByteCount = 0;
    int delimiterCount = 0;
   
    for (int i = 0; i < termDocKey.length; i++) {
      if (termDocKey[i] == Byte.MAX_VALUE) {
        maxByteCount++;
      }
     
      if (maxByteCount == 4) {
        delimiterCount++;
        maxByteCount = 0;
      }
     
      if (delimiterCount == 2) {
        return Arrays.copyOfRange(termDocKey, i+1, termDocKey.length);
      }
    }
   
    return null;
  }
 
  public static Integer getDocumentId(ByteBuffer termDocKey) {
    int maxByteCount = 0;
    int delimiterCount = 0;
   
    while (termDocKey.remaining() > 0) {
      byte currentValue = termDocKey.get();
      if (currentValue == Byte.MAX_VALUE) {
        maxByteCount++;
      }
     
      if (maxByteCount == 4) {
        delimiterCount++;
        maxByteCount = 0;
      }
     
      if (delimiterCount == 2) {
        return termDocKey.getInt();
      }
    }
   
    return null;
  }
 
  public static int findDocIdIndex(byte[] termDocKey) {
    int maxByteCount = 0;
    int delimiterCount = 0;
   
    for(int i = 0; i < termDocKey.length; i++){
      byte currentValue = termDocKey[i];
      if (currentValue == Byte.MAX_VALUE) {
        maxByteCount++;
      }
     
      if (maxByteCount == 4) {
        delimiterCount++;
        maxByteCount = 0;
      }
     
      if (delimiterCount == 2) {
        return i + 1;
      }
    }
   
    return -1;
  }
 
    public static int mreadVInt(ByteBuffer buf)
    {      
        int length = buf.remaining();
       
        if(length == 0)
            return 0;
       
        byte b = buf.get();
        int i = b & 0x7F;
        for (int pos = 1, shift = 7; (b & 0x80) != 0 && pos < length; shift += 7, pos++)
        {
            b = buf.get();
            i |= (b & 0x7F) << shift;
        }

        return i;
    }
   
    public static int mreadVInt(InputStream buf) throws IOException
    {              
        byte b = (byte)buf.read();
        int i = b & 0x7F;
       
    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
      b = (byte) buf.read();
      i |= (b & 0x7F) << shift;
    }

        return i;
    }
   
   
    public static byte[] writeVInt(int i)
    {
        int length = 0;
        int p = i;

        while ((p & ~0x7F) != 0)
        {
            p >>>= 7;
            length++;
        }
        length++;

        byte[] buf = new byte[length];
        int pos = 0;
        while ((i & ~0x7F) != 0)
        {
            buf[pos] = ((byte) ((i & 0x7f) | 0x80));
            i >>>= 7;
            pos++;
        }
        buf[pos] = (byte) i;

        return buf;
    }
   
    public static Object fromBytes(ByteBuffer data) throws IOException, ClassNotFoundException
    {

        ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(data.array(), data.position()+data.arrayOffset(), data
                .remaining()));
        Object o = ois.readObject();
        ois.close();
        return o;
    }
   
    public static ByteBuffer toBytes(Object o) throws IOException
    {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(baos);
        oos.writeObject(o);
        oos.close();
        return ByteBuffer.wrap(baos.toByteArray());
    }

    // sequence doc id mapping to actual silo.picture_id
    public static Integer getDocId(String key) throws IOException {
        HTableInterface docIdKeyMap = SolbaseUtil.getDocKeyIdMapTable();

        try {
            Get get = new Get(Bytes.toBytes(key));
            Result result = docIdKeyMap.get(get);

            if(result.isEmpty()){
                return null;
            }
           
            byte[] docId = result.getValue(Bytes.toBytes("docId"),Bytes.toBytes(""));
           
            int doc = Bytes.toInt(docId);

            return doc;
        } finally {
            SolbaseUtil.releaseTable(docIdKeyMap);
        }
    }

    // sequence generator for generating doc id
    public static int generateDocId(String key) throws IOException {

        HTableInterface sequence = SolbaseUtil.getSequenceTable();
        HTableInterface docIdKeyMap = SolbaseUtil.getDocKeyIdMapTable();

        try {
            int docId =  new Long(sequence.incrementColumnValue(Bytes.toBytes("sequence"), Bytes.toBytes("id"), Bytes.toBytes(""), 1, true)).intValue();

           
            Put mapping = new Put(Bytes.toBytes(key));
            mapping.add(Bytes.toBytes("docId"), Bytes.toBytes(""), Bytes.toBytes(docId));
            docIdKeyMap.put(mapping);

            return docId;
        } finally {
            SolbaseUtil.releaseTable(sequence);
            SolbaseUtil.releaseTable(docIdKeyMap);
        }
    }
   
    // return uniq id from sequence table
    // mainly used for chunking with pristine indexing
    public static int generateUniqId() throws IOException {
      HTableInterface sequence = SolbaseUtil.getSequenceTable();
     
      try {
        int docId = new Long(sequence.incrementColumnValue(Bytes.toBytes("sequence"), Bytes.toBytes("id"), Bytes.toBytes(""), SolbaseUtil.UNIQ_ID_CHUNK, true)).intValue();
     
        return docId;
      } finally {
        SolbaseUtil.releaseTable(sequence);
      }
    }
   
    public static int getSequenceId(){
      HTableInterface sequence = SolbaseUtil.getSequenceTable();
      Get get = new Get(Bytes.toBytes("sequence"));
      try {
      Result result = sequence.get(get);
     
      if(result == null || result.isEmpty()){
        int docId = new Long(sequence.incrementColumnValue(Bytes.toBytes("sequence"), Bytes.toBytes("id"), Bytes.toBytes(""), 1, true)).intValue();
        return docId;
      } else {
        byte[] val = result.getValue(Bytes.toBytes("id"), Bytes.toBytes(""));
        return new Long(Bytes.toLong(val)).intValue();
      }
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
    return -1;
    }
   
    /** Returns the current sequence value; delegates to {@code getSequenceId()}. */
    public static int getCurrentMaxId(){
      return getSequenceId();
    }
   
    public static int getEpochSinceSolbase(long currentMinutes){
      return (int)(currentMinutes - SolbaseEpochTime);
    }
   
    public static long getCurrentTimeFromEpochSinceSolbase(String currentMinutes){
      int currentMin = 0;
      try {
        currentMin = Integer.parseInt(currentMinutes);
        return (SolbaseEpochTime + currentMin) * 60;
      } catch (NumberFormatException e) {
        // ignore
      }
      return currentMin;
    }
   
  public static void createTable(HTableDescriptor desc, byte[] startKey, byte[] endKey, Integer numberOfRegions) throws IOException{
    HBaseAdmin admin = new HBaseAdmin(SolbaseUtil.conf);

    if(startKey != null && endKey != null && numberOfRegions != null){
      admin.createTable(desc, startKey, endKey, numberOfRegions);
    } else {
      admin.createTable(desc);
    }
  }
 
  public static void setupHColumnDescriptor(HColumnDescriptor column){
    column.setCompressionType(Algorithm.SNAPPY);
    column.setScope(1);
    column.setMaxVersions(1);
  }
 
  public static void createSITable() throws IOException {
    HTableDescriptor desc = new HTableDescriptor(SolbaseUtil.schemaInfoTable);
    HColumnDescriptor column = new HColumnDescriptor(SolbaseUtil.schemaInfoColumnFamilyName);
    SolbaseUtil.setupHColumnDescriptor(column);
    desc.addFamily(column);
   
    HBaseAdmin admin;
    admin = new HBaseAdmin(SolbaseUtil.conf);
    admin.createTable(desc);
  }
 
  public static void createTermVectorTable(byte[][] splits) throws IOException{
    HTableDescriptor desc = new HTableDescriptor(SolbaseUtil.getTermVectorTableName());
    HColumnDescriptor column = new HColumnDescriptor(SolbaseUtil.termVectorDocColumnFamilyName);
    SolbaseUtil.setupHColumnDescriptor(column);
    desc.addFamily(column);
   
    HBaseAdmin admin;
    admin = new HBaseAdmin(SolbaseUtil.conf);
    admin.createTable(desc, splits);
  }
 
  public static void createTermVectorTable(byte[] startTerm, byte[] endTerm, Integer numberOfRegions) throws IOException {
    HTableDescriptor desc = new HTableDescriptor(SolbaseUtil.getTermVectorTableName());
    HColumnDescriptor column = new HColumnDescriptor(SolbaseUtil.termVectorDocColumnFamilyName);
    SolbaseUtil.setupHColumnDescriptor(column);
    desc.addFamily(column);
    SolbaseUtil.createTable(desc, startTerm, endTerm, numberOfRegions);
  }
   
  public static void createTermVectorVersionIDTable() throws IOException {
    HTableDescriptor desc = new HTableDescriptor(SolbaseUtil.getTermVectorVersionIDTableName());
    HColumnDescriptor column = new HColumnDescriptor(SolbaseUtil.timestampColumnFamilyName);
    SolbaseUtil.setupHColumnDescriptor(column);
    desc.addFamily(column);
    SolbaseUtil.createTable(desc,null,null,null);
  }

  public static void createDocKeyIdMapTable(byte [] start, byte[] end, Integer numberOfRegions) throws IOException {
    HTableDescriptor desc = new HTableDescriptor(SolbaseUtil.getDocKeyIdMapTableName());
    HColumnDescriptor column = new HColumnDescriptor(SolbaseUtil.docIdColumnFamilyName);
    SolbaseUtil.setupHColumnDescriptor(column);
    desc.addFamily(column);
    SolbaseUtil.createTable(desc, start, end, numberOfRegions);   
  }

  public static void createDocTable(byte[] start, byte[] end, Integer numberOfRegions) throws IOException {
    HTableDescriptor desc = new HTableDescriptor(SolbaseUtil.getDocTableName());
    HColumnDescriptor fieldColumn = new HColumnDescriptor(SolbaseUtil.fieldColumnFamilyName);
    SolbaseUtil.setupHColumnDescriptor(fieldColumn);
    desc.addFamily(fieldColumn);
   
    HColumnDescriptor allTermsColumn = new HColumnDescriptor(SolbaseUtil.allTermsColumnFamilyName);
    SolbaseUtil.setupHColumnDescriptor(allTermsColumn);
    desc.addFamily(allTermsColumn);
   
    HColumnDescriptor timestampColumn = new HColumnDescriptor(SolbaseUtil.timestampColumnFamilyName);
    SolbaseUtil.setupHColumnDescriptor(timestampColumn);
    desc.addFamily(timestampColumn);
   
    SolbaseUtil.createTable(desc, start, end, numberOfRegions)
  }
 
  public static void createDocTable(byte[][] splits) throws IOException{
    HTableDescriptor desc = new HTableDescriptor(SolbaseUtil.getDocTableName());
    HColumnDescriptor fieldColumn = new HColumnDescriptor(SolbaseUtil.fieldColumnFamilyName);
    SolbaseUtil.setupHColumnDescriptor(fieldColumn);
    desc.addFamily(fieldColumn);
   
    HColumnDescriptor allTermsColumn = new HColumnDescriptor(SolbaseUtil.allTermsColumnFamilyName);
    SolbaseUtil.setupHColumnDescriptor(allTermsColumn);
    desc.addFamily(allTermsColumn);
   
    HColumnDescriptor timestampColumn = new HColumnDescriptor(SolbaseUtil.timestampColumnFamilyName);
    SolbaseUtil.setupHColumnDescriptor(timestampColumn);
    desc.addFamily(timestampColumn);
   
    HBaseAdmin admin;
    admin = new HBaseAdmin(SolbaseUtil.conf);
    admin.createTable(desc, splits);
  }

  public static void createSequenceTable() throws IOException {
    HTableDescriptor desc = new HTableDescriptor(SolbaseUtil.getSequenceTableName());
    HColumnDescriptor column = new HColumnDescriptor(SolbaseUtil.idColumnFamilyName);
    SolbaseUtil.setupHColumnDescriptor(column);
    desc.addFamily(column);
    SolbaseUtil.createTable(desc, null, null, null);
  }

  public static void createUniqChecksumUserMediaTable(byte[] start, byte[] end, Integer numberOfRegions) throws IOException {
    HTableDescriptor desc = new HTableDescriptor(SolbaseUtil.getUniqChecksumUserMediaTableName());
    HColumnDescriptor column = new HColumnDescriptor(SolbaseUtil.userMediaKeyColumnFamilyName);
    SolbaseUtil.setupHColumnDescriptor(column);
    desc.addFamily(column);
    SolbaseUtil.createTable(desc, start, end, numberOfRegions);
  }
 
  public static void truncateTable(HBaseAdmin admin, byte[] tableName){
    try {
      HTableDescriptor desc = admin.getTableDescriptor(tableName);
      admin.disableTable(tableName);
      admin.deleteTable(tableName);
      admin.createTable(desc);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
  public static byte[] randomize(Integer docId){
    byte[] bytes = Bytes.toBytes(docId);
    ArrayUtils.reverse(bytes);
    return bytes;
  }
 
  public static byte[] randomize(byte[] docId){
    ArrayUtils.reverse(docId);
    return docId;
  }
}
TOP

Related Classes of org.solbase.SolbaseUtil

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.