Package eu.stratosphere.addons.hbase.example

Source Code of eu.stratosphere.addons.hbase.example.HBaseReadExample

/***********************************************************************************************************************
*
* Copyright (C) 2010 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/

package eu.stratosphere.addons.hbase.example;

import java.nio.charset.Charset;

import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;

import eu.stratosphere.addons.hbase.TableInputFormat;
import eu.stratosphere.addons.hbase.common.HBaseKey;
import eu.stratosphere.addons.hbase.common.HBaseResult;
import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.common.Program;
import eu.stratosphere.api.common.ProgramDescription;
import eu.stratosphere.api.java.record.io.CsvOutputFormat;
import eu.stratosphere.api.java.record.operators.FileDataSink;
import eu.stratosphere.api.java.record.operators.GenericDataSource;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.types.Record;
import eu.stratosphere.types.StringValue;

/**
* Implements a word count which takes the input file and counts the number of
* the occurrences of each word in the file.
*/
public class HBaseReadExample implements Program, ProgramDescription {
 
  public static class MyTableInputFormat extends  TableInputFormat {
   
    private static final long serialVersionUID = 1L;

    private final byte[] META_FAMILY = "meta".getBytes();
   
    private final byte[] USER_COLUMN = "user".getBytes();
   
    private final byte[] TIMESTAMP_COLUMN = "timestamp".getBytes();
   
    private final byte[] TEXT_FAMILY = "text".getBytes();
   
    private final byte[] TWEET_COLUMN = "tweet".getBytes();
   
    public MyTableInputFormat() {
      super();
     
    }
   
    @Override
    protected HTable createTable(Configuration parameters) {
      return super.createTable(parameters);
    }
   
    @Override
    protected Scan createScanner(Configuration parameters) {
      Scan scan = new Scan ();
      scan.addColumn (META_FAMILY, USER_COLUMN);
      scan.addColumn (META_FAMILY, TIMESTAMP_COLUMN);
      scan.addColumn (TEXT_FAMILY, TWEET_COLUMN);
      return scan;
    }
   
    StringValue row_string = new StringValue();
    StringValue user_string = new StringValue();
    StringValue timestamp_string = new StringValue();
    StringValue tweet_string = new StringValue();
   
    @Override
    public void mapResultToRecord(Record record, HBaseKey key,
        HBaseResult result) {
      Result res = result.getResult();
      res.getRow();
      record.setField(0, toString(row_string, res.getRow()));
      record.setField(1, toString (user_string, res.getValue(META_FAMILY, USER_COLUMN)));
      record.setField(2, toString (timestamp_string, res.getValue(META_FAMILY, TIMESTAMP_COLUMN)));
      record.setField(3, toString (tweet_string, res.getValue(TEXT_FAMILY, TWEET_COLUMN)));
    }
   
    private final StringValue toString (StringValue string, byte[] bytes) {
      string.setValueAscii(bytes, 0, bytes.length);
      return string;
    }
   
  }
 

  @Override
  public Plan getPlan(String... args) {
    // parse job parameters
    int numSubTasks   = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String output    = (args.length > 1 ? args[1] : "");

    GenericDataSource<TableInputFormat> source = new GenericDataSource<TableInputFormat>(new MyTableInputFormat(), "HBase Input");
    source.setParameter(TableInputFormat.INPUT_TABLE, "twitter");
    source.setParameter(TableInputFormat.CONFIG_LOCATION, "/etc/hbase/conf/hbase-site.xml");
    FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, source, "HBase String dump");
    CsvOutputFormat.configureRecordFormat(out)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(StringValue.class, 0)
      .field(StringValue.class, 1)
      .field(StringValue.class, 2)
      .field(StringValue.class, 3);
   
    Plan plan = new Plan(out, "HBase access Example");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
  }


  @Override
  public String getDescription() {
    return "Parameters: [numSubStasks] [input] [output]";
  }
}
TOP

Related Classes of eu.stratosphere.addons.hbase.example.HBaseReadExample

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.