/**
* Copyright 2010 Nube Technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
*/
package co.nubetech.hiho.mapreduce;
import java.io.IOException;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapred.Pair;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.log4j.Logger;
import co.nubetech.hiho.avro.DBMapper;
import co.nubetech.hiho.mapreduce.lib.db.ColumnInfo;
import co.nubetech.hiho.mapreduce.lib.db.GenericDBWritable;
/**
 * Mapper (old {@code org.apache.hadoop.mapred} API) that converts each
 * database row into an Avro {@link Pair} and emits it wrapped in an
 * {@link AvroValue}, with {@link NullWritable} as the output value.
 *
 * <p>The pair schema is built lazily from the columns of the first row seen
 * by {@link #map}, then reused for every later row handled by this instance.
 * The same {@link Pair} object is also reused across calls; its key and value
 * are overwritten for each row before it is collected.
 */
public class DBInputAvroMapper extends MapReduceBase implements
        Mapper<LongWritable, GenericDBWritable, AvroValue<Pair>, NullWritable> {

    final static Logger logger = Logger
            .getLogger(co.nubetech.hiho.mapreduce.DBInputAvroMapper.class);

    // Declared but never assigned within this class; retained so that any
    // same-package code referring to the field keeps compiling.
    GenericDatumWriter<Pair> writer;

    private final NullWritable nullWritable = NullWritable.get();

    // Reused output pair and its schemas; all are initialised on the first map() call.
    Pair pair = null;
    Schema pairSchema = null;
    Schema keySchema = null;
    Schema valueSchema = null;

    /**
     * Converts one input row into an Avro pair and collects it.
     *
     * @param key      the input key; its long value becomes field 0 of the key record
     * @param val      the row, providing column metadata and column values
     * @param output   collector receiving the wrapped pair and a {@link NullWritable}
     * @param reporter unused
     * @throws IOException if the collector fails
     */
    @Override
    public void map(LongWritable key, GenericDBWritable val,
            OutputCollector<AvroValue<Pair>, NullWritable> output,
            Reporter reporter) throws IOException {
        // log4j 1.x has no parameterized messages, so guard explicitly: this
        // avoids building (potentially large) strings for every row when
        // debug logging is switched off.
        final boolean debug = logger.isDebugEnabled();
        if (debug) {
            logger.debug("Key, val are " + key + " val " + val.getColumns());
        }

        if (pairSchema == null) {
            // First row for this mapper instance: derive the schemas once.
            if (debug) {
                logger.debug("Creating schema for MR");
                logger.debug("MR columns are " + val.getColumns());
                for (ColumnInfo column : val.getColumns()) {
                    logger.debug("Column is " + column.getIndex() + " " + column.getName());
                }
            }
            pairSchema = DBMapper.getPairSchema(val.getColumns());
            keySchema = Pair.getKeySchema(pairSchema);
            valueSchema = Pair.getValueSchema(pairSchema);
            pair = new Pair<GenericRecord, GenericRecord>(pairSchema);
        }

        GenericRecord keyRecord = this.getKeyRecord(keySchema, key);
        if (debug) {
            logger.debug("Key record is " + keyRecord);
        }

        GenericRecord valueRecord = this.getValueRecord(valueSchema, val);
        if (debug) {
            logger.debug("Value Record is " + valueRecord);
        }

        pair.key(keyRecord);
        pair.value(valueRecord);
        output.collect(new AvroValue<Pair>(pair), nullWritable);
    }

    /**
     * Builds the key record for a row.
     *
     * @param keySchema schema of the key record
     * @param key       the input key
     * @return a new record whose field 0 holds {@code key.get()}
     */
    public GenericRecord getKeyRecord(Schema keySchema, LongWritable key) {
        GenericRecord keyRecord = new GenericData.Record(keySchema);
        keyRecord.put(0, key.get());
        return keyRecord;
    }

    /**
     * Builds the value record for a row by copying each column value into the
     * field at the same position.
     *
     * <p>Values whose field schema type is {@code STRING} are converted to
     * {@link Utf8} via {@code toString()}; all other values are stored as-is.
     * A {@code null} value is stored as {@code null} rather than failing with
     * a {@link NullPointerException} here. NOTE(review): whether the schema
     * accepts a null in that field depends on {@code DBMapper.getPairSchema},
     * which is not visible from this class.
     *
     * @param valueSchema schema of the value record; must have at least as
     *                    many fields as {@code val} has values
     * @param val         the row whose values are copied
     * @return a new record populated positionally from {@code val.getValues()}
     */
    public GenericRecord getValueRecord(Schema valueSchema,
            GenericDBWritable val) {
        GenericRecord valueRecord = new GenericData.Record(valueSchema);
        List<Schema.Field> fieldSchemas = valueSchema.getFields();
        final int valueCount = val.getValues().size();
        for (int i = 0; i < valueCount; ++i) {
            Object value = val.getValues().get(i);
            Schema.Type type = fieldSchemas.get(i).schema().getType();
            if (value != null && type == Schema.Type.STRING) {
                valueRecord.put(i, new Utf8(value.toString()));
            } else {
                valueRecord.put(i, value);
            }
        }
        return valueRecord;
    }
}