/**
* Copyright [2012] [Datasalt Systems S.L.]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datasalt.pangool.tuplemr.serialization;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.serializer.Serializer;
import com.datasalt.pangool.io.BitField;
import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.Schema.Field;
import com.datasalt.pangool.io.Schema.Field.Type;
import com.datasalt.pangool.io.Utf8;
import com.datasalt.pangool.serialization.HadoopSerialization;
import com.datasalt.pangool.tuplemr.SerializationInfo;
/**
 * Baseline code for serializing Tuples. It is used by the more complex {@link TupleSerializer}
 * and by the stateful Tuple field serializer {@link TupleFieldSerialization}.
 * <p>
 * For every tuple, an optional bit field flagging the null nullable fields is written first,
 * followed by every non-null field value in schema order, each one encoded according to the
 * field {@link Type}.
 * <p>
 * Instances reuse internal scratch buffers between calls, so the same instance must not be
 * used by several threads at the same time.
 */
@SuppressWarnings({"rawtypes", "unchecked"})
public class SimpleTupleSerializer implements Serializer<ITuple> {

  /** Destination stream, assigned by {@link #open(OutputStream)}. */
  private DataOutputStream out;
  /** Fallback serialization for OBJECT fields that have no custom serializer. */
  private final HadoopSerialization ser;
  /** Reusable holder used to write {@link String} values. */
  private final Utf8 helperText = new Utf8();
  /** Reusable bit field flagging which nullable fields are null in the tuple being written. */
  private final BitField nulls = new BitField();
  /** Scratch buffer where OBJECT fields are serialized so their length can be written first. */
  private final DataOutputBuffer tmpOutputBuffer = new DataOutputBuffer();
  /** Per-field custom serializers; null when built without a Schema. */
  private Serializer[] customSerializers;
  /** Optional: when null, the Schema carried by each tuple is used instead. */
  private Schema schema = null;

  /**
   * Creates a SimpleTupleSerializer that is not bound to a specific Schema: each tuple is
   * serialized with its own Schema and OBJECT fields use the given Hadoop serialization.
   *
   * @param ser serialization used for OBJECT fields
   */
  public SimpleTupleSerializer(HadoopSerialization ser) {
    this.ser = ser;
  }

  /**
   * Creates a SimpleTupleSerializer that serializes a certain Schema. Needed when using
   * custom stateful serialization.
   *
   * @param schemaToSerialize Schema used for every serialized tuple
   * @param ser serialization used for OBJECT fields without a custom serializer
   * @param conf configuration handed to {@link SerializationInfo#getSerializers} to obtain
   *          the custom serializers
   */
  public SimpleTupleSerializer(Schema schemaToSerialize, HadoopSerialization ser, Configuration conf) {
    this(ser);
    this.schema = schemaToSerialize;
    this.customSerializers = SerializationInfo.getSerializers(schemaToSerialize, conf);
  }

  /**
   * Closes the underlying stream. Does nothing if {@link #open(OutputStream)} was never called.
   */
  @Override
  public void close() throws IOException {
    if (out != null) {
      out.close();
    }
  }

  /**
   * Sets the destination stream. The stream is used as is when it already is a
   * {@link DataOutputStream}; otherwise it is wrapped in one.
   */
  @Override
  public void open(OutputStream outputStream) {
    if (outputStream instanceof DataOutputStream) {
      out = (DataOutputStream) outputStream;
    } else {
      out = new DataOutputStream(outputStream);
    }
  }

  /**
   * Serializes the tuple using the Schema given at construction time or, if there was none,
   * the Schema of the tuple itself.
   */
  @Override
  public void serialize(ITuple tuple) throws IOException {
    Schema effectiveSchema = (this.schema != null) ? this.schema : tuple.getSchema();
    write(effectiveSchema, tuple, null, customSerializers);
  }

  /**
   * @return The stream this serializer writes to, or null if it has not been opened yet.
   */
  public DataOutputStream getOut() {
    return out;
  }

  /**
   * @return A value in the tuple represented by the idx. If a translationTable is given,
   *         then idx is translated before being applied to obtain the value from the tuple.
   */
  protected Object valueAt(int idx, ITuple tuple, int[] translationTable) {
    if (translationTable == null) {
      return tuple.get(idx);
    }
    return tuple.get(translationTable[idx]);
  }

  /**
   * Writes the tuple to the open stream following the destination Schema.
   *
   * @param destinationSchema Schema that drives the order and type of the written fields
   * @param tuple tuple holding the values
   * @param translationTable optional mapping from destination field index to tuple index;
   *          null means the indexes are the same
   * @param customSerializers optional per-field serializers for OBJECT fields; the array, or
   *          any of its entries, may be null, in which case Hadoop serialization is used
   * @throws IOException if a non-nullable field is null, a value does not match its field
   *           type, a custom object cannot be serialized or the stream fails
   */
  void write(Schema destinationSchema, ITuple tuple, int[] translationTable, Serializer[] customSerializers)
      throws IOException {
    // When the schema has nullable fields, a bit field with the null information is composed
    // and written first. Bit i refers to the i-th nullable field.
    if (destinationSchema.containsNullableFields()) {
      List<Integer> nullableFields = destinationSchema.getNullableFieldsIdx();
      nulls.clear();
      for (int i = 0; i < nullableFields.size(); i++) {
        int nField = nullableFields.get(i);
        if (valueAt(nField, tuple, translationTable) == null) {
          nulls.set(i);
        }
      }
      nulls.ser(out);
    }

    int numFields = destinationSchema.getFields().size();
    for (int i = 0; i < numFields; i++) {
      Field field = destinationSchema.getField(i);
      Type fieldType = field.getType();
      Object element = valueAt(i, tuple, translationTable);
      if (element == null) {
        if (field.isNullable()) {
          // Nullable null fields don't need serialization.
          continue;
        }
        raiseUnexpectedNullException(field, element);
      }
      try {
        switch (fieldType) {
          case INT:
            WritableUtils.writeVInt(out, (Integer) element);
            break;
          case LONG:
            WritableUtils.writeVLong(out, (Long) element);
            break;
          case DOUBLE:
            out.writeDouble((Double) element);
            break;
          case FLOAT:
            out.writeFloat((Float) element);
            break;
          case STRING:
            if (element instanceof Text) {
              ((Text) element).write(out);
            } else if (element instanceof String) {
              helperText.set((String) element);
              helperText.write(out);
            } else {
              raisedClassCastException(null, field, element);
            }
            break;
          case BOOLEAN:
            // A single byte: 1 for true, 0 for false.
            out.write((Boolean) element ? 1 : 0);
            break;
          case ENUM:
            writeEnum((Enum<?>) element, field, out);
            break;
          case OBJECT:
            writeCustomObject(element, out, serializerFor(customSerializers, i));
            break;
          case BYTES:
            writeBytes(element, out);
            break;
          default:
            throw new IOException("Not supported type:" + fieldType);
        }
      } catch (ClassCastException e) {
        raisedClassCastException(e, field, element);
      } catch (CustomObjectSerializationException e) {
        raisedCustomObjectException(e, field, element, serializerFor(customSerializers, i));
      }
    } // End for
  }

  /**
   * @return The custom serializer at position idx, or null when no serializer array was given
   *         (callers then fall back to Hadoop serialization).
   */
  private static Serializer serializerFor(Serializer[] customSerializers, int idx) {
    return (customSerializers == null) ? null : customSerializers[idx];
  }

  /**
   * Serializes the element into the scratch buffer and then writes it to the output as a
   * variable-length int with the number of bytes followed by the bytes themselves.
   *
   * @param customSer serializer to use, or null to use the default Hadoop serialization
   * @throws CustomObjectSerializationException wrapping anything thrown while serializing
   */
  private void writeCustomObject(Object element, DataOutput output, Serializer customSer)
      throws CustomObjectSerializationException {
    try {
      tmpOutputBuffer.reset();
      if (customSer != null) {
        customSer.open(tmpOutputBuffer);
        customSer.serialize(element);
        customSer.close();
      } else {
        // If no custom serializer defined then use Hadoop Serialization by default
        ser.ser(element, tmpOutputBuffer);
      }
      WritableUtils.writeVInt(output, tmpOutputBuffer.getLength());
      output.write(tmpOutputBuffer.getData(), 0, tmpOutputBuffer.getLength());
    } catch (Throwable e) {
      // Deliberately broad: every failure is reported together with the offending field.
      throw new CustomObjectSerializationException(e);
    }
  }

  /**
   * Writes a BYTES value as a variable-length int with the length followed by the content.
   * Accepts a byte array (written completely) or a {@link ByteBuffer} (only the bytes between
   * its position and its limit are written; the buffer position is not modified).
   *
   * @throws IOException if the value is neither a byte array nor a ByteBuffer
   */
  private void writeBytes(Object bytes, DataOutput output) throws IOException {
    if (bytes instanceof byte[]) {
      byte[] array = (byte[]) bytes;
      WritableUtils.writeVInt(output, array.length);
      output.write(array);
    } else if (bytes instanceof ByteBuffer) {
      ByteBuffer buffer = (ByteBuffer) bytes;
      int len = buffer.remaining();
      WritableUtils.writeVInt(output, len);
      if (buffer.hasArray()) {
        output.write(buffer.array(), buffer.arrayOffset() + buffer.position(), len);
      } else {
        // Direct and read-only buffers don't expose a backing array: copy the content through
        // a duplicate so the position of the caller's buffer is left untouched.
        byte[] copy = new byte[len];
        buffer.duplicate().get(copy);
        output.write(copy, 0, len);
      }
    } else {
      throw new IOException("Not allowed " + bytes.getClass() + " for type " + Type.BYTES);
    }
  }

  /**
   * Writes the ordinal of the enum value as a variable-length int.
   *
   * @throws IOException if the value does not belong to the enum type declared by the field
   */
  private void writeEnum(Enum<?> element, Field field, DataOutput output) throws IOException {
    Class<?> expectedType = field.getObjectClass();
    // getDeclaringClass() instead of getClass(): constants declared with their own class body
    // are instances of an anonymous subclass of the enum type.
    Class<?> actualType = element.getDeclaringClass();
    if (actualType != expectedType) {
      throw new IOException("Field '" + field.getName() + "' contains '" + element + "' which is "
          + actualType.getName() + ".The expected type is " + expectedType.getName());
    }
    WritableUtils.writeVInt(output, element.ordinal());
  }

  /**
   * Always throws an IOException reporting that the element type is not valid for the field.
   *
   * @param cause original exception, may be null
   */
  private void raisedClassCastException(ClassCastException cause, Field field, Object element) throws IOException {
    throw new IOException("Field '" + field.getName() + "' with type: '" + field.getType() + "' can't contain '"
        + element + "' which is " + element.getClass().getName(), cause);
  }

  /**
   * Always throws an IOException reporting a null value in a field that is not nullable.
   */
  private void raiseUnexpectedNullException(Field field, Object element) throws IOException {
    throw new IOException("Field '" + field.getName() + "' with type " + field.getType() +
        " can't contain null value");
  }

  /**
   * Always throws an IOException reporting the failure of a custom object serialization,
   * including the field, the value and the serializer involved.
   */
  private void raisedCustomObjectException(CustomObjectSerializationException cause, Field field, Object element,
      Serializer serializer) throws IOException {
    throw new IOException("Custom object field '" + field.getName() + " with value " + element +
        " of type " + ((element != null) ? element.getClass().getCanonicalName() : "null") +
        " using serializer " + serializer + " thrown an exception.",
        cause);
  }

  /**
   * Thrown when an unexpected exception happens when serializing a custom object.
   */
  @SuppressWarnings("serial")
  public static class CustomObjectSerializationException extends Exception {

    public CustomObjectSerializationException() {
    }

    public CustomObjectSerializationException(String message) {
      super(message);
    }

    public CustomObjectSerializationException(String message, Throwable cause) {
      super(message, cause);
    }

    public CustomObjectSerializationException(Throwable cause) {
      super(cause);
    }
  }
}