/**
* Copyright [2012] [Datasalt Systems S.L.]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datasalt.pangool.tuplemr.mapred;
import com.datasalt.pangool.PangoolRuntimeException;
import com.datasalt.pangool.io.BitField;
import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.Schema.Field;
import com.datasalt.pangool.io.Schema.Field.Type;
import com.datasalt.pangool.io.Utf8;
import com.datasalt.pangool.tuplemr.Criteria;
import com.datasalt.pangool.tuplemr.Criteria.Order;
import com.datasalt.pangool.tuplemr.Criteria.SortElement;
import com.datasalt.pangool.tuplemr.SerializationInfo;
import com.datasalt.pangool.tuplemr.TupleMRConfig;
import com.datasalt.pangool.tuplemr.TupleMRConfigBuilder;
import com.datasalt.pangool.tuplemr.serialization.TupleSerialization;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.serializer.Serializer;
import java.io.IOException;
import java.nio.ByteBuffer;
import static org.apache.hadoop.io.WritableComparator.*;
/**
 * Tuple-based MapRed job binary comparator. It decodes the binary serialization
 * performed by {@link TupleSerialization}.
 * <p/>
 * Used to sort tuples according to
 * {@link TupleMRConfigBuilder#setOrderBy(com.datasalt.pangool.tuplemr.OrderBy)}
 * <p/>
 * Every raw (byte-level) comparison overwrites the {@code offsets} and
 * {@code nulls} scratch fields of the instance, so a single instance must not
 * be used for raw comparisons from several threads at the same time.
 */
@SuppressWarnings("rawtypes")
public class SortComparator implements RawComparator<ITuple>, Configurable {

  protected Configuration conf;
  protected TupleMRConfig tupleMRConf;
  protected SerializationInfo serInfo;
  protected final SerializerComparator serializerComparator = new SerializerComparator();

  /**
   * Read cursors into the two byte arrays being compared. They are updated in
   * place while fields are consumed by the raw comparison.
   */
  private static final class Offsets {
    protected int offset1 = 0;
    protected int offset2 = 0;
  }

  /**
   * Null bit fields of the two tuples being compared. They are refilled from the
   * serialized bytes whenever the schema contains nullable fields.
   */
  private static final class Nulls {
    protected BitField nulls1 = new BitField();
    protected BitField nulls2 = new BitField();
  }

  // Scratch state reused across raw comparisons to avoid per-call allocations.
  protected Offsets offsets = new Offsets();
  protected Nulls nulls = new Nulls();

  // True when the job declares two or more intermediate schemas (see setTupleMRConf).
  protected boolean isMultipleSources;

  /**
   * @return the {@link TupleMRConfig} loaded by {@link #setConf(Configuration)},
   *         or <code>null</code> if the comparator has not been configured yet.
   */
  public TupleMRConfig getConfig() {
    return tupleMRConf;
  }

  public SortComparator() {
  }

  /**
   * Never called in MapRed jobs. Just for completion and test purposes.
   * <p/>
   * With a single intermediate schema only the common criteria is applied. With
   * several schemas the tuples are compared by the common criteria first, then by
   * schema id (honoring the configured schemas order) and finally, when both
   * tuples share the schema and it has a specific criteria, by that criteria.
   */
  @Override
  public int compare(ITuple w1, ITuple w2) {
    if (!isMultipleSources) {
      int[] indexes = serInfo.getCommonSchemaIndexTranslation(0);
      Criteria c = tupleMRConf.getCommonCriteria();
      return compare(serInfo.getCommonSchema(), c, w1, indexes, w2, indexes, serInfo.getCommonSchemaSerializers());
    }

    int schemaId1 = tupleMRConf.getSchemaIdByName(w1.getSchema().getName());
    int schemaId2 = tupleMRConf.getSchemaIdByName(w2.getSchema().getName());
    int[] indexes1 = serInfo.getCommonSchemaIndexTranslation(schemaId1);
    int[] indexes2 = serInfo.getCommonSchemaIndexTranslation(schemaId2);
    Criteria c = tupleMRConf.getCommonCriteria();
    int comparison = compare(serInfo.getCommonSchema(), c, w1, indexes1, w2, indexes2, serInfo.getCommonSchemaSerializers());
    if (comparison != 0) {
      return comparison;
    }
    if (schemaId1 != schemaId2) {
      // Equal common part but different sources: order by schema id.
      int r = schemaId1 - schemaId2;
      return (tupleMRConf.getSchemasOrder() == Order.ASC) ? r : -r;
    }

    // Same source on both sides: apply its specific criteria, if any.
    int schemaId = schemaId1;
    c = tupleMRConf.getSpecificOrderBys().get(schemaId);
    if (c == null) {
      return 0;
    }
    int[] indexes = serInfo.getSpecificSchemaIndexTranslation(schemaId);
    return compare(serInfo.getSpecificSchema(schemaId), c, w1, indexes, w2, indexes, serInfo.getSpecificSchemaSerializers().get(schemaId));
  }

  /**
   * Compares two tuples field by field following the given criteria.
   *
   * @param schema      schema whose i-th field describes the i-th sort element
   * @param c           criteria to apply; only its elements are visited
   * @param w1          first tuple
   * @param index1      for each criteria position, the field index inside <code>w1</code>
   * @param w2          second tuple
   * @param index2      for each criteria position, the field index inside <code>w2</code>
   * @param serializers per-field serializers, may be <code>null</code>
   * @return negative, zero or positive as <code>w1</code> sorts before, equal to
   *         or after <code>w2</code>
   */
  public int compare(Schema schema, Criteria c, ITuple w1, int[] index1, ITuple w2,
                     int[] index2, Serializer[] serializers) {
    for (int i = 0; i < c.getElements().size(); i++) {
      Field field = schema.getField(i);
      SortElement e = c.getElements().get(i);
      Object o1 = w1.get(index1[i]);
      Object o2 = w2.get(index2[i]);

      // Null handling: nullCompare already folds in the sort order.
      if (o1 == null || o2 == null) {
        int cmp = nullCompare(o1, o2, e);
        if (cmp != 0) {
          return cmp;
        }
        continue;
      }

      // At this point we know that both values are not null.
      Serializer serializer = (serializers == null) ? null : serializers[i];
      int comparison = compareObjects(o1, o2, e.getCustomComparator(), field.getType(), serializer);
      if (comparison != 0) {
        return applyOrder(comparison, e.getOrder());
      }
    }
    return 0;
  }

  /**
   * Compares two objects. Uses the given custom comparator if present. If the
   * type is {@link Type#OBJECT} and no raw comparator is present, then a serializer
   * comparator is used. Any other type is delegated to
   * {@link #compareObjects(Object, Object)}.
   */
  @SuppressWarnings({"unchecked"})
  public int compareObjects(Object elem1, Object elem2, RawComparator comparator,
                            Type type, Serializer serializer) {
    // If custom, just use custom.
    if (comparator != null) {
      return comparator.compare(elem1, elem2);
    }
    if (type == Type.OBJECT) {
      return serializerComparator.compare(elem1, serializer, elem2, serializer);
    }
    return compareObjects(elem1, elem2);
  }

  /**
   * Compares two deserialized values using their natural ordering.
   * <ul>
   * <li><code>null</code> sorts before any non-null value.</li>
   * <li>{@link String}s are wrapped into {@link Utf8} before being compared.</li>
   * <li><code>byte[]</code> and {@link ByteBuffer} values are compared with
   * <code>compareBytes</code>; a {@link ByteBuffer} contributes the bytes between
   * its position and its limit and must expose a backing array.</li>
   * <li>Anything else must be {@link Comparable} on at least one side.</li>
   * </ul>
   *
   * @throws PangoolRuntimeException if the two values cannot be compared
   */
  @SuppressWarnings("unchecked")
  public static int compareObjects(Object element1, Object element2) {
    if (element1 == null) {
      return (element2 == null) ? 0 : -1;
    }
    if (element2 == null) {
      return 1;
    }

    if (element1 instanceof String) {
      element1 = new Utf8((String) element1);
    }
    if (element2 instanceof String) {
      element2 = new Utf8((String) element2);
    }

    if (element1 instanceof byte[]) {
      byte[] bytes1 = (byte[]) element1;
      if (element2 instanceof byte[]) {
        byte[] bytes2 = (byte[]) element2;
        return compareBytes(bytes1, 0, bytes1.length, bytes2, 0, bytes2.length);
      }
      if (element2 instanceof ByteBuffer) {
        ByteBuffer buffer2 = (ByteBuffer) element2;
        int start2 = buffer2.arrayOffset() + buffer2.position();
        int len2 = buffer2.limit() - buffer2.position();
        return compareBytes(bytes1, 0, bytes1.length, buffer2.array(), start2, len2);
      }
      throw new PangoolRuntimeException("Can't compare byte[] with " + element2.getClass());
    }

    if (element1 instanceof ByteBuffer) {
      ByteBuffer buffer1 = (ByteBuffer) element1;
      int pos1 = buffer1.position();
      int start1 = buffer1.arrayOffset() + pos1;
      int len1 = buffer1.limit() - pos1;
      if (element2 instanceof byte[]) {
        byte[] bytes2 = (byte[]) element2;
        return compareBytes(buffer1.array(), start1, len1, bytes2, 0, bytes2.length);
      }
      if (element2 instanceof ByteBuffer) {
        ByteBuffer buffer2 = (ByteBuffer) element2;
        int pos2 = buffer2.position();
        int start2 = buffer2.arrayOffset() + pos2;
        int len2 = buffer2.limit() - pos2;
        return compareBytes(buffer1.array(), start1, len1, buffer2.array(), start2, len2);
      }
      // The left operand is a ByteBuffer here, so say so in the message.
      throw new PangoolRuntimeException("Can't compare ByteBuffer with " + element2.getClass());
    }

    if (element1 instanceof Comparable) {
      return ((Comparable) element1).compareTo(element2);
    }
    if (element2 instanceof Comparable) {
      int reversed = ((Comparable) element2).compareTo(element1);
      // Flip the sign explicitly: negating Integer.MIN_VALUE would keep it negative.
      return (reversed < 0) ? 1 : ((reversed > 0) ? -1 : 0);
    }
    throw new PangoolRuntimeException("Not comparable elements:" + element1.getClass() + " with object " + element2.getClass());
  }

  /**
   * Compares two values when at least one of them is <code>null</code>, already
   * taking the sort order and null order of the given element into account.
   * <p/>
   * NOTE(review): the "null first" result is only kept for NULL_SMALLEST combined
   * with ASC; every other combination is reversed, which also places nulls last
   * for NULL_BIGGEST with DESC. That matches the raw comparison in this class but
   * looks suspicious -- confirm against the intended {@code NullOrder} semantics.
   *
   * @throws IllegalArgumentException if none of the two objects is <code>null</code>
   */
  public int nullCompare(Object o1, Object o2, SortElement se) {
    if (o1 != null && o2 != null) {
      throw new IllegalArgumentException("None of the two object passed as parameters are null. " +
          "That is not allowed");
    }
    int res;
    if (o1 == null) {
      res = (o2 == null) ? 0 : -1;
    } else {
      res = 1;
    }
    return (se.getNullOrder() == Criteria.NullOrder.NULL_SMALLEST && se.getOrder() == Order.ASC) ? res : -res;
  }

  /**
   * Raw comparison over serialized tuples. Dispatches to the single-source or the
   * multiple-sources implementation and rethrows any {@link IOException} wrapped
   * in a {@link RuntimeException}.
   */
  @Override
  public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
    try {
      return (isMultipleSources) ? compareMultipleSources(b1, s1, l1, b2, s2, l2)
          : compareOneSource(b1, s1, l1, b2, s2, l2);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Raw comparison when several intermediate schemas exist: common criteria first,
   * then the schema id (read as a VInt at the position where the common
   * comparison stopped) and finally the specific criteria of the shared schema,
   * if one is configured.
   */
  protected int compareMultipleSources(byte[] b1, int s1, int l1, byte[] b2, int s2,
                                       int l2) throws IOException {
    Schema commonSchema = serInfo.getCommonSchema();
    Criteria commonOrder = tupleMRConf.getCommonCriteria();
    int comparison = compare(b1, s1, b2, s2, commonSchema, commonOrder, offsets, nulls);
    if (comparison != 0) {
      return comparison;
    }

    int schemaId1 = readVInt(b1, offsets.offset1);
    int schemaId2 = readVInt(b2, offsets.offset2);
    if (schemaId1 != schemaId2) {
      int r = schemaId1 - schemaId2;
      return (tupleMRConf.getSchemasOrder() == Order.ASC) ? r : -r;
    }

    // Both ids are equal, hence encoded with the same number of bytes.
    int vintSize = WritableUtils.decodeVIntSize(b1[offsets.offset1]);
    offsets.offset1 += vintSize;
    offsets.offset2 += vintSize;

    // sources are the same
    Criteria criteria = tupleMRConf.getSpecificOrderBys().get(schemaId1);
    if (criteria == null) {
      return 0;
    }
    Schema specificSchema = serInfo.getSpecificSchema(schemaId1);
    return compare(b1, offsets.offset1, b2, offsets.offset2, specificSchema, criteria,
        offsets, nulls);
  }

  /**
   * Raw comparison when there is a single intermediate schema: only the common
   * criteria is applied.
   */
  private int compareOneSource(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2)
      throws IOException {
    Schema commonSchema = serInfo.getCommonSchema();
    Criteria commonOrder = tupleMRConf.getCommonCriteria();
    return compare(b1, s1, b2, s2, commonSchema, commonOrder, offsets, nulls);
  }

  /**
   * Compares the serialized fields of two tuples, starting at <code>s1</code> and
   * <code>s2</code>, following the given criteria. The cursors in <code>o</code>
   * are reset to the start positions and advanced as fields are consumed; on a
   * zero result they point right after the last field visited by the criteria.
   *
   * @param b1       bytes holding the first tuple
   * @param s1       start of the first tuple inside <code>b1</code>
   * @param b2       bytes holding the second tuple
   * @param s2       start of the second tuple inside <code>b2</code>
   * @param schema   schema whose i-th field describes the i-th sort element
   * @param criteria sort criteria to apply
   * @param o        cursors, overwritten by this call
   * @param n        null bit fields, refilled when the schema has nullable fields
   * @throws IOException if a field type has no raw comparison or a value cannot be read
   */
  protected int compare(byte[] b1, int s1, byte[] b2, int s2, Schema schema,
                        Criteria criteria, Offsets o, Nulls n) throws IOException {
    o.offset1 = s1;
    o.offset2 = s2;

    // Reading nulls bit field, if present
    if (schema.containsNullableFields()) {
      o.offset1 += n.nulls1.deser(b1, s1);
      o.offset2 += n.nulls2.deser(b2, s2);
    }

    for (int depth = 0; depth < criteria.getElements().size(); depth++) {
      Field field = schema.getField(depth);
      Field.Type type = field.getType();
      SortElement sortElement = criteria.getElements().get(depth);
      Order sort = sortElement.getOrder();
      RawComparator comparator = sortElement.getCustomComparator();

      // Control for nulls, if field is nullable.
      if (field.isNullable()) {
        Criteria.NullOrder nullOrder = sortElement.getNullOrder();
        int nullPos = schema.getNullablePositionFromIndex(depth);
        boolean firstNull = n.nulls1.isSet(nullPos);
        boolean secondNull = n.nulls2.isSet(nullPos);
        if (firstNull && secondNull) {
          // Both are null, so both are equal. No space is used. Continue.
          continue;
        }
        if (firstNull || secondNull) {
          // NOTE(review): same rule as nullCompare(); nulls also end up last for
          // NULL_BIGGEST with DESC -- confirm this is intended.
          boolean nullGoesFirst = (nullOrder == Criteria.NullOrder.NULL_SMALLEST && sort == Order.ASC);
          if (firstNull) {
            return nullGoesFirst ? -1 : 1;
          }
          return nullGoesFirst ? 1 : -1;
        }
      }

      if (comparator != null) {
        // Custom comparator: the value is read as a VInt length followed by that
        // many bytes, which are handed over to the comparator.
        int length1 = WritableComparator.readVInt(b1, o.offset1);
        int length2 = WritableComparator.readVInt(b2, o.offset2);
        o.offset1 += WritableUtils.decodeVIntSize(b1[o.offset1]);
        o.offset2 += WritableUtils.decodeVIntSize(b2[o.offset2]);
        int comparison = comparator.compare(b1, o.offset1, length1, b2,
            o.offset2, length2);
        o.offset1 += length1;
        o.offset2 += length2;
        if (comparison != 0) {
          return applyOrder(comparison, sort);
        }
      } else {
        //not custom comparator
        switch (type) {
          case INT:
          case ENUM: {
            int value1 = readVInt(b1, o.offset1);
            int value2 = readVInt(b2, o.offset2);
            if (value1 != value2) {
              return applyOrder((value1 > value2) ? 1 : -1, sort);
            }
            // Equal values share the same encoded size.
            int vintSize = WritableUtils.decodeVIntSize(b1[o.offset1]);
            o.offset1 += vintSize;
            o.offset2 += vintSize;
          }
          break;
          case LONG: {
            long value1 = readVLong(b1, o.offset1);
            long value2 = readVLong(b2, o.offset2);
            if (value1 != value2) {
              return applyOrder((value1 > value2) ? 1 : -1, sort);
            }
            // Equal values share the same encoded size.
            int vIntSize = WritableUtils.decodeVIntSize(b1[o.offset1]);
            o.offset1 += vIntSize;
            o.offset2 += vIntSize;
          }
          break;
          case FLOAT: {
            float value1 = readFloat(b1, o.offset1);
            float value2 = readFloat(b2, o.offset2);
            int comp = Float.compare(value1, value2);
            if (comp != 0) {
              return applyOrder(comp, sort);
            }
            o.offset1 += Float.SIZE / 8;
            o.offset2 += Float.SIZE / 8;
          }
          break;
          case DOUBLE: {
            double value1 = readDouble(b1, o.offset1);
            double value2 = readDouble(b2, o.offset2);
            int comp = Double.compare(value1, value2);
            if (comp != 0) {
              return applyOrder(comp, sort);
            }
            o.offset1 += Double.SIZE / 8;
            o.offset2 += Double.SIZE / 8;
          }
          break;
          case BOOLEAN: {
            // A boolean takes exactly one byte on each side.
            byte value1 = b1[o.offset1++];
            byte value2 = b2[o.offset2++];
            if (value1 != value2) {
              return applyOrder((value1 > value2) ? 1 : -1, sort);
            }
          }
          break;
          case STRING:
          case OBJECT:
          case BYTES: {
            // VInt length followed by the payload, compared byte by byte.
            int length1 = readVInt(b1, o.offset1);
            int length2 = readVInt(b2, o.offset2);
            o.offset1 += WritableUtils.decodeVIntSize(b1[o.offset1]);
            o.offset2 += WritableUtils.decodeVIntSize(b2[o.offset2]);
            int comparison = compareBytes(b1, o.offset1, length1, b2, o.offset2, length2);
            o.offset1 += length1;
            o.offset2 += length2;
            if (comparison != 0) {
              return applyOrder(comparison, sort);
            }
          }
          break;
          default:
            throw new IOException("Not supported comparison for type:" + type);
        }
      }
    }
    return 0; // equals
  }

  /**
   * Returns the comparison untouched for ascending order and negated otherwise.
   */
  private static int applyOrder(int comparison, Order sort) {
    return (sort == Order.ASC) ? comparison : -comparison;
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  /**
   * Stores the configuration, loads the {@link TupleMRConfig} from it and
   * initializes the custom comparators and the serializer comparator. A
   * <code>null</code> configuration is ignored. Any failure is rethrown wrapped in
   * a {@link RuntimeException}, including the one raised when the TupleMR config
   * had already been set by a previous call.
   */
  @Override
  public void setConf(Configuration conf) {
    try {
      if (conf != null) {
        this.conf = conf;
        setTupleMRConf(TupleMRConfig.get(conf));
        TupleMRConfigBuilder.initializeComparators(conf, this.tupleMRConf);
        serializerComparator.setConf(conf);
      }
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Sets the TupleMR config once and derives the serialization info and the
   * multiple-sources flag from it.
   *
   * @throws RuntimeException if a config was already set
   */
  private void setTupleMRConf(TupleMRConfig config) {
    if (this.tupleMRConf != null) {
      throw new RuntimeException("TupleMR config is already set");
    }
    this.tupleMRConf = config;
    this.serInfo = tupleMRConf.getSerializationInfo();
    this.isMultipleSources = tupleMRConf.getNumIntermediateSchemas() >= 2;
  }
}