/*
* Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cascading.tuple.hadoop.util;
import java.io.IOException;
import java.util.Comparator;
import cascading.CascadingException;
import cascading.flow.hadoop.util.HadoopUtil;
import cascading.tuple.Fields;
import cascading.tuple.StreamComparator;
import cascading.tuple.Tuple;
import cascading.tuple.hadoop.TupleSerialization;
import cascading.tuple.hadoop.io.BufferedInputStream;
import cascading.tuple.hadoop.io.HadoopTupleInputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.RawComparator;
/**
 * Class DeserializerComparator is the base class for all Cascading comparator classes.
 * <p>
 * It holds a pair of buffered tuple input streams (left-hand and right-hand side) and the
 * per-element group comparators read from the current {@link Configuration}, and provides
 * element-wise tuple comparison helpers for subclasses.
 * <p>
 * NOTE(review): instances keep mutable stream/buffer state in fields, so they are presumably
 * not intended to be shared between threads -- confirm against callers.
 */
public abstract class DeserializerComparator<T> extends Configured implements RawComparator<T>
  {
  /** Buffer backing {@link #lhsStream}. */
  final BufferedInputStream lhsBuffer = new BufferedInputStream();
  /** Buffer backing {@link #rhsStream}. */
  final BufferedInputStream rhsBuffer = new BufferedInputStream();

  /** Serialization created from the configuration given to {@link #setConf(Configuration)}. */
  TupleSerialization tupleSerialization;

  /** Tuple stream reading from {@link #lhsBuffer}; created in {@link #setConf(Configuration)}. */
  HadoopTupleInputStream lhsStream;
  /** Tuple stream reading from {@link #rhsBuffer}; created in {@link #setConf(Configuration)}. */
  HadoopTupleInputStream rhsStream;

  /** Per-element comparators for grouping, wrapped by {@link #delegatingComparatorsFor(Comparator[])}. */
  Comparator[] groupComparators;

  /**
   * Stores the configuration and (re)initializes the serialization, both input streams and the
   * group comparators from it.
   * <p>
   * A {@code null} configuration is ignored entirely: nothing is stored and no state is changed.
   *
   * @param conf the configuration to initialize from, may be {@code null}
   */
  @Override
  public void setConf( Configuration conf )
    {
    if( conf == null )
      return;

    super.setConf( conf );

    tupleSerialization = new TupleSerialization( conf );

    // get new readers so deserializers don't compete for the buffer
    lhsStream = new HadoopTupleInputStream( lhsBuffer, tupleSerialization.getElementReader() );
    rhsStream = new HadoopTupleInputStream( rhsBuffer, tupleSerialization.getElementReader() );

    groupComparators = deserializeComparatorsFor( "cascading.group.comparator" );
    groupComparators = delegatingComparatorsFor( groupComparators );
    }

  /**
   * Reads the field comparators stored under the given property name in the current configuration.
   *
   * @param name the configuration property name
   * @return the comparators as returned by {@link #getFieldComparatorsFrom(Configuration, String)}
   * @throws IllegalStateException if no configuration has been set on this instance
   */
  Comparator[] deserializeComparatorsFor( String name )
    {
    Configuration conf = getConf();

    if( conf == null )
      throw new IllegalStateException( "no conf set" );

    return getFieldComparatorsFrom( conf, name );
    }

  /**
   * Reads the comparators of a Base64 serialized {@link Fields} instance stored in the given
   * configuration under the given property name.
   * <p>
   * If the property is not set, an array holding only {@code null} entries is returned. Its length
   * is taken from the integer property {@code name + ".size"}, defaulting to 1.
   *
   * @param conf the configuration to read from
   * @param name the configuration property name
   * @return the comparators of the deserialized Fields instance, or an array of {@code null} values
   * @throws CascadingException if the property value cannot be deserialized; the underlying
   *                            {@link IOException} is attached as the cause
   */
  public static Comparator[] getFieldComparatorsFrom( Configuration conf, String name )
    {
    String value = conf.get( name );

    if( value == null )
      return new Comparator[ conf.getInt( name + ".size", 1 ) ];

    try
      {
      return HadoopUtil.deserializeBase64( value, conf, Fields.class ).getComparators();
      }
    catch( IOException exception )
      {
      // keep the original failure as the cause so the root of the problem is not lost
      throw new CascadingException( "unable to deserialize comparators for: " + name, exception );
      }
    }

  /**
   * Wraps every given field comparator in a tuple element comparator.
   * <ul>
   * <li>a {@link StreamComparator} is wrapped in a {@code TupleElementStreamComparator}</li>
   * <li>any other non-null comparator is wrapped in a {@code TupleElementComparator}</li>
   * <li>a {@code null} entry is replaced by a {@code DelegatingTupleElementComparator} created
   * with the current {@link #tupleSerialization}</li>
   * </ul>
   *
   * @param fieldComparators the comparators to wrap, entries may be {@code null}
   * @return a new array of the same length holding the wrapping comparators
   */
  Comparator[] delegatingComparatorsFor( Comparator[] fieldComparators )
    {
    Comparator[] comparators = new Comparator[ fieldComparators.length ];

    for( int i = 0; i < comparators.length; i++ )
      {
      if( fieldComparators[ i ] instanceof StreamComparator )
        comparators[ i ] = new TupleElementStreamComparator( (StreamComparator) fieldComparators[ i ] );
      else if( fieldComparators[ i ] != null )
        comparators[ i ] = new TupleElementComparator( fieldComparators[ i ] );
      else
        comparators[ i ] = new DelegatingTupleElementComparator( tupleSerialization );
      }

    return comparators;
    }

  /**
   * Compares two tuples element by element.
   * <p>
   * Tuples of different size are ordered by size (the size difference is returned). Otherwise the
   * result of the first element comparison that is not zero is returned, or zero if all elements
   * compare as equal.
   *
   * @param comparators the element comparators; the comparator for position {@code i} is
   *                    {@code comparators[ i % comparators.length ]}
   * @param lhs         the left-hand side tuple
   * @param rhs         the right-hand side tuple
   * @return a negative value, zero or a positive value
   * @throws CascadingException if an element comparator throws an exception
   */
  final int compareTuples( Comparator[] comparators, Tuple lhs, Tuple rhs )
    {
    int lhsLen = lhs.size();
    int rhsLen = rhs.size();

    int c = lhsLen - rhsLen;

    if( c != 0 )
      return c;

    for( int i = 0; i < lhsLen; i++ )
      {
      Object lhsObject = lhs.getObject( i );
      Object rhsObject = rhs.getObject( i );

      try
        {
        // hack to support comparators array length of 1: the index wraps around
        c = comparators[ i % comparators.length ].compare( lhsObject, rhsObject );
        }
      catch( Exception exception )
        {
        throw new CascadingException( "unable to compare object elements in position: " + i + " lhs: '" + lhsObject + "' rhs: '" + rhsObject + "'", exception );
        }

      if( c != 0 )
        return c;
      }

    return 0;
    }

  /**
   * Compares the tuples currently available on {@link #lhsStream} and {@link #rhsStream} element
   * by element.
   * <p>
   * The element count is read from each stream first; differing counts are ordered by count.
   * Otherwise the result of the first element comparison that is not zero is returned, or zero if
   * all elements compare as equal.
   * <p>
   * NOTE(review): presumably the caller has pointed both buffers at the raw bytes to compare
   * before calling this method -- confirm against subclasses.
   *
   * @param comparators the element comparators, each must be a {@link StreamComparator}; the
   *                    comparator for position {@code i} is {@code comparators[ i % comparators.length ]}
   * @return a negative value, zero or a positive value
   * @throws IOException        if the element counts cannot be read from the streams
   * @throws CascadingException if an element comparison throws an exception, including the
   *                            case where a comparator is not a StreamComparator
   */
  final int compareTuples( Comparator[] comparators ) throws IOException
    {
    int lhsLen = lhsStream.getNumElements();
    int rhsLen = rhsStream.getNumElements();

    int c = lhsLen - rhsLen;

    if( c != 0 )
      return c;

    for( int i = 0; i < lhsLen; i++ )
      {
      try
        {
        // hack to support comparators array length of 1: the index wraps around
        c = ( (StreamComparator) comparators[ i % comparators.length ] ).compare( lhsStream, rhsStream );
        }
      catch( Exception exception )
        {
        throw new CascadingException( "unable to compare stream elements in position: " + i, exception );
        }

      if( c != 0 )
        return c;
      }

    return 0;
    }
  }