/*
* Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cascading.flow.tez.stream.element;
import cascading.flow.FlowProcess;
import cascading.flow.SliceCounters;
import cascading.flow.hadoop.HadoopGroupByClosure;
import cascading.flow.hadoop.util.TimedIterator;
import cascading.flow.stream.duct.Duct;
import cascading.flow.stream.graph.IORole;
import cascading.flow.tez.TezGroupByClosure;
import cascading.flow.tez.util.SecondarySortKeyValuesReader;
import cascading.pipe.GroupBy;
import cascading.tuple.Tuple;
import cascading.tuple.io.TuplePair;
import cascading.util.SortedListMultiMap;
import cascading.util.Util;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.api.LogicalOutput;
import org.apache.tez.runtime.library.api.KeyValuesReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
*
*/
public class TezGroupByGate extends TezGroupGate
{
private static final Logger LOG = LoggerFactory.getLogger( TezGroupByGate.class );
protected TimedIterator<Tuple> timedIterator;
public TezGroupByGate( FlowProcess flowProcess, GroupBy groupBy, IORole role, LogicalOutput logicalOutput )
{
super( flowProcess, groupBy, role, logicalOutput );
}
public TezGroupByGate( FlowProcess flowProcess, GroupBy groupBy, IORole role, SortedListMultiMap<Integer, LogicalInput> logicalInputs )
{
super( flowProcess, groupBy, role, logicalInputs );
this.timedIterator = new TimedIterator<>( flowProcess, SliceCounters.Read_Duration, SliceCounters.Tuples_Read );
}
protected Throwable reduce() throws Exception
{
Throwable localThrowable = null;
try
{
start( this );
// if multiple ordinals, an input could be duplicated if sourcing multiple paths
LogicalInput logicalInput = Util.getFirst( logicalInputs.getValues() );
KeyValuesReader reader = (KeyValuesReader) logicalInput.getReader();
if( sortFields != null )
reader = new SecondarySortKeyValuesReader( reader, groupComparators[ 0 ] );
while( reader.next() )
{
Tuple currentKey = (Tuple) reader.getCurrentKey(); // if secondary sorting, is a TuplePair
Iterable currentValues = reader.getCurrentValues();
timedIterator.reset( currentValues );
accept( currentKey, timedIterator ); // will unwrap the TuplePair
}
complete( this );
}
catch( Throwable throwable )
{
if( !( throwable instanceof OutOfMemoryError ) )
LOG.error( "caught throwable", throwable );
return throwable;
}
return localThrowable;
}
@Override
protected HadoopGroupByClosure createClosure()
{
return new TezGroupByClosure( flowProcess, keyFields, valuesFields );
}
@Override
protected void wrapGroupingAndCollect( Duct previous, Tuple valuesTuple, Tuple groupKey ) throws java.io.IOException
{
collector.collect( groupKey, valuesTuple );
}
@Override
protected Tuple unwrapGrouping( Tuple key )
{
// copying the lhs key during secondary sorting prevents the key from advancing at the end of the
// aggregation iterator
return sortFields == null ? key : new Tuple( ( (TuplePair) key ).getLhs() );
}
}