Package cascading.flow.tez.stream.element

Source Code of cascading.flow.tez.stream.element.TezGroupByGate

/*
* Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cascading.flow.tez.stream.element;

import cascading.flow.FlowProcess;
import cascading.flow.SliceCounters;
import cascading.flow.hadoop.HadoopGroupByClosure;
import cascading.flow.hadoop.util.TimedIterator;
import cascading.flow.stream.duct.Duct;
import cascading.flow.stream.graph.IORole;
import cascading.flow.tez.TezGroupByClosure;
import cascading.flow.tez.util.SecondarySortKeyValuesReader;
import cascading.pipe.GroupBy;
import cascading.tuple.Tuple;
import cascading.tuple.io.TuplePair;
import cascading.util.SortedListMultiMap;
import cascading.util.Util;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.api.LogicalOutput;
import org.apache.tez.runtime.library.api.KeyValuesReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
*
*/
public class TezGroupByGate extends TezGroupGate
  {
  private static final Logger LOG = LoggerFactory.getLogger( TezGroupByGate.class );

  protected TimedIterator<Tuple> timedIterator;

  public TezGroupByGate( FlowProcess flowProcess, GroupBy groupBy, IORole role, LogicalOutput logicalOutput )
    {
    super( flowProcess, groupBy, role, logicalOutput );
    }

  public TezGroupByGate( FlowProcess flowProcess, GroupBy groupBy, IORole role, SortedListMultiMap<Integer, LogicalInput> logicalInputs )
    {
    super( flowProcess, groupBy, role, logicalInputs );

    this.timedIterator = new TimedIterator<>( flowProcess, SliceCounters.Read_Duration, SliceCounters.Tuples_Read );
    }

  protected Throwable reduce() throws Exception
    {
    Throwable localThrowable = null;

    try
      {
      start( this );

      // if multiple ordinals, an input could be duplicated if sourcing multiple paths
      LogicalInput logicalInput = Util.getFirst( logicalInputs.getValues() );

      KeyValuesReader reader = (KeyValuesReader) logicalInput.getReader();

      if( sortFields != null )
        reader = new SecondarySortKeyValuesReader( reader, groupComparators[ 0 ] );

      while( reader.next() )
        {
        Tuple currentKey = (Tuple) reader.getCurrentKey(); // if secondary sorting, is a TuplePair
        Iterable currentValues = reader.getCurrentValues();

        timedIterator.reset( currentValues );

        accept( currentKey, timedIterator ); // will unwrap the TuplePair
        }

      complete( this );
      }
    catch( Throwable throwable )
      {
      if( !( throwable instanceof OutOfMemoryError ) )
        LOG.error( "caught throwable", throwable );

      return throwable;
      }

    return localThrowable;
    }

  @Override
  protected HadoopGroupByClosure createClosure()
    {
    return new TezGroupByClosure( flowProcess, keyFields, valuesFields );
    }

  @Override
  protected void wrapGroupingAndCollect( Duct previous, Tuple valuesTuple, Tuple groupKey ) throws java.io.IOException
    {
    collector.collect( groupKey, valuesTuple );
    }

  @Override
  protected Tuple unwrapGrouping( Tuple key )
    {
    // copying the lhs key during secondary sorting prevents the key from advancing at the end of the
    // aggregation iterator
    return sortFields == null ? key : new Tuple( ( (TuplePair) key ).getLhs() );
    }
  }
TOP

Related Classes of cascading.flow.tez.stream.element.TezGroupByGate

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.