Package cascading.tuple.hadoop.collect

Source Code of cascading.tuple.hadoop.collect.HadoopSpillableTupleMap

/*
* Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cascading.tuple.hadoop.collect;

import java.util.Collection;

import cascading.flow.FlowProcess;
import cascading.provider.FactoryLoader;
import cascading.tuple.Tuple;
import cascading.tuple.collect.Spillable;
import cascading.tuple.collect.SpillableTupleList;
import cascading.tuple.collect.SpillableTupleMap;
import cascading.tuple.collect.TupleCollectionFactory;
import cascading.tuple.collect.TupleMapFactory;
import org.apache.hadoop.conf.Configuration;

/**
* HadoopSpillableTupleMap is responsible for spilling values to disk if the map threshold is reached.
*
* @see SpillableTupleMap
* @see SpillableTupleList
*/
public class HadoopSpillableTupleMap extends SpillableTupleMap
  {
  private final FlowProcess<? extends Configuration> flowProcess;
  private final Spillable.SpillStrategy spillStrategy;
  private final TupleCollectionFactory<Configuration> tupleCollectionFactory;

  public HadoopSpillableTupleMap( int initialCapacity, float loadFactor, int mapThreshold, int listThreshold, FlowProcess<? extends Configuration> flowProcess )
    {
    super( initialCapacity, loadFactor, mapThreshold, listThreshold );
    this.flowProcess = flowProcess;
    this.spillStrategy = getSpillStrategy();

    FactoryLoader loader = FactoryLoader.getInstance();

    this.tupleCollectionFactory = loader.loadFactoryFrom( flowProcess, TupleMapFactory.TUPLE_MAP_FACTORY, HadoopTupleCollectionFactory.class );
    }

  @Override
  protected Collection<Tuple> createTupleCollection( Tuple tuple )
    {
    Collection<Tuple> collection = tupleCollectionFactory.create( flowProcess );

    if( collection instanceof Spillable )
      {
      ( (Spillable) collection ).setGrouping( tuple );
      ( (Spillable) collection ).setSpillListener( getSpillListener() );
      ( (Spillable) collection ).setSpillStrategy( spillStrategy );
      }

    return collection;
    }

  /**
   * Method getSpillStrategy returns a SpillStrategy instance that is passed to the underlying Spillable
   * tuple collection.
   *
   * @return of type Spillable#SpillStrategy
   */
  protected Spillable.SpillStrategy getSpillStrategy()
    {
    return new Spillable.SpillStrategy()
    {
    int minThreshold = (int) ( getMapThreshold() * .05 );

    int current()
      {
      return Math.max( minThreshold, Math.min( getInitListThreshold(), getMapThreshold() / size() ) );
      }

    @Override
    public boolean doSpill( Spillable spillable, int size )
      {
      return current() <= size;
      }

    @Override
    public String getSpillReason( Spillable spillable )
      {
      return "met current threshold: " + current();
      }
    };
    }
  }
TOP

Related Classes of cascading.tuple.hadoop.collect.HadoopSpillableTupleMap

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.