Package cascading.flow

Source Code of cascading.flow.BaseFlow

/*
* Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cascading.flow;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;

import cascading.CascadingException;
import cascading.cascade.Cascade;
import cascading.flow.planner.BaseFlowStep;
import cascading.flow.planner.FlowStepJob;
import cascading.flow.planner.PlatformInfo;
import cascading.flow.planner.graph.FlowElementGraph;
import cascading.flow.planner.process.FlowStepGraph;
import cascading.management.CascadingServices;
import cascading.management.UnitOfWorkExecutorStrategy;
import cascading.management.UnitOfWorkSpawnStrategy;
import cascading.management.state.ClientState;
import cascading.property.AppProps;
import cascading.property.PropertyUtil;
import cascading.stats.FlowStats;
import cascading.tap.Tap;
import cascading.tuple.Fields;
import cascading.tuple.TupleEntryCollector;
import cascading.tuple.TupleEntryIterator;
import cascading.util.ShutdownUtil;
import cascading.util.Update;
import cascading.util.Util;
import cascading.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import riffle.process.DependencyIncoming;
import riffle.process.DependencyOutgoing;
import riffle.process.ProcessCleanup;
import riffle.process.ProcessComplete;
import riffle.process.ProcessPrepare;
import riffle.process.ProcessStart;
import riffle.process.ProcessStop;

import static cascading.util.Util.formatDurationFromMillis;
import static org.jgrapht.Graphs.predecessorListOf;

@riffle.process.Process
public abstract class BaseFlow<Config> implements Flow<Config>
  {
  /** Shared logger; note it is registered under {@code Flow.class}, not {@code BaseFlow.class}. */
  private static final Logger LOG = LoggerFactory.getLogger( Flow.class );

  /** Platform information handed to the constructors and returned by {@link #getPlatformInfo()}. */
  private PlatformInfo platformInfo;

  /** Unique flow id, created lazily by {@link #getID()}. */
  private String id;
  /** Flow name, see {@link #getName()}. */
  private String name;
  /** Run id copied from the FlowDef, when constructed from one. */
  private String runID;
  /** Class path entries copied from the FlowDef, when constructed from one. */
  private List<String> classPath; // may remain null
  /** Tags copied from the FlowDef, see {@link #getTags()}. */
  private String tags;
  /** Registered flow listeners, created lazily by {@link #getListeners()}. */
  private List<SafeFlowListener> listeners;
  /** Strategy consulted by {@link #isSkipFlow()}; never null. */
  private FlowSkipStrategy flowSkipStrategy = new FlowSkipIfSinkNotStale();
  /** Statistics for this flow, see {@link #getFlowStats()}. */
  protected FlowStats flowStats; // don't use a listener to set values
  /** Source taps keyed by name. */
  protected Map<String, Tap> sources = Collections.emptyMap();
  /** Sink taps keyed by name. */
  protected Map<String, Tap> sinks = Collections.emptyMap();
  /** Trap taps keyed by name. */
  private Map<String, Tap> traps = Collections.emptyMap();
  /** Checkpoint taps keyed by name. */
  private Map<String, Tap> checkpoints = Collections.emptyMap();
  /** Value returned by {@link #isStopJobsOnExit()}; defaults to true. */
  protected boolean stopJobsOnExit = true;
  /** Submit priority, constrained to 1..10 by {@link #setSubmitPriority(int)}; defaults to 5. */
  private int submitPriority = 5;

  /** Graph of the steps that make up this flow. */
  private FlowStepGraph flowStepGraph;
  /** Thread running this flow; null until {@link #start()} and reset to null by {@link #complete()}. */
  protected transient Thread thread;
  /** Failure captured while the flow ran, rethrown by {@link #complete()}. */
  private Throwable throwable;
  /** Set to true once {@link #stop()} has been called. */
  protected boolean stop;

  /** Graph of the flow elements this flow was planned from. */
  private FlowElementGraph pipeGraph; // only used for documentation purposes

  /** Services instance, created lazily by {@link #getCascadingServices()}. */
  private transient CascadingServices cascadingServices;

  /** Optional step strategy, see {@link #setFlowStepStrategy(FlowStepStrategy)}. */
  private FlowStepStrategy<Config> flowStepStrategy = null;
  /** Steps in topological order, cached by {@link #getFlowSteps()}. */
  private transient List<FlowStep<Config>> steps;
  /** Jobs keyed by step name, populated by {@link #initializeNewJobsMap()}. */
  private transient Map<String, FlowStepJob<Config>> jobsMap;
  /** Strategy used to spawn and shut down the step jobs. */
  private transient UnitOfWorkSpawnStrategy spawnStrategy = new UnitOfWorkExecutorStrategy();

  /** Fair lock held for the duration of {@link #stop()}; {@link #complete()} waits on it. */
  private transient ReentrantLock stopLock = new ReentrantLock( true );
  /** Shutdown hook handle; not assigned within the visible part of this class. */
  protected ShutdownUtil.Hook shutdownHook;

  /** Optional descriptor entries, see {@link #getFlowDescriptor()}; may remain null. */
  private HashMap<String, String> flowDescriptor;

  /**
   * Returns property stopJobsOnExit.
   *
   * @param properties of type Map
   * @return a boolean
   */
  static boolean getStopJobsOnExit( Map<Object, Object> properties )
    {
    return Boolean.parseBoolean( PropertyUtil.getProperty( properties, FlowProps.STOP_JOBS_ON_EXIT, "true" ) );
    }

  /**
   * Used for testing.
   * <p/>
   * Names the flow "NA" and creates its {@link FlowStats}; no platform info, properties or taps are set.
   */
  protected BaseFlow()
    {
    this.name = "NA";
    this.flowStats = createPrepareFlowStats();
    }

  /**
   * Creates a named flow with an empty flow descriptor by delegating to the five argument constructor.
   *
   * @param platformInfo  of type PlatformInfo
   * @param properties    of type Map
   * @param defaultConfig of type Config
   * @param name          of type String
   */
  protected BaseFlow( PlatformInfo platformInfo, Map<Object, Object> properties, Config defaultConfig, String name )
    {
    this( platformInfo, properties, defaultConfig, name, new LinkedHashMap<String, String>() );
    }

  /**
   * Creates a named flow, copying the given descriptor, stamping the session properties, initializing
   * the config and finally creating the {@link FlowStats}.
   *
   * @param platformInfo   of type PlatformInfo
   * @param properties     of type Map, modified in place by {@link #addSessionProperties(Map)} when not null
   * @param defaultConfig  of type Config
   * @param name           of type String
   * @param flowDescriptor of type Map, copied when not null
   */
  protected BaseFlow( PlatformInfo platformInfo, Map<Object, Object> properties, Config defaultConfig, String name, Map<String, String> flowDescriptor )
    {
    this.platformInfo = platformInfo;
    this.name = name;

    // defensive copy so later changes to the caller's map are not visible here
    if( flowDescriptor != null )
      this.flowDescriptor = new LinkedHashMap<String, String>( flowDescriptor );

    addSessionProperties( properties );
    initConfig( properties, defaultConfig );

    this.flowStats = createPrepareFlowStats(); // must be last
    }

  /**
   * Creates a flow from a {@link FlowDef}, taking name, tags, run id, class path, descriptor and
   * copies of all taps from it.
   * <p/>
   * This constructor does not create the {@link FlowStats}; that happens in
   * {@link #initialize(FlowElementGraph, FlowStepGraph)}.
   *
   * @param platformInfo  of type PlatformInfo
   * @param properties    of type Map, flattened via {@link PropertyUtil#asFlatMap} before use
   * @param defaultConfig of type Config
   * @param flowDef       of type FlowDef
   */
  protected BaseFlow( PlatformInfo platformInfo, Map<Object, Object> properties, Config defaultConfig, FlowDef flowDef )
    {
    properties = PropertyUtil.asFlatMap( properties );

    this.platformInfo = platformInfo;
    this.name = flowDef.getName();
    this.tags = flowDef.getTags();
    this.runID = flowDef.getRunID();
    this.classPath = flowDef.getClassPath();

    // leave the descriptor null when the FlowDef carries no entries
    if( !flowDef.getFlowDescriptor().isEmpty() )
      this.flowDescriptor = new LinkedHashMap<String, String>( flowDef.getFlowDescriptor() );

    addSessionProperties( properties );
    initConfig( properties, defaultConfig );
    setSources( flowDef.getSourcesCopy() );
    setSinks( flowDef.getSinksCopy() );
    setTraps( flowDef.getTrapsCopy() );
    setCheckpoints( flowDef.getCheckpointsCopy() );
    initFromTaps();

    retrieveSourceFields();
    retrieveSinkFields();
    }

  /**
   * Returns the platform information this flow was constructed with.
   *
   * @return the PlatformInfo, null when the no-arg constructor was used
   */
  public PlatformInfo getPlatformInfo()
    {
    return platformInfo;
    }

  /**
   * Binds the planned graphs to this flow, attaches this flow to every step, recreates the
   * {@link FlowStats} and builds the jobs map.
   *
   * @param pipeGraph     of type FlowElementGraph
   * @param flowStepGraph of type FlowStepGraph
   */
  public void initialize( FlowElementGraph pipeGraph, FlowStepGraph flowStepGraph )
    {
    this.pipeGraph = pipeGraph;
    this.flowStepGraph = flowStepGraph;

    initSteps();

    this.flowStats = createPrepareFlowStats(); // must be last

    // the jobs map registers each step's stats on the flowStats created above
    initializeNewJobsMap();
    }

  /**
   * Presents the resolved fields to the source, sink and checkpoint taps found in the given graph,
   * then returns a new graph constructed from it.
   *
   * @param pipeGraph of type FlowElementGraph
   * @return a new FlowElementGraph created from the given one
   */
  public FlowElementGraph updateSchemes( FlowElementGraph pipeGraph )
    {
    presentSourceFields( pipeGraph );

    presentSinkFields( pipeGraph );

    return new FlowElementGraph( pipeGraph );
    }

  /**
   * Force a Scheme to fetch any fields from a meta-data store.
   * <p/>
   * Calls {@code retrieveSourceFields} on every source tap with the current flow process.
   */
  protected void retrieveSourceFields()
    {
    for( Tap tap : sources.values() )
      tap.retrieveSourceFields( getFlowProcess() );
    }

  /**
   * Present the current resolved fields for the Tap
   *
   * @param pipeGraph
   */
  protected void presentSourceFields( FlowElementGraph pipeGraph )
    {
    for( Tap tap : sources.values() )
      {
      if( pipeGraph.containsVertex( tap ) )
        tap.presentSourceFields( getFlowProcess(), getFieldsFor( pipeGraph, tap ) );
      }

    for( Tap tap : checkpoints.values() )
      {
      if( pipeGraph.containsVertex( tap ) )
        tap.presentSourceFields( getFlowProcess(), getFieldsFor( pipeGraph, tap ) );
      }
    }

  /**
   * Force a Scheme to fetch any fields from a meta-data store.
   * <p/>
   * Calls {@code retrieveSinkFields} on every sink tap with the current flow process.
   */
  protected void retrieveSinkFields()
    {
    for( Tap tap : sinks.values() )
      tap.retrieveSinkFields( getFlowProcess() );
    }

  /**
   * Present the current resolved fields for the Tap
   *
   * @param pipeGraph
   */
  protected void presentSinkFields( FlowElementGraph pipeGraph )
    {
    for( Tap tap : sinks.values() )
      {
      if( pipeGraph.containsVertex( tap ) )
        tap.presentSinkFields( getFlowProcess(), getFieldsFor( pipeGraph, tap ) );
      }

    for( Tap tap : checkpoints.values() )
      {
      if( pipeGraph.containsVertex( tap ) )
        tap.presentSinkFields( getFlowProcess(), getFieldsFor( pipeGraph, tap ) );
      }
    }

  /**
   * Returns the out values fields of the first outgoing edge of the given tap in the given graph.
   * <p/>
   * NOTE(review): the tap is expected to have at least one outgoing edge; otherwise
   * {@code iterator().next()} will fail.
   *
   * @param pipeGraph of type FlowElementGraph
   * @param tap       of type Tap
   * @return the Fields declared on the first outgoing edge
   */
  protected Fields getFieldsFor( FlowElementGraph pipeGraph, Tap tap )
    {
    return pipeGraph.outgoingEdgesOf( tap ).iterator().next().getOutValuesFields();
    }

  private void addSessionProperties( Map<Object, Object> properties )
    {
    if( properties == null )
      return;

    PropertyUtil.setProperty( properties, CASCADING_FLOW_ID, getID() );
    PropertyUtil.setProperty( properties, "cascading.flow.tags", getTags() );
    AppProps.setApplicationID( properties );
    PropertyUtil.setProperty( properties, "cascading.app.name", makeAppName( properties ) );
    PropertyUtil.setProperty( properties, "cascading.app.version", makeAppVersion( properties ) );
    }

  private String makeAppName( Map<Object, Object> properties )
    {
    if( properties == null )
      return null;

    String name = AppProps.getApplicationName( properties );

    if( name != null )
      return name;

    return Util.findName( AppProps.getApplicationJarPath( properties ) );
    }

  private String makeAppVersion( Map<Object, Object> properties )
    {
    if( properties == null )
      return null;

    String name = AppProps.getApplicationVersion( properties );

    if( name != null )
      return name;

    return Util.findVersion( AppProps.getApplicationJarPath( properties ) );
    }

  private FlowStats createPrepareFlowStats()
    {
    FlowStats flowStats = new FlowStats( this, getClientState() );

    flowStats.prepare();
    flowStats.markPending();

    return flowStats;
    }

  public CascadingServices getCascadingServices()
    {
    if( cascadingServices == null )
      cascadingServices = new CascadingServices( getConfigAsProperties() );

    return cascadingServices;
    }

  private ClientState getClientState()
    {
    return getFlowSession().getCascadingServices().createClientState( getID() );
    }

  protected void initSteps()
    {
    if( flowStepGraph == null )
      return;

    for( Object flowStep : flowStepGraph.vertexSet() )
      ( (BaseFlowStep<Config>) flowStep ).setFlow( this );
    }

  /**
   * Lets every source, sink and trap tap initialize this flow's configuration.
   * <p/>
   * NOTE(review): checkpoint taps are not visited here — confirm that is intended.
   */
  private void initFromTaps()
    {
    initFromTaps( sources );
    initFromTaps( sinks );
    initFromTaps( traps );
    }

  /**
   * Calls {@code flowConfInit} with this flow on each tap in the given map.
   *
   * @param taps of type Map
   */
  private void initFromTaps( Map<String, Tap> taps )
    {
    for( Tap tap : taps.values() )
      tap.flowConfInit( this );
    }

  /**
   * Returns the name of this flow.
   *
   * @return the name as a String
   */
  @Override
  public String getName()
    {
    return name;
    }

  /**
   * Replaces the name of this flow.
   *
   * @param name of type String
   */
  protected void setName( String name )
    {
    this.name = name;
    }

  @Override
  public String getID()
    {
    if( id == null )
      id = Util.createUniqueID();

    return id;
    }

  /**
   * Returns the tags of this flow.
   *
   * @return the tags as a String, may be null
   */
  @Override
  public String getTags()
    {
    return tags;
    }

  /**
   * Returns the submit priority of this flow; 5 unless changed via {@link #setSubmitPriority(int)}.
   *
   * @return the priority as an int
   */
  @Override
  public int getSubmitPriority()
    {
    return submitPriority;
    }

  @Override
  public void setSubmitPriority( int submitPriority )
    {
    if( submitPriority < 1 || submitPriority > 10 )
      throw new IllegalArgumentException( "submitPriority must be between 1 and 10 inclusive, was: " + submitPriority );

    this.submitPriority = submitPriority;
    }

  /**
   * Returns the element graph this flow was initialized with.
   *
   * @return the FlowElementGraph, may be null
   */
  FlowElementGraph getPipeGraph()
    {
    return pipeGraph;
    }

  /**
   * Returns the step graph of this flow.
   *
   * @return the FlowStepGraph, may be null
   */
  FlowStepGraph getFlowStepGraph()
    {
    return flowStepGraph;
    }

  /**
   * Replaces the source taps, first registering any tap that is also a {@link FlowListener}.
   * The given map is stored as-is, not copied.
   *
   * @param sources of type Map
   */
  protected void setSources( Map<String, Tap> sources )
    {
    addListeners( sources.values() );
    this.sources = sources;
    }

  /**
   * Replaces the sink taps, first registering any tap that is also a {@link FlowListener}.
   * The given map is stored as-is, not copied.
   *
   * @param sinks of type Map
   */
  protected void setSinks( Map<String, Tap> sinks )
    {
    addListeners( sinks.values() );
    this.sinks = sinks;
    }

  /**
   * Replaces the trap taps, first registering any tap that is also a {@link FlowListener}.
   * The given map is stored as-is, not copied.
   *
   * @param traps of type Map
   */
  protected void setTraps( Map<String, Tap> traps )
    {
    addListeners( traps.values() );
    this.traps = traps;
    }

  /**
   * Replaces the checkpoint taps, first registering any tap that is also a {@link FlowListener}.
   * The given map is stored as-is, not copied.
   *
   * @param checkpoints of type Map
   */
  protected void setCheckpoints( Map<String, Tap> checkpoints )
    {
    addListeners( checkpoints.values() );
    this.checkpoints = checkpoints;
    }

  /**
   * Replaces the step graph. Unlike {@link #initialize(FlowElementGraph, FlowStepGraph)} this only
   * assigns the field; steps, stats and jobs are not re-initialized.
   *
   * @param flowStepGraph of type FlowStepGraph
   */
  protected void setFlowStepGraph( FlowStepGraph flowStepGraph )
    {
    this.flowStepGraph = flowStepGraph;
    }

  /**
   * This method creates a new internal Config with the parentConfig as defaults, using the properties to override
   * the defaults.
   * <p/>
   * Called from the constructors after the session properties have been added.
   *
   * @param properties   of type Map, may be null
   * @param parentConfig of type Config
   */
  protected abstract void initConfig( Map<Object, Object> properties, Config parentConfig );

  public Config createConfig( Map<Object, Object> properties, Config defaultConfig )
    {
    Config config = newConfig( defaultConfig );

    if( properties == null )
      return config;

    Set<Object> keys = new HashSet<>( properties.keySet() );

    // keys will only be grabbed if both key/value are String, so keep orig keys
    if( properties instanceof Properties )
      keys.addAll( ( (Properties) properties ).stringPropertyNames() );

    for( Object key : keys )
      {
      Object value = properties.get( key );

      if( value == null && properties instanceof Properties && key instanceof String )
        value = ( (Properties) properties ).getProperty( (String) key );

      if( value == null ) // don't stuff null values
        continue;

      setConfigProperty( config, key, value );
      }

    return config;
    }

  /**
   * Stores a single key/value pair on the given Config; called by
   * {@link #createConfig(Map, Object)} for every non-null property value.
   *
   * @param config of type Config
   * @param key    of type Object
   * @param value  of type Object, never null when called from createConfig
   */
  protected abstract void setConfigProperty( Config config, Object key, Object value );

  /**
   * Creates a new Config instance from the given default; called first by
   * {@link #createConfig(Map, Object)}.
   *
   * @param defaultConfig of type Config
   * @return a new Config
   */
  protected abstract Config newConfig( Config defaultConfig );

  /**
   * Initializes flow settings from the given properties; currently only {@code stopJobsOnExit}.
   *
   * @param properties of type Map
   */
  protected void initFromProperties( Map<Object, Object> properties )
    {
    stopJobsOnExit = getStopJobsOnExit( properties );
    }

  /**
   * Returns a new {@link FlowSession} wrapping this flow's {@link CascadingServices}.
   * A fresh session object is created on every call.
   *
   * @return a new FlowSession
   */
  public FlowSession getFlowSession()
    {
    return new FlowSession( getCascadingServices() );
    }

  /**
   * Returns the statistics object of this flow.
   *
   * @return the FlowStats
   */
  @Override
  public FlowStats getFlowStats()
    {
    return flowStats;
    }

  @Override
  public Map<String, String> getFlowDescriptor()
    {
    if( flowDescriptor == null )
      return Collections.emptyMap();

    return Collections.unmodifiableMap( flowDescriptor );
    }

  /**
   * Returns the statistics object of this flow; same as {@link #getFlowStats()}.
   *
   * @return the FlowStats
   */
  @Override
  public FlowStats getStats()
    {
    return getFlowStats();
    }

  void addListeners( Collection listeners )
    {
    for( Object listener : listeners )
      {
      if( listener instanceof FlowListener )
        addListener( (FlowListener) listener );
      }
    }

  List<SafeFlowListener> getListeners()
    {
    if( listeners == null )
      listeners = new LinkedList<SafeFlowListener>();

    return listeners;
    }

  @Override
  public boolean hasListeners()
    {
    return listeners != null && !listeners.isEmpty();
    }

  /**
   * Registers the given listener, wrapped in a {@link SafeFlowListener}.
   *
   * @param flowListener of type FlowListener
   */
  @Override
  public void addListener( FlowListener flowListener )
    {
    getListeners().add( new SafeFlowListener( flowListener ) );
    }

  /**
   * Removes the given listener by removing a new {@link SafeFlowListener} wrapper for it from the list.
   * <p/>
   * NOTE(review): this presumably relies on SafeFlowListener defining equality in terms of the
   * wrapped listener — confirm against that class.
   *
   * @param flowListener of type FlowListener
   * @return true when the list contained a matching element
   */
  @Override
  public boolean removeListener( FlowListener flowListener )
    {
    return getListeners().remove( new SafeFlowListener( flowListener ) );
    }

  @Override
  public boolean hasStepListeners()
    {
    boolean hasStepListeners = false;

    for( FlowStep step : getFlowSteps() )
      hasStepListeners |= step.hasListeners();

    return hasStepListeners;
    }

  /**
   * Registers the given listener on every step returned by {@link #getFlowSteps()}.
   *
   * @param flowStepListener of type FlowStepListener
   */
  @Override
  public void addStepListener( FlowStepListener flowStepListener )
    {
    for( FlowStep step : getFlowSteps() )
      step.addListener( flowStepListener );
    }

  @Override
  public boolean removeStepListener( FlowStepListener flowStepListener )
    {
    boolean listenerRemoved = true;

    for( FlowStep step : getFlowSteps() )
      listenerRemoved &= step.removeListener( flowStepListener );

    return listenerRemoved;
    }

  /**
   * Returns the source taps keyed by name.
   *
   * @return an unmodifiable view of the sources Map
   */
  @Override
  public Map<String, Tap> getSources()
    {
    return Collections.unmodifiableMap( sources );
    }

  /**
   * Returns the names of all source taps.
   *
   * @return a new List holding a copy of the source names
   */
  @Override
  public List<String> getSourceNames()
    {
    return new ArrayList<String>( sources.keySet() );
    }

  /**
   * Returns the source tap registered under the given name.
   *
   * @param name of type String
   * @return the Tap, or null when the sources map has no mapping for the name
   */
  @Override
  public Tap getSource( String name )
    {
    return sources.get( name );
    }

  /**
   * Returns all source taps; exposed as the riffle incoming dependencies of this process.
   *
   * @return the values of the unmodifiable sources Map
   */
  @Override
  @DependencyIncoming
  public Collection<Tap> getSourcesCollection()
    {
    return getSources().values();
    }

  /**
   * Returns the sink taps keyed by name.
   *
   * @return an unmodifiable view of the sinks Map
   */
  @Override
  public Map<String, Tap> getSinks()
    {
    return Collections.unmodifiableMap( sinks );
    }

  /**
   * Returns the names of all sink taps.
   *
   * @return a new List holding a copy of the sink names
   */
  @Override
  public List<String> getSinkNames()
    {
    return new ArrayList<String>( sinks.keySet() );
    }

  /**
   * Returns the sink tap registered under the given name.
   *
   * @param name of type String
   * @return the Tap, or null when the sinks map has no mapping for the name
   */
  @Override
  public Tap getSink( String name )
    {
    return sinks.get( name );
    }

  /**
   * Returns all sink taps; exposed as the riffle outgoing dependencies of this process.
   *
   * @return the values of the unmodifiable sinks Map
   */
  @Override
  @DependencyOutgoing
  public Collection<Tap> getSinksCollection()
    {
    return getSinks().values();
    }

  /**
   * Returns the first sink tap in the iteration order of the sinks map.
   * <p/>
   * When no sinks are registered, {@code iterator().next()} throws a NoSuchElementException.
   *
   * @return the first sink Tap
   */
  @Override
  public Tap getSink()
    {
    return sinks.values().iterator().next();
    }

  /**
   * Returns the trap taps keyed by name.
   *
   * @return an unmodifiable view of the traps Map
   */
  @Override
  public Map<String, Tap> getTraps()
    {
    return Collections.unmodifiableMap( traps );
    }

  /**
   * Returns the names of all trap taps.
   *
   * @return a new List holding a copy of the trap names
   */
  @Override
  public List<String> getTrapNames()
    {
    return new ArrayList<String>( traps.keySet() );
    }

  /**
   * Returns all trap taps.
   *
   * @return the values of the unmodifiable traps Map
   */
  @Override
  public Collection<Tap> getTrapsCollection()
    {
    return getTraps().values();
    }

  /**
   * Returns the checkpoint taps keyed by name.
   *
   * @return an unmodifiable view of the checkpoints Map
   */
  @Override
  public Map<String, Tap> getCheckpoints()
    {
    return Collections.unmodifiableMap( checkpoints );
    }

  /**
   * Returns the names of all checkpoint taps.
   *
   * @return a new List holding a copy of the checkpoint names
   */
  @Override
  public List<String> getCheckpointNames()
    {
    return new ArrayList<String>( checkpoints.keySet() );
    }

  /**
   * Returns all checkpoint taps.
   *
   * @return the values of the unmodifiable checkpoints Map
   */
  @Override
  public Collection<Tap> getCheckpointsCollection()
    {
    return getCheckpoints().values();
    }

  /**
   * Returns the current value of the stopJobsOnExit setting; true unless changed.
   *
   * @return the stopJobsOnExit flag
   */
  @Override
  public boolean isStopJobsOnExit()
    {
    return stopJobsOnExit;
    }

  /**
   * Returns the strategy consulted by {@link #isSkipFlow()}.
   *
   * @return the current FlowSkipStrategy
   */
  @Override
  public FlowSkipStrategy getFlowSkipStrategy()
    {
    return flowSkipStrategy;
    }

  @Override
  public FlowSkipStrategy setFlowSkipStrategy( FlowSkipStrategy flowSkipStrategy )
    {
    if( flowSkipStrategy == null )
      throw new IllegalArgumentException( "flowSkipStrategy may not be null" );

    try
      {
      return this.flowSkipStrategy;
      }
    finally
      {
      this.flowSkipStrategy = flowSkipStrategy;
      }
    }

  /**
   * Asks the current {@link FlowSkipStrategy} whether this flow should be skipped.
   *
   * @return the strategy's answer
   * @throws IOException when thrown by the strategy
   */
  @Override
  public boolean isSkipFlow() throws IOException
    {
    return flowSkipStrategy.skipFlow( this );
    }

  /**
   * Reports whether the sources are newer than the value returned by {@link #getSinkModified()}.
   *
   * @return the result of {@link #areSourcesNewer(long)} for the sink modified time
   * @throws IOException when a modified time cannot be read
   */
  @Override
  public boolean areSinksStale() throws IOException
    {
    return areSourcesNewer( getSinkModified() );
    }

  @Override
  public boolean areSourcesNewer( long sinkModified ) throws IOException
    {
    Config confCopy = getConfigCopy();
    Iterator<Tap> values = sources.values().iterator();

    long sourceModified = 0;

    try
      {
      sourceModified = Util.getSourceModified( confCopy, values, sinkModified );

      if( sinkModified < sourceModified )
        return true;

      return false;
      }
    finally
      {
      if( LOG.isInfoEnabled() )
        logInfo( "source modification date at: " + new Date( sourceModified ) ); // not oldest, we didnt check them all
      }
    }

  @Override
  public long getSinkModified() throws IOException
    {
    long sinkModified = Util.getSinkModified( getConfigCopy(), sinks.values() );

    if( LOG.isInfoEnabled() )
      {
      if( sinkModified == -1L )
        logInfo( "at least one sink is marked for delete" );
      if( sinkModified == 0L )
        logInfo( "at least one sink does not exist" );
      else
        logInfo( "sink oldest modified date: " + new Date( sinkModified ) );
      }

    return sinkModified;
    }

  /**
   * Returns the step strategy of this flow.
   *
   * @return the FlowStepStrategy, null unless one was set
   */
  @Override
  public FlowStepStrategy getFlowStepStrategy()
    {
    return flowStepStrategy;
    }

  /**
   * Replaces the step strategy of this flow; null is accepted.
   *
   * @param flowStepStrategy of type FlowStepStrategy
   */
  @Override
  public void setFlowStepStrategy( FlowStepStrategy flowStepStrategy )
    {
    this.flowStepStrategy = flowStepStrategy;
    }

  @Override
  public List<FlowStep<Config>> getFlowSteps()
    {
    if( steps != null )
      return steps;

    if( flowStepGraph == null )
      return Collections.emptyList();

    Iterator<FlowStep> topoIterator = flowStepGraph.getTopologicalIterator();

    steps = new ArrayList<>();

    while( topoIterator.hasNext() )
      steps.add( topoIterator.next() );

    return steps;
    }

  /**
   * Prepares the flow for execution by deleting every sink, trap and checkpoint resource
   * that is not configured for update.
   *
   * @throws FlowException wrapping any IOException raised while deleting
   */
  @Override
  @ProcessPrepare
  public void prepare()
    {
    try
      {
      deleteSinksIfNotUpdate();
      deleteTrapsIfNotUpdate();
      deleteCheckpointsIfNotUpdate();
      }
    catch( IOException exception )
      {
      throw new FlowException( "unable to prepare flow", exception );
      }
    }

  @Override
  @ProcessStart
  public synchronized void start()
    {
    if( thread != null )
      return;

    if( stop )
      return;

    registerShutdownHook();

    internalStart();

    String threadName = ( "flow " + Util.toNull( getName() ) ).trim();

    thread = createFlowThread( threadName );

    thread.start();
    }

  /**
   * Creates, but does not start, the thread that executes this flow by calling the private
   * {@code run()} method.
   *
   * @param threadName the name given to the new thread
   * @return a new, unstarted Thread
   */
  protected Thread createFlowThread( String threadName )
    {
    return new Thread( new Runnable()
    {
    @Override
    public void run()
      {
      BaseFlow.this.run();
      }
    }, threadName );
    }

  /** Hook invoked by {@link #start()} after the shutdown hook is registered and before the flow thread is created. */
  protected abstract void internalStart();

  @Override
  @ProcessStop
  public synchronized void stop()
    {
    stopLock.lock();

    try
      {
      if( stop )
        return;

      stop = true;

      fireOnStopping();

      if( !flowStats.isFinished() )
        flowStats.markStopped();

      internalStopAllJobs();

      handleExecutorShutdown();

      internalClean( true );
      }
    finally
      {
      flowStats.cleanup();
      stopLock.unlock();
      }
    }

  /**
   * Hook for platform specific cleanup; called with {@code true} from {@link #stop()} and with
   * the current stop flag when the flow thread finishes.
   *
   * @param stop whether the flow was stopped
   */
  protected abstract void internalClean( boolean stop );

  @Override
  @ProcessComplete
  public void complete()
    {
    start();

    try
      {
      try
        {
        synchronized( this ) // prevent NPE on quick stop() & complete() after start()
          {
          while( thread == null && !stop )
            Util.safeSleep( 10 );
          }

        if( thread != null )
          thread.join();
        }
      catch( InterruptedException exception )
        {
        throw new FlowException( getName(), "thread interrupted", exception );
        }

      // if in #stop and stopping, lets wait till its done in this thread
      try
        {
        stopLock.lock();
        }
      finally
        {
        stopLock.unlock();
        }

      if( throwable instanceof FlowException )
        ( (FlowException) throwable ).setFlowName( getName() );

      if( throwable instanceof CascadingException )
        throw (CascadingException) throwable;

      if( throwable instanceof OutOfMemoryError )
        throw (OutOfMemoryError) throwable;

      if( throwable != null )
        throw new FlowException( getName(), "unhandled exception", throwable );

      if( hasListeners() )
        {
        for( SafeFlowListener safeFlowListener : getListeners() )
          {
          if( safeFlowListener.throwable != null )
            throw new FlowException( getName(), "unhandled listener exception", throwable );
          }
        }
      }
    finally
      {
      thread = null;
      throwable = null;

      try
        {
        commitTraps();

        if( hasListeners() )
          {
          for( SafeFlowListener safeFlowListener : getListeners() )
            safeFlowListener.throwable = null;
          }
        }
      finally
        {
        flowStats.cleanup();
        }
      }
    }

  /**
   * Commits the resource of every trap tap. A trap that reports failure or throws an
   * IOException is logged as an error and does not prevent the remaining traps from being
   * committed.
   */
  private void commitTraps()
    {
    // commit all the traps, don't fail on an error

    for( Tap tap : traps.values() )
      {
      try
        {
        if( !tap.commitResource( getConfig() ) )
          logError( "unable to commit trap: " + tap.getFullIdentifier( getConfig() ), null );
        }
      catch( IOException exception )
        {
        logError( "unable to commit trap: " + tap.getFullIdentifier( getConfig() ), exception );
        }
      }
    }

  /** Riffle cleanup callback; intentionally a no-op in this class. */
  @Override
  @ProcessCleanup
  public void cleanup()
    {
    // do nothing
    }

  /**
   * Opens the first source tap, in the iteration order of the sources map, for reading.
   * <p/>
   * When no sources are registered, {@code iterator().next()} throws a NoSuchElementException.
   *
   * @return a TupleEntryIterator over the first source
   * @throws IOException when the tap cannot be opened
   */
  @Override
  public TupleEntryIterator openSource() throws IOException
    {
    return sources.values().iterator().next().openForRead( getFlowProcess() );
    }

  /**
   * Opens the source tap registered under the given name for reading.
   *
   * @param name of type String
   * @return a TupleEntryIterator over the named source
   * @throws IOException              when the tap cannot be opened
   * @throws IllegalArgumentException when no source is registered under the name
   */
  @Override
  public TupleEntryIterator openSource( String name ) throws IOException
    {
    if( !sources.containsKey( name ) )
      throw new IllegalArgumentException( "source does not exist: " + name );

    return sources.get( name ).openForRead( getFlowProcess() );
    }

  /**
   * Opens the first sink tap, in the iteration order of the sinks map, for reading.
   * <p/>
   * When no sinks are registered, {@code iterator().next()} throws a NoSuchElementException.
   *
   * @return a TupleEntryIterator over the first sink
   * @throws IOException when the tap cannot be opened
   */
  @Override
  public TupleEntryIterator openSink() throws IOException
    {
    return sinks.values().iterator().next().openForRead( getFlowProcess() );
    }

  /**
   * Opens the sink tap registered under the given name for reading.
   *
   * @param name of type String
   * @return a TupleEntryIterator over the named sink
   * @throws IOException              when the tap cannot be opened
   * @throws IllegalArgumentException when no sink is registered under the name
   */
  @Override
  public TupleEntryIterator openSink( String name ) throws IOException
    {
    if( !sinks.containsKey( name ) )
      throw new IllegalArgumentException( "sink does not exist: " + name );

    return sinks.get( name ).openForRead( getFlowProcess() );
    }

  /**
   * Opens the first trap tap, in the iteration order of the traps map, for reading.
   * <p/>
   * When no traps are registered, {@code iterator().next()} throws a NoSuchElementException.
   *
   * @return a TupleEntryIterator over the first trap
   * @throws IOException when the tap cannot be opened
   */
  @Override
  public TupleEntryIterator openTrap() throws IOException
    {
    return traps.values().iterator().next().openForRead( getFlowProcess() );
    }

  /**
   * Opens the trap tap registered under the given name for reading.
   *
   * @param name of type String
   * @return a TupleEntryIterator over the named trap
   * @throws IOException              when the tap cannot be opened
   * @throws IllegalArgumentException when no trap is registered under the name
   */
  @Override
  public TupleEntryIterator openTrap( String name ) throws IOException
    {
    if( !traps.containsKey( name ) )
      throw new IllegalArgumentException( "trap does not exist: " + name );

    return traps.get( name ).openForRead( getFlowProcess() );
    }

  /**
   * Method deleteSinks deletes all sinks, whether or not they are configured for {@link cascading.tap.SinkMode#UPDATE}.
   * <p/>
   * Use with caution.
   *
   * @throws IOException   when a sink resource cannot be checked or deleted
   * @throws FlowException when a sink reports that its existing resource could not be deleted
   * @see BaseFlow#deleteSinksIfNotUpdate()
   */
  public void deleteSinks() throws IOException
    {
    for( Tap tap : sinks.values() )
      deleteOrFail( tap );
    }

  private void deleteOrFail( Tap tap ) throws IOException
    {
    if( !tap.resourceExists( getConfig() ) )
      return;

    if( !tap.deleteResource( getConfig() ) )
      throw new FlowException( "unable to delete resource: " + tap.getFullIdentifier( getFlowProcess() ) );
    }

  /**
   * Method deleteSinksIfNotUpdate deletes all sinks if they are not configured with the {@link cascading.tap.SinkMode#UPDATE} flag.
   * <p/>
   * Typically used by a {@link Cascade} before executing the flow if the sinks are stale.
   * <p/>
   * Use with caution.
   *
   * @throws IOException when
   */
  public void deleteSinksIfNotUpdate() throws IOException
    {
    for( Tap tap : sinks.values() )
      {
      if( !tap.isUpdate() )
        deleteOrFail( tap );
      }
    }

  public void deleteSinksIfReplace() throws IOException
    {
    for( Tap tap : sinks.values() )
      {
      if( tap.isReplace() )
        deleteOrFail( tap );
      }
    }

  public void deleteTrapsIfNotUpdate() throws IOException
    {
    for( Tap tap : traps.values() )
      {
      if( !tap.isUpdate() )
        deleteOrFail( tap );
      }
    }

  public void deleteCheckpointsIfNotUpdate() throws IOException
    {
    for( Tap tap : checkpoints.values() )
      {
      if( !tap.isUpdate() )
        deleteOrFail( tap );
      }
    }

  public void deleteTrapsIfReplace() throws IOException
    {
    for( Tap tap : traps.values() )
      {
      if( tap.isReplace() )
        deleteOrFail( tap );
      }
    }

  public void deleteCheckpointsIfReplace() throws IOException
    {
    for( Tap tap : checkpoints.values() )
      {
      if( tap.isReplace() )
        deleteOrFail( tap );
      }
    }

  /**
   * Checks, using this flow's config, whether the resource behind the given tap exists.
   *
   * @param tap of type Tap
   * @return the result of {@code tap.resourceExists}
   * @throws IOException when the check fails
   */
  @Override
  public boolean resourceExists( Tap tap ) throws IOException
    {
    return tap.resourceExists( getConfig() );
    }

  /**
   * Opens the given tap for reading with this flow's flow process.
   *
   * @param tap of type Tap
   * @return a TupleEntryIterator over the tap
   * @throws IOException when the tap cannot be opened
   */
  @Override
  public TupleEntryIterator openTapForRead( Tap tap ) throws IOException
    {
    return tap.openForRead( getFlowProcess() );
    }

  /**
   * Opens the given tap for writing with this flow's flow process.
   *
   * @param tap of type Tap
   * @return a TupleEntryCollector writing to the tap
   * @throws IOException when the tap cannot be opened
   */
  @Override
  public TupleEntryCollector openTapForWrite( Tap tap ) throws IOException
    {
    return tap.openForWrite( getFlowProcess() );
    }

  /**
   * Method run executes the flow and should not be called by users; it is invoked on the flow
   * thread through the Runnable built in {@link #createFlowThread(String)}.
   * <p/>
   * It marks the stats started, notifies listeners, spawns all step jobs and waits for their
   * results. The first non-null result stops the remaining jobs and is kept as the flow's
   * throwable, as is anything thrown here. The finally block then marks the stats failed or
   * successful, cleans up, fires the completed event and deregisters the shutdown hook.
   *
   * @throws IllegalStateException when called while no flow thread is set
   */
  private void run()
    {
    if( thread == null )
      throw new IllegalStateException( "to start a Flow call start() or complete(), not Runnable#run()" );

    Version.printBanner();
    Update.checkForUpdate( getPlatformInfo() );

    try
      {
      // stop() may have been called between start() and this thread running
      if( stop )
        return;

      flowStats.markStarted();

      fireOnStarting();

      if( LOG.isInfoEnabled() )
        {
        logInfo( "starting" );

        for( Tap source : getSourcesCollection() )
          logInfo( " source: " + source );
        for( Tap sink : getSinksCollection() )
          logInfo( " sink: " + sink );
        }

      // if jobs are run local, then only use one thread to force execution serially
      //int numThreads = jobsAreLocal() ? 1 : getMaxConcurrentSteps( getJobConf() );
      int numThreads = getMaxNumParallelSteps();

      // zero means no limit was given, so allow one thread per job
      if( numThreads == 0 )
        numThreads = jobsMap.size();

      if( numThreads == 0 )
        throw new IllegalStateException( "no jobs rendered for flow: " + getName() );

      if( LOG.isInfoEnabled() )
        {
        logInfo( " parallel execution of steps is enabled: " + ( getMaxNumParallelSteps() != 1 ) );
        logInfo( " executing total steps: " + jobsMap.size() );
        logInfo( " allocating management threads: " + numThreads );
        }

      List<Future<Throwable>> futures = spawnJobs( numThreads );

      // each future yields null on success or the throwable that failed the job
      for( Future<Throwable> future : futures )
        {
        throwable = future.get();

        if( throwable != null )
          {
          if( !stop )
            internalStopAllJobs();

          handleExecutorShutdown();
          break;
          }
        }
      }
    catch( Throwable throwable )
      {
      // the parameter shadows the field; record the failure for complete() to rethrow
      this.throwable = throwable;
      }
    finally
      {
      handleThrowableAndMarkFailed();

      if( !stop && !flowStats.isFinished() )
        flowStats.markSuccessful();

      internalClean( stop ); // cleaning temp taps may be determined by success/failure

      try
        {
        fireOnCompleted();
        }
      finally
        {
        if( LOG.isInfoEnabled() )
          {
          // despite the local's name this value is in milliseconds
          long totalSliceCPUSeconds = getTotalSliceCPUMilliSeconds();

          if( totalSliceCPUSeconds == -1 )
            logInfo( " completed in: " + formatDurationFromMillis( flowStats.getDuration() ) );
          else
            logInfo( " completed in: " + formatDurationFromMillis( flowStats.getDuration() ) + ", using cpu time: " + formatDurationFromMillis( totalSliceCPUSeconds ) );
          }

        flowStats.cleanup();
        internalShutdown();
        deregisterShutdownHook();
        }
      }
    }

  /**
   * Returns the total slice CPU time in milliseconds, or -1 when it is not available.
   * This base implementation always returns -1, in which case no cpu time is logged on completion.
   *
   * @return -1
   */
  protected long getTotalSliceCPUMilliSeconds()
    {
    return -1;
    }

  /**
   * Returns the maximum number of steps to run in parallel. When this returns 0 the number of
   * jobs is used instead; 1 is logged as parallel execution being disabled.
   *
   * @return the maximum number of parallel steps
   */
  protected abstract int getMaxNumParallelSteps();

  /** Hook invoked at the very end of the flow thread, after the stats cleanup and before the shutdown hook is deregistered. */
  protected abstract void internalShutdown();

  private List<Future<Throwable>> spawnJobs( int numThreads ) throws InterruptedException
    {
    if( stop )
      return new ArrayList<Future<Throwable>>();

    List<Callable<Throwable>> list = new ArrayList<Callable<Throwable>>();

    for( FlowStepJob<Config> job : jobsMap.values() )
      list.add( job );

    return spawnStrategy.start( this, numThreads, list );
    }

  private void handleThrowableAndMarkFailed()
    {
    if( throwable != null && !stop )
      {
      flowStats.markFailed( throwable );

      fireOnThrowable();
      }
    }

  /**
   * Returns the jobs of this flow keyed by step name.
   *
   * @return the internal jobs Map, null until {@link #initializeNewJobsMap()} has run
   */
  Map<String, FlowStepJob<Config>> getJobsMap()
    {
    return jobsMap;
    }

  /**
   * Rebuilds the jobs map by walking the step graph in topological order. For every step a
   * job is created, registered under the step name, wired to the jobs of its predecessor
   * steps, and its step stats are added to the flow stats.
   * <p/>
   * NOTE(review): unlike {@link #initSteps()} this does not guard against a null step graph.
   */
  protected void initializeNewJobsMap()
    {
    jobsMap = new LinkedHashMap<>(); // keep topo order
    Iterator<FlowStep> topoIterator = flowStepGraph.getTopologicalIterator();

    while( topoIterator.hasNext() )
      {
      BaseFlowStep<Config> step = (BaseFlowStep) topoIterator.next();
      FlowStepJob<Config> flowStepJob = step.getFlowStepJob( getFlowProcess(), getConfig() );

      jobsMap.put( step.getName(), flowStepJob );

      List<FlowStepJob<Config>> predecessors = new ArrayList<FlowStepJob<Config>>();

      // topological order means every predecessor's job is already in the map
      for( Object flowStep : predecessorListOf( flowStepGraph, step ) )
        predecessors.add( jobsMap.get( ( (FlowStep) flowStep ).getName() ) );

      flowStepJob.setPredecessors( predecessors );

      flowStats.addStepStats( flowStepJob.getStepStats() );
      }
    }

  protected void internalStopAllJobs()
    {
    logInfo( "stopping all jobs" );

    try
      {
      if( jobsMap == null )
        return;

      List<FlowStepJob<Config>> jobs = new ArrayList<FlowStepJob<Config>>( jobsMap.values() );

      Collections.reverse( jobs );

      for( FlowStepJob<Config> job : jobs )
        job.stop();
      }
    finally
      {
      logInfo( "stopped all jobs" );
      }
    }

  protected void handleExecutorShutdown()
    {
    if( spawnStrategy.isCompleted( this ) )
      return;

    logInfo( "shutting down job executor" );

    try
      {
      spawnStrategy.complete( this, 5 * 60, TimeUnit.SECONDS );
      }
    catch( InterruptedException exception )
      {
      // ignore
      }

    logInfo( "shutdown complete" );
    }

  protected void fireOnCompleted()
    {
    if( hasListeners() )
      {
      if( LOG.isDebugEnabled() )
        logDebug( "firing onCompleted event: " + getListeners().size() );

      for( FlowListener flowListener : getListeners() )
        flowListener.onCompleted( this );
      }
    }

  protected void fireOnThrowable()
    {
    if( hasListeners() )
      {
      if( LOG.isDebugEnabled() )
        logDebug( "firing onThrowable event: " + getListeners().size() );

      boolean isHandled = false;

      for( FlowListener flowListener : getListeners() )
        isHandled = flowListener.onThrowable( this, throwable ) || isHandled;

      if( isHandled )
        throwable = null;
      }
    }

  protected void fireOnStopping()
    {
    if( hasListeners() )
      {
      if( LOG.isDebugEnabled() )
        logDebug( "firing onStopping event: " + getListeners().size() );

      for( FlowListener flowListener : getListeners() )
        flowListener.onStopping( this );
      }
    }

  protected void fireOnStarting()
    {
    if( hasListeners() )
      {
      if( LOG.isDebugEnabled() )
        logDebug( "firing onStarting event: " + getListeners().size() );

      for( FlowListener flowListener : getListeners() )
        flowListener.onStarting( this );
      }
    }

  @Override
  public String toString()
    {
    StringBuffer buffer = new StringBuffer();

    if( getName() != null )
      buffer.append( getName() ).append( ": " );

    for( FlowStep step : getFlowSteps() )
      buffer.append( step );

    return buffer.toString();
    }

  protected void logInfo( String message )
    {
    LOG.info( "[" + Util.truncate( getName(), 25 ) + "] " + message );
    }

  private void logDebug( String message )
    {
    LOG.debug( "[" + Util.truncate( getName(), 25 ) + "] " + message );
    }

  private void logWarn( String message, Throwable throwable )
    {
    LOG.warn( "[" + Util.truncate( getName(), 25 ) + "] " + message, throwable );
    }

  private void logError( String message, Throwable throwable )
    {
    LOG.error( "[" + Util.truncate( getName(), 25 ) + "] " + message, throwable );
    }

  @Override
  public void writeDOT( String filename )
    {
    if( pipeGraph == null )
      throw new UnsupportedOperationException( "this flow instance cannot write a DOT file" );

    pipeGraph.writeDOT( filename );
    }

  @Override
  public void writeStepsDOT( String filename )
    {
    if( flowStepGraph == null )
      throw new UnsupportedOperationException( "this flow instance cannot write a DOT file" );

    flowStepGraph.writeDOT( filename );
    }

  /**
   * Used to return a simple wrapper for use as an edge in a graph where there can only be
   * one instance of every edge.
   *
   * @return FlowHolder
   */
  public FlowHolder getHolder()
    {
    return new FlowHolder( this );
    }

  public void setCascade( Cascade cascade )
    {
    setConfigProperty( getConfig(), "cascading.cascade.id", cascade.getID() );
    flowStats.recordInfo();
    }

  @Override
  public String getCascadeID()
    {
    return getProperty( "cascading.cascade.id" );
    }

  @Override
  public String getRunID()
    {
    return runID;
    }

  public List<String> getClassPath()
    {
    return classPath;
    }

  /**
   * Replaces the spawn strategy this flow uses to start its jobs (see spawnJobs) and to
   * complete them (see handleExecutorShutdown).
   *
   * @param spawnStrategy of type UnitOfWorkSpawnStrategy
   */
  @Override
  public void setSpawnStrategy( UnitOfWorkSpawnStrategy spawnStrategy )
    {
    this.spawnStrategy = spawnStrategy;
    }

  @Override
  public UnitOfWorkSpawnStrategy getSpawnStrategy()
    {
    return spawnStrategy;
    }

  protected void registerShutdownHook()
    {
    if( !isStopJobsOnExit() )
      return;

    shutdownHook = new ShutdownUtil.Hook()
    {
    @Override
    public Priority priority()
      {
      return Priority.WORK_CHILD;
      }

    @Override
    public void execute()
      {
      logInfo( "shutdown hook calling stop on flow" );

      BaseFlow.this.stop();
      }
    };

    ShutdownUtil.addHook( shutdownHook );
    }

  private void deregisterShutdownHook()
    {
    if( !isStopJobsOnExit() || stop )
      return;

    ShutdownUtil.removeHook( shutdownHook );
    }

  /** Class FlowHolder is a helper class for wrapping Flow instances. */
  public static class FlowHolder
    {
    /** Field flow */
    public Flow flow;

    public FlowHolder()
      {
      }

    public FlowHolder( Flow flow )
      {
      this.flow = flow;
      }
    }

  /**
   * Class SafeFlowListener safely calls a wrapped FlowListener.
   * <p/>
   * This is done for a few reasons, the primary reason is so exceptions thrown by the Listener
   * can be caught by the calling Thread. Since Flow is asynchronous, much of the work is done in the run() method
   * which in turn is run in a new Thread.
   */
  private class SafeFlowListener implements FlowListener
    {
    /** Field flowListener */
    final FlowListener flowListener;
    /** Field throwable */
    Throwable throwable;

    private SafeFlowListener( FlowListener flowListener )
      {
      this.flowListener = flowListener;
      }

    public void onStarting( Flow flow )
      {
      try
        {
        flowListener.onStarting( flow );
        }
      catch( Throwable throwable )
        {
        handleThrowable( throwable );
        }
      }

    public void onStopping( Flow flow )
      {
      try
        {
        flowListener.onStopping( flow );
        }
      catch( Throwable throwable )
        {
        handleThrowable( throwable );
        }
      }

    public void onCompleted( Flow flow )
      {
      try
        {
        flowListener.onCompleted( flow );
        }
      catch( Throwable throwable )
        {
        handleThrowable( throwable );
        }
      }

    public boolean onThrowable( Flow flow, Throwable flowThrowable )
      {
      try
        {
        return flowListener.onThrowable( flow, flowThrowable );
        }
      catch( Throwable throwable )
        {
        handleThrowable( throwable );
        }

      return false;
      }

    private void handleThrowable( Throwable throwable )
      {
      this.throwable = throwable;

      logWarn( String.format( "flow listener %s threw throwable", flowListener ), throwable );

      // stop this flow
      stop();
      }

    public boolean equals( Object object )
      {
      if( object instanceof BaseFlow.SafeFlowListener )
        return flowListener.equals( ( (BaseFlow.SafeFlowListener) object ).flowListener );

      return flowListener.equals( object );
      }

    public int hashCode()
      {
      return flowListener.hashCode();
      }
    }
  }
TOP

Related Classes of cascading.flow.BaseFlow

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.