Package cascading.flow.hadoop.util

Source Code of cascading.flow.hadoop.util.HadoopUtil

/*
* Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cascading.flow.hadoop.util;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.jar.Attributes;
import java.util.jar.Manifest;

import cascading.CascadingException;
import cascading.flow.FlowException;
import cascading.flow.planner.BaseFlowStep;
import cascading.flow.planner.PlatformInfo;
import cascading.flow.planner.Scope;
import cascading.pipe.Group;
import cascading.scheme.hadoop.TextLine;
import cascading.tap.hadoop.Hfs;
import cascading.tuple.Fields;
import cascading.util.LogUtil;
import cascading.util.Util;
import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static cascading.util.Util.invokeInstanceMethod;

/**
 * HadoopUtil is a collection of static helpers used by the Hadoop planners and runtime for copying and
 * merging {@link Configuration} instances, Base64 serialization of job state, DistributedCache classpath
 * handling, and local mode detection.
 */
public class HadoopUtil
  {
  public static final String CASCADING_FLOW_EXECUTING = "cascading.flow.executing";

  private static final Logger LOG = LoggerFactory.getLogger( HadoopUtil.class );
  private static final String ENCODING = "US-ASCII";
  private static final Class<?> DEFAULT_OBJECT_SERIALIZER = JavaObjectSerializer.class;

  private static PlatformInfo platformInfo;

  public static void setIsInflow( Configuration conf )
    {
    conf.setBoolean( CASCADING_FLOW_EXECUTING, true );
    }

  public static boolean isInflow( Configuration conf )
    {
    return conf.getBoolean( CASCADING_FLOW_EXECUTING, false );
    }

  public static void initLog4j( JobConf configuration )
    {
    initLog4j( (Configuration) configuration );
    }

  public static void initLog4j( Configuration configuration )
    {
    String values = configuration.get( "log4j.logger", null );

    if( values == null || values.length() == 0 )
      return;

    if( !Util.hasClass( "org.apache.log4j.Logger" ) )
      {
      LOG.info( "org.apache.log4j.Logger is not in the current CLASSPATH, not setting log4j.logger properties" );
      return;
      }

    String[] elements = values.split( "," );

    for( String element : elements )
      LogUtil.setLog4jLevel( element.split( "=" ) );
    }
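
  /*
   * A minimal sketch of the property parsed above (values are illustrative): "log4j.logger" holds a
   * comma separated list of logger=level pairs.
   *
   *   JobConf conf = new JobConf();
   *   conf.set( "log4j.logger", "cascading=INFO,org.apache.hadoop=WARN" );
   *   HadoopUtil.initLog4j( conf ); // applies each level via LogUtil.setLog4jLevel
   */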

  // only place JobConf should ever be returned
  public static JobConf asJobConfInstance( Configuration configuration )
    {
    if( configuration instanceof JobConf )
      return (JobConf) configuration;

    return new JobConf( configuration );
    }

  public static <C> C copyJobConf( C parentJobConf )
    {
    return copyConfiguration( parentJobConf );
    }

  public static JobConf copyJobConf( JobConf parentJobConf )
    {
    if( parentJobConf == null )
      throw new IllegalArgumentException( "parent may not be null" );

    // see https://github.com/Cascading/cascading/pull/21
    // The JobConf(JobConf) constructor causes derived JobConfs to share Credentials. We want to avoid this, in
    // case those Credentials are mutated later on down the road (which they will be, during job submission, in
    // separate threads!). Using the JobConf(Configuration) constructor avoids Credentials-sharing.
    final Configuration configurationCopy = new Configuration( parentJobConf );
    final JobConf jobConf = new JobConf( configurationCopy );

    jobConf.getCredentials().addAll( parentJobConf.getCredentials() );

    return jobConf;
    }

  public static JobConf createJobConf( Map<Object, Object> properties, JobConf defaultJobconf )
    {
    JobConf jobConf = defaultJobconf == null ? new JobConf() : copyJobConf( defaultJobconf );

    if( properties == null )
      return jobConf;

    return copyConfiguration( properties, jobConf );
    }
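
  /*
   * A minimal sketch (values are illustrative): createJobConf copies key/value pairs from the given
   * Map or Properties onto a copy of the default JobConf.
   *
   *   Properties properties = new Properties();
   *   properties.setProperty( "mapreduce.job.reduces", "4" );
   *
   *   JobConf jobConf = HadoopUtil.createJobConf( properties, new JobConf() );
   *   // jobConf.get( "mapreduce.job.reduces" ) now returns "4"
   */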

  public static <C> C copyConfiguration( C parent )
    {
    if( parent == null )
      throw new IllegalArgumentException( "parent may not be null" );

    if( !( parent instanceof Configuration ) )
      throw new IllegalArgumentException( "parent must be of type Configuration" );

    Configuration conf = (Configuration) parent;

    // see https://github.com/Cascading/cascading/pull/21
    // The JobConf(JobConf) constructor causes derived JobConfs to share Credentials. We want to avoid this, in
    // case those Credentials are mutated later on down the road (which they will be, during job submission, in
    // separate threads!). Using the JobConf(Configuration) constructor avoids Credentials-sharing.
    Configuration configurationCopy = new Configuration( conf );

    Configuration copiedConf = callCopyConstructor( parent.getClass(), configurationCopy );

    if( Util.hasInstanceMethod( parent, "getCredentials", null ) )
      {
      Object result = invokeInstanceMethod( parent, "getCredentials", null, null );
      Object credentials = invokeInstanceMethod( copiedConf, "getCredentials", null, null );

      invokeInstanceMethod( credentials, "addAll", new Object[]{result}, new Class[]{credentials.getClass()} );
      }

    return (C) copiedConf;
    }

  protected static <C extends Configuration> C callCopyConstructor( Class type, Configuration parent )
    {
    try
      {
      Constructor<C> constructor = type.getConstructor( parent.getClass() );

      return constructor.newInstance( parent );
      }
    catch( NoSuchMethodException | InvocationTargetException | InstantiationException | IllegalAccessException exception )
      {
      throw new CascadingException( "unable to create copy of: " + type, exception );
      }
    }

  public static <C extends Configuration> C copyConfiguration( Map<Object, Object> srcProperties, C dstConfiguration )
    {
    Set<Object> keys = new HashSet<Object>( srcProperties.keySet() );

    // keys will only be grabbed if both key/value are String, so keep orig keys
    if( srcProperties instanceof Properties )
      keys.addAll( ( (Properties) srcProperties ).stringPropertyNames() );

    for( Object key : keys )
      {
      Object value = srcProperties.get( key );

      if( value == null && srcProperties instanceof Properties && key instanceof String )
        value = ( (Properties) srcProperties ).getProperty( (String) key );

      if( value == null ) // don't stuff null values
        continue;

      // don't let these objects pass, even though toString is called below.
      if( value instanceof Class || value instanceof JobConf )
        continue;

      dstConfiguration.set( key.toString(), value.toString() );
      }

    return dstConfiguration;
    }

  public static Map<Object, Object> createProperties( Configuration jobConf )
    {
    Map<Object, Object> properties = new HashMap<Object, Object>();

    if( jobConf == null )
      return properties;

    for( Map.Entry<String, String> entry : jobConf )
      properties.put( entry.getKey(), entry.getValue() );

    return properties;
    }

  public static Thread getHDFSShutdownHook()
    {
    Exception caughtException;

    try
      {
      // we must init the FS so the finalizer is registered
      FileSystem.getLocal( new JobConf() );

      Field field = FileSystem.class.getDeclaredField( "clientFinalizer" );
      field.setAccessible( true );

      Thread finalizer = (Thread) field.get( null );

      if( finalizer != null )
        Runtime.getRuntime().removeShutdownHook( finalizer );

      return finalizer;
      }
    catch( NoSuchFieldException exception )
      {
      caughtException = exception;
      }
    catch( IllegalAccessException exception )
      {
      caughtException = exception;
      }
    catch( IOException exception )
      {
      caughtException = exception;
      }

    LOG.debug( "unable to find and remove client hdfs shutdown hook, received exception: {}", caughtException.getClass().getName() );

    return null;
    }

  public static String encodeBytes( byte[] bytes )
    {
    try
      {
      return new String( Base64.encodeBase64( bytes ), ENCODING );
      }
    catch( UnsupportedEncodingException exception )
      {
      throw new RuntimeException( exception );
      }
    }

  public static byte[] decodeBytes( String string )
    {
    try
      {
      byte[] bytes = string.getBytes( ENCODING );
      return Base64.decodeBase64( bytes );
      }
    catch( UnsupportedEncodingException exception )
      {
      throw new RuntimeException( exception );
      }
    }

  public static <T> ObjectSerializer instantiateSerializer( Configuration conf, Class<T> type ) throws ClassNotFoundException
    {
    Class<ObjectSerializer> flowSerializerClass;

    String serializerClassName = conf.get( ObjectSerializer.OBJECT_SERIALIZER_PROPERTY );

    if( serializerClassName == null || serializerClassName.length() == 0 )
      flowSerializerClass = (Class<ObjectSerializer>) DEFAULT_OBJECT_SERIALIZER;
    else
      flowSerializerClass = (Class<ObjectSerializer>) Class.forName( serializerClassName );

    ObjectSerializer objectSerializer;

    try
      {
      objectSerializer = flowSerializerClass.newInstance();

      if( objectSerializer instanceof Configurable )
        ( (Configurable) objectSerializer ).setConf( conf );
      }
    catch( Exception exception )
      {
      throw new IllegalArgumentException( "Unable to instantiate serializer \""
        + flowSerializerClass.getName()
        + "\" for class: "
        + type.getName(), exception );
      }

    if( !objectSerializer.accepts( type ) )
      throw new IllegalArgumentException( serializerClassName + " won't accept objects of class " + type.toString() );

    return objectSerializer;
    }

  public static <T> String serializeBase64( T object, Configuration conf ) throws IOException
    {
    return serializeBase64( object, conf, true );
    }

  public static <T> String serializeBase64( T object, Configuration conf, boolean compress ) throws IOException
    {
    ObjectSerializer objectSerializer;

    try
      {
      objectSerializer = instantiateSerializer( conf, object.getClass() );
      }
    catch( ClassNotFoundException exception )
      {
      throw new IOException( exception );
      }

    return encodeBytes( objectSerializer.serialize( object, compress ) );
    }

  /**
   * This method deserializes the given Base64 encoded String back into an object instance.
   *
   * @param string the Base64 encoded, serialized object
   * @param conf   the current configuration, used to resolve the {@link ObjectSerializer}
   * @param type   the expected type of the deserialized object
   * @return an instance of the given type, or null if the given string is null or empty
   * @throws IOException if the serializer cannot be instantiated or deserialization fails
   */
  public static <T> T deserializeBase64( String string, Configuration conf, Class<T> type ) throws IOException
    {
    return deserializeBase64( string, conf, type, true );
    }

  public static <T> T deserializeBase64( String string, Configuration conf, Class<T> type, boolean decompress ) throws IOException
    {
    if( string == null || string.length() == 0 )
      return null;

    ObjectSerializer objectSerializer;

    try
      {
      objectSerializer = instantiateSerializer( conf, type );
      }
    catch( ClassNotFoundException exception )
      {
      throw new IOException( exception );
      }

    return objectSerializer.deserialize( decodeBytes( string ), type, decompress );
    }
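
  /*
   * A minimal round-trip sketch (values are illustrative): job state is tunneled through Configuration
   * properties as compressed, Base64 encoded bytes.
   *
   *   Configuration conf = new Configuration();
   *   Fields fields = new Fields( "line" );
   *
   *   String packed = HadoopUtil.serializeBase64( fields, conf );                  // throws IOException
   *   Fields unpacked = HadoopUtil.deserializeBase64( packed, conf, Fields.class );
   */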

  public static Class findMainClass( Class defaultType )
    {
    return Util.findMainClass( defaultType, "org.apache.hadoop" );
    }

  public static Map<String, String> getConfig( Configuration defaultConf, Configuration updatedConf )
    {
    Map<String, String> configs = new HashMap<String, String>();

    for( Map.Entry<String, String> entry : updatedConf )
      configs.put( entry.getKey(), entry.getValue() );

    for( Map.Entry<String, String> entry : defaultConf )
      {
      if( entry.getValue() == null )
        continue;

      String updatedValue = configs.get( entry.getKey() );

      // if both null, lets purge from map to save space
      if( updatedValue == null && entry.getValue() == null )
        configs.remove( entry.getKey() );

      // if the values are the same, lets also purge from map to save space
      if( updatedValue != null && updatedValue.equals( entry.getValue() ) )
        configs.remove( entry.getKey() );

      configs.remove( "mapred.working.dir" );
      configs.remove( "mapreduce.job.working.dir" ); // hadoop2
      }

    return configs;
    }

  public static JobConf[] getJobConfs( Configuration job, List<Map<String, String>> configs )
    {
    JobConf[] jobConfs = new JobConf[ configs.size() ];

    for( int i = 0; i < jobConfs.length; i++ )
      jobConfs[ i ] = (JobConf) mergeConf( job, configs.get( i ), false );

    return jobConfs;
    }

  public static <J extends Configuration> J mergeConf( J job, Map<String, String> config, boolean directly )
    {
    Configuration currentConf = directly ? job : ( job instanceof JobConf ? copyJobConf( (JobConf) job ) : new Configuration( job ) );

    for( String key : config.keySet() )
      {
      LOG.debug( "merging key: {} value: {}", key, config.get( key ) );

      currentConf.set( key, config.get( key ) );
      }

    return (J) currentConf;
    }

  public static Configuration removePropertiesFrom( Configuration jobConf, String... keys )
    {
    Map<Object, Object> properties = createProperties( jobConf );

    for( String key : keys )
      properties.remove( key );

    return copyConfiguration( properties, new JobConf() );
    }

  public static boolean removeStateFromDistCache( Configuration conf, String path ) throws IOException
    {
    return new Hfs( new TextLine(), path ).deleteResource( conf );
    }

  public static PlatformInfo getPlatformInfo()
    {
    if( platformInfo == null )
      platformInfo = getPlatformInfoInternal();

    return platformInfo;
    }

  private static PlatformInfo getPlatformInfoInternal()
    {
    URL url = JobConf.class.getResource( JobConf.class.getSimpleName() + ".class" );

    if( url == null || !url.toString().startsWith( "jar" ) )
      return new PlatformInfo( "Hadoop", null, null );

    String path = url.toString();
    String manifestPath = path.substring( 0, path.lastIndexOf( "!" ) + 1 ) + "/META-INF/MANIFEST.MF";

    Manifest manifest;

    try
      {
      manifest = new Manifest( new URL( manifestPath ).openStream() );
      }
    catch( IOException exception )
      {
      LOG.warn( "unable to get manifest from {}", manifestPath, exception );

      return new PlatformInfo( "Hadoop", null, null );
      }

    Attributes attributes = manifest.getAttributes( "org/apache/hadoop" );

    if( attributes == null )
      {
      LOG.debug( "unable to get Hadoop manifest attributes" );
      return new PlatformInfo( "Hadoop", null, null );
      }

    String vendor = attributes.getValue( "Implementation-Vendor" );
    String version = attributes.getValue( "Implementation-Version" );

    return new PlatformInfo( "Hadoop", vendor, version );
    }

  /**
   * Adds the given local and/or remote paths to the DistributedCache classpath of the given configuration.
   *
   * @param config    the configuration to update
   * @param classpath the artifact paths to add to the job classpath, may be null
   * @return a map of local path to remote path for artifacts found in both locations, or null if classpath is null
   */
  public static Map<Path, Path> addToClassPath( Configuration config, List<String> classpath )
    {
    if( classpath == null )
      return null;

    // given to fully qualified
    Map<String, Path> localPaths = new HashMap<String, Path>();
    Map<String, Path> remotePaths = new HashMap<String, Path>();

    resolvePaths( config, classpath, null, localPaths, remotePaths );

    try
      {
      LocalFileSystem localFS = getLocalFS( config );

      for( String path : localPaths.keySet() )
        {
        // only add local if no remote
        if( remotePaths.containsKey( path ) )
          continue;

        Path artifact = localPaths.get( path );

        DistributedCache.addFileToClassPath( artifact.makeQualified( localFS ), config );
        }

      FileSystem defaultFS = getDefaultFS( config );

      for( String path : remotePaths.keySet() )
        {
        // always add remote
        Path artifact = remotePaths.get( path );

        DistributedCache.addFileToClassPath( artifact.makeQualified( defaultFS ), config );
        }
      }
    catch( IOException exception )
      {
      throw new FlowException( "unable to set distributed cache paths", exception );
      }

    return getCommonPaths( localPaths, remotePaths );
    }

  /**
   * Copies each local path to its corresponding remote path. If syncTimes is true, the modification time of
   * each remote copy is set to match its local source (the access time is left unchanged).
   * <p/>
   * Returns a map of file name to modification time for copied files whose remote timestamp may differ from
   * the local one: all copied files when syncTimes is false, otherwise only those whose times could not be set.
   *
   * @param config      the current configuration
   * @param commonPaths a map of local path to remote path, may be null
   * @param syncTimes   whether remote modification times should be synced to the local source
   * @return a map of file name to modification time, possibly empty
   */
  public static Map<String, Long> syncPaths( Configuration config, Map<Path, Path> commonPaths, boolean syncTimes )
    {
    if( commonPaths == null )
      return Collections.emptyMap();

    Map<String, Long> timestampMap = new HashMap<>();

    Map<Path, Path> copyPaths = getCopyPaths( config, commonPaths ); // tests remote file existence or if stale

    LocalFileSystem localFS = getLocalFS( config );
    FileSystem remoteFS = getDefaultFS( config );

    for( Map.Entry<Path, Path> entry : copyPaths.entrySet() )
      {
      Path localPath = entry.getKey();
      Path remotePath = entry.getValue();

      try
        {
        LOG.info( "copying from: {}, to: {}", localPath, remotePath );
        remoteFS.copyFromLocalFile( localPath, remotePath );

        if( !syncTimes )
          {
          timestampMap.put( remotePath.getName(), remoteFS.getFileStatus( remotePath ).getModificationTime() );
          continue;
          }
        }
      catch( IOException exception )
        {
        throw new FlowException( "unable to copy local: " + localPath + " to remote: " + remotePath, exception );
        }

      FileStatus localFileStatus = null;

      try
        {
        // sync the modified times so we can lazily upload jars to hdfs after job is started
        // otherwise modified time will be local to hdfs
        localFileStatus = localFS.getFileStatus( localPath );
        remoteFS.setTimes( remotePath, localFileStatus.getModificationTime(), -1 ); // don't set the access time
        }
      catch( IOException exception )
        {
        LOG.info( "unable to set local modification time on remote file: {}, 'dfs.namenode.accesstime.precision' may be set to 0 on HDFS.", remotePath );

        if( localFileStatus != null )
          timestampMap.put( remotePath.getName(), localFileStatus.getModificationTime() );
        }
      }

    return timestampMap;
    }

  public static Map<Path, Path> getCommonPaths( Map<String, Path> localPaths, Map<String, Path> remotePaths )
    {
    Map<Path, Path> commonPaths = new HashMap<Path, Path>();

    for( Map.Entry<String, Path> entry : localPaths.entrySet() )
      {
      if( remotePaths.containsKey( entry.getKey() ) )
        commonPaths.put( entry.getValue(), remotePaths.get( entry.getKey() ) );
      }

    return commonPaths;
    }

  private static Map<Path, Path> getCopyPaths( Configuration config, Map<Path, Path> commonPaths )
    {
    Map<Path, Path> copyPaths = new HashMap<Path, Path>();

    FileSystem remoteFS = getDefaultFS( config );
    FileSystem localFS = getLocalFS( config );

    for( Map.Entry<Path, Path> entry : commonPaths.entrySet() )
      {
      Path localPath = entry.getKey();
      Path remotePath = entry.getValue();

      try
        {
        boolean localExists = localFS.exists( localPath );
        boolean remoteExist = remoteFS.exists( remotePath );

        if( localExists && !remoteExist )
          {
          copyPaths.put( localPath, remotePath );
          }
        else if( localExists )
          {
          long localModTime = localFS.getFileStatus( localPath ).getModificationTime();
          long remoteModTime = remoteFS.getFileStatus( remotePath ).getModificationTime();

          if( localModTime > remoteModTime )
            copyPaths.put( localPath, remotePath );
          }
        }
      catch( IOException exception )
        {
        throw new FlowException( "unable to get handle to underlying filesystem", exception );
        }
      }

    return copyPaths;
    }

  public static void resolvePaths( Configuration config, List<String> classpath, String remoteRoot, Map<String, Path> localPaths, Map<String, Path> remotePaths )
    {
    FileSystem defaultFS = getDefaultFS( config );
    FileSystem localFS = getLocalFS( config );

    Path remoteRootPath = new Path( remoteRoot == null ? "./.staging" : remoteRoot );

    remoteRootPath = defaultFS.makeQualified( remoteRootPath );

    boolean defaultIsLocal = defaultFS.equals( localFS );

    for( String stringPath : classpath )
      {
      Path path = new Path( stringPath );

      URI uri = path.toUri();

      if( uri.getScheme() == null && !defaultIsLocal ) // we want to sync
        {
        Path localPath = localFS.makeQualified( path );

        if( !exists( localFS, localPath ) )
          throw new FlowException( "path not found: " + localPath );

        localPaths.put( localPath.getName(), localPath );
        remotePaths.put( localPath.getName(), defaultFS.makeQualified( new Path( remoteRootPath, path.getName() ) ) );
        }
      else if( localFS.equals( getFileSystem( config, path ) ) )
        {
        if( !exists( localFS, path ) )
          throw new FlowException( "path not found: " + path );

        Path localPath = localFS.makeQualified( path );

        localPaths.put( localPath.getName(), localPath );
        }
      else
        {
        if( !exists( defaultFS, path ) )
          throw new FlowException( "path not found: " + path );

        Path defaultPath = defaultFS.makeQualified( path );

        remotePaths.put( defaultPath.getName(), defaultPath );
        }
      }
    }

  private static boolean exists( FileSystem fileSystem, Path path )
    {
    try
      {
      return fileSystem.exists( path );
      }
    catch( IOException exception )
      {
      throw new FlowException( "could not test file exists: " + path );
      }
    }

  private static FileSystem getFileSystem( Configuration config, Path path )
    {
    try
      {
      return path.getFileSystem( config );
      }
    catch( IOException exception )
      {
      throw new FlowException( "unable to get handle to underlying filesystem", exception );
      }
    }

  public static LocalFileSystem getLocalFS( Configuration config )
    {
    try
      {
      return FileSystem.getLocal( config );
      }
    catch( IOException exception )
      {
      throw new FlowException( "unable to get handle to underlying filesystem", exception );
      }
    }

  public static FileSystem getDefaultFS( Configuration config )
    {
    try
      {
      return FileSystem.get( config );
      }
    catch( IOException exception )
      {
      throw new FlowException( "unable to get handle to underlying filesystem", exception );
      }
    }

  public static boolean isLocal( Configuration conf )
    {
    // hadoop 1.0 and 2.0 use different properties to define local mode: we check the new YARN
    // property first
    String frameworkName = conf.get( "mapreduce.framework.name" );

    // we are running on hadoop 2.0 (YARN)
    if( frameworkName != null )
      return frameworkName.equals( "local" );

    // for Tez
    String tezLocal = conf.get( "tez.local.mode" );

    if( tezLocal != null )
      return tezLocal.equals( "true" );

    // hadoop 1.0: use the old property to determine the local mode
    return conf.get( "mapred.job.tracker" ).equals( "local" );
    }

  public static void setLocal( Configuration conf )
    {
    // set both properties to local
    conf.set( "mapred.job.tracker", "local" );

    // yarn
    conf.set( "mapreduce.framework.name", "local" );

    // tez
    conf.set( "tez.local.mode", "true" );
    conf.set( "tez.runtime.optimize.local.fetch", "true" );
    }
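
  /*
   * A minimal sketch: setLocal forces the MR1, YARN and Tez local mode properties, so a subsequent
   * isLocal call reports true.
   *
   *   JobConf conf = new JobConf();
   *   HadoopUtil.setLocal( conf );
   *   assert HadoopUtil.isLocal( conf );
   */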

  public static void addInputPath( Configuration conf, Path path )
    {
    Path workingDirectory = getWorkingDirectory( conf );
    path = new Path( workingDirectory, path );
    String dirStr = StringUtils.escapeString( path.toString() );
    String dirs = conf.get( "mapred.input.dir" );
    conf.set( "mapred.input.dir", dirs == null ? dirStr :
      dirs + StringUtils.COMMA_STR + dirStr );
    }

  public static void setOutputPath( Configuration conf, Path path )
    {
    Path workingDirectory = getWorkingDirectory( conf );
    path = new Path( workingDirectory, path );
    conf.set( "mapred.output.dir", path.toString() );
    }

  private static Path getWorkingDirectory( Configuration conf )
    {
    String name = conf.get( "mapred.working.dir" );
    if( name != null )
      {
      return new Path( name );
      }
    else
      {
      try
        {
        Path dir = FileSystem.get( conf ).getWorkingDirectory();
        conf.set( "mapred.working.dir", dir.toString() );
        return dir;
        }
      catch( IOException e )
        {
        throw new RuntimeException( e );
        }
      }
    }

  public static Path getOutputPath( Configuration conf )
    {
    String name = conf.get( "mapred.output.dir" );
    return name == null ? null : new Path( name );
    }

  public static String pack( Object object, Configuration conf )
    {
    if( object == null )
      return "";

    try
      {
      return serializeBase64( object, conf, true );
      }
    catch( IOException exception )
      {
      throw new FlowException( "unable to pack object: " + object.getClass().getCanonicalName(), exception );
      }
    }

  public static void addComparators( Configuration conf, String property, Map<String, Fields> map, BaseFlowStep flowStep, Group group )
    {
    Iterator<Fields> fieldsIterator = map.values().iterator();

    if( !fieldsIterator.hasNext() )
      return;

    Fields fields = fieldsIterator.next();

    if( fields.hasComparators() )
      {
      conf.set( property, pack( fields, conf ) );
      return;
      }

    // use resolved fields if there are no comparators.
    Set<Scope> previousScopes = flowStep.getPreviousScopes( group );

    fields = previousScopes.iterator().next().getOutValuesFields();

    if( fields.size() != 0 ) // allows fields.UNKNOWN to be used
      conf.setInt( property + ".size", fields.size() );
    }
  }
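
A minimal usage sketch, separate from the file above, showing how the classpath helpers are typically combined when staging local jars for a job; the class name and jar path are illustrative assumptions, and Hadoop plus Cascading are assumed to be on the classpath.

import java.util.Arrays;
import java.util.Map;

import cascading.flow.hadoop.util.HadoopUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class ClassPathStagingExample
  {
  public static void main( String[] args )
    {
    JobConf conf = new JobConf();

    // register a local artifact on the DistributedCache classpath; the path is hypothetical
    Map<Path, Path> commonPaths = HadoopUtil.addToClassPath( conf, Arrays.asList( "build/libs/my-udfs.jar" ) );

    // copy missing or stale artifacts to the default (remote) filesystem,
    // syncing remote modification times to the local sources
    Map<String, Long> timestamps = HadoopUtil.syncPaths( conf, commonPaths, true );

    System.out.println( "artifacts with unsynced timestamps: " + timestamps.keySet() );
    }
  }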