Package bixo.config

Source Code of bixo.config.BixoPlatform

/*
* Copyright 2009-2013 Scale Unlimited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/package bixo.config;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;

import cascading.flow.FlowConnector;
import cascading.flow.FlowProcess;
import cascading.flow.FlowProcess.NullFlowProcess;
import cascading.flow.hadoop.HadoopFlowProcess;
import cascading.flow.local.LocalFlowProcess;
import cascading.scheme.Scheme;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;

import com.scaleunlimited.cascading.BasePath;
import com.scaleunlimited.cascading.BasePlatform;
import com.scaleunlimited.cascading.LoggingFlowProcess;
import com.scaleunlimited.cascading.hadoop.HadoopPlatform;
import com.scaleunlimited.cascading.local.LocalPlatform;

@SuppressWarnings({"rawtypes", "serial"})
public class BixoPlatform extends BasePlatform {

    // TODO Replace these with LocalPlatform.PLATFORM_TYPE and
    // HadoopPlatform.PLATFORM_TYPE?
    public enum Platform {
        Local,
        Hadoop
    }
   
    private static final long CASCADING_LOCAL_JOB_POLLING_INTERVAL = 100;
    private static final long LOCAL_HADOOP_JOB_POLLING_INTERVAL = 100;
   
    private BasePlatform _platform;
    private JobConf _hadoopJobConf = null;

    public BixoPlatform(Class applicationJarClass, Platform platform) throws Exception {
        super(applicationJarClass);
        if (platform == Platform.Local) {
            _platform = new LocalPlatform(applicationJarClass);
            setJobPollingInterval(CASCADING_LOCAL_JOB_POLLING_INTERVAL);
        } else {
            configureHadoopPlatform(applicationJarClass, new JobConf());
        }
    }

    public BixoPlatform(Class applicationJarClass, Configuration conf) throws Exception {
        this(applicationJarClass, new JobConf(conf));
    }
   
    public BixoPlatform(Class applicationJarClass, JobConf conf) throws Exception {
        super(applicationJarClass);
        configureHadoopPlatform(applicationJarClass, conf);
    }
   
    private void configureHadoopPlatform(Class applicationJarClass, JobConf jobConf) throws Exception {
        _hadoopJobConf = jobConf;
        HadoopPlatform hp = new HadoopPlatform(applicationJarClass, _hadoopJobConf);
        _platform = hp;
       
        // Special configuration for Hadoop.
       
        if (isLocal()) {
            setNumReduceTasks(1);
            setJobPollingInterval(LOCAL_HADOOP_JOB_POLLING_INTERVAL);
        } else {
            setNumReduceTasks(BasePlatform.CLUSTER_REDUCER_COUNT);
        }
       
        hp.setMapSpeculativeExecution(false);
        hp.setReduceSpeculativeExecution(false);
    }
   
    @Override
    public String getPlatformType() {
        return _platform.getPlatformType();
    }
   
    public static Tuple clone(Tuple t, FlowProcess flowProcess) {
        if (flowProcess instanceof LoggingFlowProcess) {
            // Need to unwrap to get at actual flow process.
            flowProcess = ((LoggingFlowProcess)flowProcess).getDelegate();
        }
       
        if (flowProcess instanceof HadoopFlowProcess) {
            // Hadoop will do a deep copy implicitly (via serialization), so we don't
            // have to do any copying ourselves, and thus the tuple isn't modified.
            return t;
        } else if (flowProcess instanceof NullFlowProcess) {
            // I'm guessing we don't need to copy Tuples when running a CascadingTestCase.
            return t;
        } else if (flowProcess instanceof LocalFlowProcess) {
           
            // TODO We don't really need to copy the Tuple if every value
            // inside is immutable.
           
            // Cascading local mode just does a shallow copy of the tuple, so we need
            // to clone the tuple, and then deep-clone any tuple fields that aren't scalars.
            // Currently we only handle Tuple & Date as an embedded object, or anything
            // that's Writable.
           
            Tuple result = t;
           
            for (int i = 0; i < result.size(); i++) {
                Object value = result.getObject(i);
                if ((value == null) || (value instanceof Boolean) || (value instanceof String) ||
                    (value instanceof Double) || (value instanceof Float) ||
                    (value instanceof Integer) || (value instanceof Long) || (value instanceof Short)) {
                    // All set, Cascading will do shallow copy
                } else {
                    // we need to clone the Tuple
                    if (result == t) {
                        result = new Tuple(t);
                    }
                   
                    result.set(i, cloneValue(value, i));
                }
            }
           
            return result;
        } else {
            throw new IllegalArgumentException("Unknown class of flow process: " + flowProcess.getClass().getName());
        }
    }
   
    private static Object cloneValue(Object value, int index) {
        if (value instanceof Tuple) {
            return new Tuple((Tuple)value);
        } else if (value instanceof Date) {
            return new Date(((Date)value).getTime());
        } else if (value instanceof Writable) {
            // FUTURE - add threadlocal variables that we reuse here, versus re-allocating constantly.
            try {
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                ((Writable)value).write(new DataOutputStream(baos));
                Object result = value.getClass().newInstance();
                ((Writable)result).readFields(new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));
                return result;
            } catch (Exception e) {
                throw new RuntimeException("Error doing deep copy of Writable", e);
            }
        } else {
            // FUTURE - add support for Serializable types
            throw new RuntimeException(String.format("Field value of type %s at position %d can't be cloned, unknown type", value.getClass().getName(), index));
        }
    }


    @Override
    public File getDefaultLogDir() {
        return _platform.getDefaultLogDir();
    }

    @Override
    public BasePath getTempDir() throws Exception {
        return _platform.getTempDir();
    }

    @Override
    public boolean isLocal() {
        return _platform.isLocal();
    }

    @Override
    public boolean isTextSchemeCompressable() {
        return _platform.isTextSchemeCompressable();
    }

    @Override
    public Scheme makeBinaryScheme(Fields fields) throws Exception {
        return _platform.makeBinaryScheme(fields);
    }

    @Override
    public FlowConnector makeFlowConnector() throws Exception {
        return _platform.makeFlowConnector();
    }

    @Override
    public FlowProcess makeFlowProcess() throws Exception {
        return _platform.makeFlowProcess();
    }

    @Override
    public BasePath makePath(String path) throws Exception {
        return _platform.makePath(path);
    }

    @Override
    public BasePath makePath(BasePath parent, String subdir) throws Exception {
        return _platform.makePath(parent, subdir);
    }

    @Override
    public Tap makeTap(Scheme scheme, BasePath path, SinkMode mode) throws Exception {
        return _platform.makeTap(scheme, path, mode);
    }

    @Override
    public Tap makeTemplateTap(Tap tap, String pattern, Fields fields) throws Exception {
        return _platform.makeTemplateTap(tap, pattern, fields);
    }

    @Override
    public Scheme makeTextScheme() throws Exception {
        return _platform.makeTextScheme();
    }

    @Override
    public Scheme makeTextScheme(boolean isEnableCompression) throws Exception {
        return _platform.makeTextScheme(isEnableCompression);
    }

    @Override
    public boolean rename(BasePath srcPath, BasePath destPath) throws Exception {
        return _platform.rename(srcPath, destPath);
    }

    @Override
    public void resetNumReduceTasks() throws Exception {
        _platform.resetNumReduceTasks();       
    }

    @Override
    public void setFlowPriority(FlowPriority flowPriority) throws Exception {
        _platform.setFlowPriority(flowPriority);
    }

    @Override
    public void setNumReduceTasks(int numReduceTasks) throws Exception {
        _platform.setNumReduceTasks(numReduceTasks);
    }

    @Override
    public int getNumReduceTasks() throws Exception {
        return _platform.getNumReduceTasks();
    }

    @Override
    public String shareLocalDir(String localDirName) {
        return _platform.shareLocalDir(localDirName);
    }

    @Override
    public String copySharedDirToLocal(FlowProcess flowProcess, String sharedDirName) {
        return _platform.copySharedDirToLocal(flowProcess, sharedDirName);
    }

    public String getProperty(String name) {
        return _platform.getProperty(name);
    }

    public void setProperty(String name, int value) {
        _platform.setProperty(name, value);
    }

    public void setProperty(String name, String value) {
        _platform.setProperty(name, value);
    }
   
   
}
TOP

Related Classes of bixo.config.BixoPlatform

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.