Package org.apache.pig.impl

Source Code of org.apache.pig.impl.PigContext

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.lang.reflect.Constructor;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.Vector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.Level;
import org.apache.pig.FuncSpec;
import org.apache.pig.Main;
import org.apache.pig.ExecType;
import org.apache.pig.PigException;
import org.apache.pig.backend.datastorage.DataStorage;
import org.apache.pig.backend.datastorage.DataStorageException;
import org.apache.pig.backend.datastorage.ElementDescriptor;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.executionengine.ExecutionEngine;
import org.apache.pig.backend.hadoop.datastorage.HDataStorage;
import org.apache.pig.backend.hadoop.executionengine.HExecutionEngine;
//import org.apache.pig.backend.hadoop.executionengine.mapreduceExec.MapReduceLauncher;
//import org.apache.pig.backend.hadoop.executionengine.mapreduceExec.PigMapReduce;
import org.apache.pig.backend.local.executionengine.LocalExecutionEngine;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher;
import org.apache.pig.backend.hadoop.streaming.HadoopExecutableManager;
import org.apache.pig.impl.logicalLayer.LogicalPlanBuilder;
import org.apache.pig.impl.streaming.ExecutableManager;
import org.apache.pig.impl.streaming.StreamingCommand;
import org.apache.pig.impl.util.JarManager;
import org.apache.pig.impl.util.WrappedIOException;

public class PigContext implements Serializable, FunctionInstantiator {
    private static final long serialVersionUID = 1L;
   
    private transient final Log log = LogFactory.getLog(getClass());
   
    public static final String JOB_NAME = "jobName";
    public static final String JOB_NAME_PREFIX= "PigLatin";
   
    /* NOTE: we only serialize some of the stuff
     *
     *(to make it smaller given that it's not all needed on the Hadoop side,
     * and also because some is not serializable e.g. the Configuration)
     */
   
    //one of: local, mapreduce, pigbody
    private ExecType execType;;   

    //  configuration for connecting to hadoop
    private Properties conf = new Properties();
   
    //  extra jar files that are needed to run a job
    transient public List<URL> extraJars = new LinkedList<URL>();             
   
    //  The jars that should not be merged in. (Some functions may come from pig.jar and we don't want the whole jar file.)
    transient public Vector<String> skipJars = new Vector<String>(2);   
   
    //main file system that jobs and shell commands access
    transient private DataStorage dfs;                        
   
    //  local file system, where jar files, etc. reside
    transient private DataStorage lfs;                        
   
    // handle to the back-end
    transient private ExecutionEngine executionEngine;
  
    private String jobName = JOB_NAME_PREFIX;    // can be overwritten by users
 
    private Properties properties;
   
    /**
     * a table mapping function names to function specs.
     */
    private Map<String, FuncSpec> definedFunctions = new HashMap<String, FuncSpec>();
   
    /**
     * a table mapping names to streaming commands.
     */
    private Map<String, StreamingCommand> definedCommands =
        new HashMap<String, StreamingCommand>();

    private static ArrayList<String> packageImportList = new ArrayList<String>();

    private Properties log4jProperties = new Properties();
   
    private Level defaultLogLevel;
   
    public int defaultParallel = -1;

    // Says, wether we're processing an explain right now. Explain
    // might skip some check in the logical plan validation (file
    // existence checks, etc).
    public boolean inExplain = false;
   
    private String last_alias = null;

    // List of paths skipped for automatic shipping
    List<String> skippedShipPaths = new ArrayList<String>();
   
    /**
     * Creates a context that uses the {@link ExecType#MAPREDUCE} execution
     * type and a new, empty set of properties.
     */
    public PigContext() {
        this(ExecType.MAPREDUCE, new Properties());
    }
       
    public PigContext(ExecType execType, Properties properties){
        this.execType = execType;
        this.properties = properties;  

        String pigJar = JarManager.findContainingJar(Main.class);
        String hadoopJar = JarManager.findContainingJar(FileSystem.class);
        if (pigJar != null) {
            skipJars.add(pigJar);
            if (!pigJar.equals(hadoopJar))
                skipJars.add(hadoopJar);
        }
       
        executionEngine = null;
       
        // Add the default paths to be skipped for auto-shipping of commands
        skippedShipPaths.add("/bin");
        skippedShipPaths.add("/usr/bin");
        skippedShipPaths.add("/usr/local/bin");
        skippedShipPaths.add("/sbin");
        skippedShipPaths.add("/usr/sbin");
        skippedShipPaths.add("/usr/local/sbin");
    }

    static{
        packageImportList.add("");
        packageImportList.add("org.apache.pig.builtin.");
        packageImportList.add("org.apache.pig.impl.builtin.");       
    }
   
    public static void initializeImportList(String importListCommandLineProperties)
    {
        StringTokenizer tokenizer = new StringTokenizer(importListCommandLineProperties, ":");
        int pos = 0;
        while (tokenizer.hasMoreTokens())
        {
            String importItem = tokenizer.nextToken();
            if (!importItem.endsWith("."))
                importItem += ".";
            packageImportList.add(pos, importItem);
            pos++;
        }
    }
   
    /**
     * Sets up the local file system, the main file system and the execution
     * engine according to the current execution type.
     *
     * @throws ExecException if the execution type is neither LOCAL nor
     *         MAPREDUCE (error code 2040); presumably also if initializing
     *         the engine fails - confirm against the engine implementations
     */
    public void connect() throws ExecException {

        switch (execType) {
            case LOCAL:
            {
                // In local mode the local file system doubles as the main one.
                lfs = new HDataStorage(URI.create("file:///"),
                                       new Properties());
               
                dfs = lfs;
                executionEngine = new LocalExecutionEngine(this);
            }
            break;

            case MAPREDUCE:
            {
                executionEngine = new HExecutionEngine (this);

                executionEngine.init();
               
                // The main file system is whatever the initialized engine provides.
                dfs = executionEngine.getDataStorage();
               
                lfs = new HDataStorage(URI.create("file:///"),
                                        new Properties());               
            }
            break;
           
            default:
            {
                int errCode = 2040;
                String msg = "Unkown exec type: " + execType;
                throw new ExecException(msg, errCode, PigException.BUG);
            }
        }

    }

    public void setJobtrackerLocation(String newLocation) {
        Properties trackerLocation = new Properties();
        trackerLocation.setProperty("mapred.job.tracker", newLocation);
       
        try {
            executionEngine.updateConfiguration(trackerLocation);
        }
        catch (ExecException e) {
            log.error("Failed to set tracker at: " + newLocation);
        }
    }
   
    public void addJar(String path) throws MalformedURLException {
        if (path != null) {
            URL resource = (new File(path)).toURI().toURL();
            addJar(resource);
        }
    }
   
    public void addJar(URL resource) throws MalformedURLException{
        if (resource != null) {
            extraJars.add(resource);
            LogicalPlanBuilder.classloader = createCl(null);
        }
    }

    public void rename(String oldName, String newName) throws IOException {
        if (oldName.equals(newName)) {
            return;
        }
       
        System.out.println("Renaming " + oldName + " to " + newName);

        ElementDescriptor dst = null;
        ElementDescriptor src = null;           

        try {
            dst = dfs.asElement(newName);
            src = dfs.asElement(oldName);           
        }
        catch (DataStorageException e) {
            byte errSrc = getErrorSource();           
            int errCode = 0;
            switch(errSrc) {
            case PigException.REMOTE_ENVIRONMENT:
                errCode = 6005;
                break;
            case PigException.USER_ENVIRONMENT:
                errCode = 4005;
                break;
            default:
                errCode = 2038;
                    break;
            }
            String msg = "Unable to rename " + oldName + " to " + newName;           
            throw new ExecException(msg, errCode, errSrc, e);
        }

        if (dst.exists()) {
            dst.delete();
        }
       
        src.rename(dst);

    }

    public void copy(String src, String dst, boolean localDst) throws IOException {
        DataStorage dstStorage = dfs;
       
        if (localDst) {
            dstStorage = lfs;
        }
       
        ElementDescriptor srcElement = null;
        ElementDescriptor dstElement = null;

        try {
            srcElement = dfs.asElement(src);
            dstElement = dstStorage.asElement(dst);
        }
        catch (DataStorageException e) {
            byte errSrc = getErrorSource();           
            int errCode = 0;
            switch(errSrc) {
            case PigException.REMOTE_ENVIRONMENT:
                errCode = 6006;
                break;
            case PigException.USER_ENVIRONMENT:
                errCode = 4006;
                break;
            default:
                errCode = 2039;
                    break;
            }
            String msg = "Unable to copy " + src + " to " + dst;           
            throw new ExecException(msg, errCode, errSrc, e);
        }
       
        srcElement.copy(dstElement, this.properties, false);
    }
   
    /**
     * Returns the handle to the back-end.
     *
     * @return the execution engine, or null if {@link #connect()} has not
     *         set one yet
     */
    public ExecutionEngine getExecutionEngine() {
        return executionEngine;
    }

    /**
     * Returns the main file system, the one jobs and shell commands access.
     *
     * @return the main file system, or null if it has not been set up yet
     */
    public DataStorage getDfs() {
        return dfs;
    }

    /**
     * Returns the local file system.
     *
     * @return the local file system, or null if it has not been set up yet
     */
    public DataStorage getLfs() {
        return lfs;
    }

    public DataStorage getFs() {
        if(execType == ExecType.LOCAL) {
            return lfs;
        } else {
            return dfs;
        }
    }
   
    /**
     * Provides configuration information.
     *
     * The object handed to the constructor is returned as is, not a copy.
     *
     * @return - information about the configuration used to connect to
     *         execution engine
     */
    public Properties getProperties() {
        return this.properties;
    }
   
    /**
     * @deprecated use {@link #getProperties()} instead
     */
    public Properties getConf() {
        return getProperties();
    }

    /**
     * Returns the value last stored with {@link #setLastAlias(String)}.
     *
     * @return the stored alias, or null if none has been set
     */
    public String getLastAlias() {
      return this.last_alias;
    }

    /**
     * Stores the alias later returned by {@link #getLastAlias()}.
     *
     * @param value the alias to remember
     */
    public void setLastAlias(String value) {
      this.last_alias = value;
    }

    /**
     * Defines an alias for the given function spec. This
     * is useful for functions that require arguments to the
     * constructor.
     *
     * @param function - the new function alias to define.
     * @param functionSpec - the FuncSpec object representing the name of
     * the function class and any arguments to constructor.
     *
     */
    public void registerFunction(String function, FuncSpec functionSpec) {
        if (functionSpec == null) {
            definedFunctions.remove(function);
        } else {
            definedFunctions.put(function, functionSpec);
        }
    }

    /**
     * Defines an alias for the given streaming command.
     *
     * This is useful for complicated streaming command specs.
     *
     * @param alias - the new command alias to define.
     * @param command - the command
     */
    public void registerStreamCmd(String alias, StreamingCommand command) {
        if (command == null) {
            definedCommands.remove(alias);
        } else {
            definedCommands.put(alias, command);
        }
    }

    /**
     * Returns the type of execution currently in effect, i.e. the value
     * given to the constructor or to {@link #setExecType(ExecType)}.
     *
     * @return current execution type
     */
    public ExecType getExecType() {
        return execType;
    }
   
   
   
   

    /**
     * Creates a Classloader based on the passed jarFile and any extra jar files.
     *
     * @param jarFile
     *            the jar file to be part of the newly created Classloader. This jar file plus any
     *            jars in the extraJars list will constitute the classpath.
     * @return the new Classloader.
     * @throws MalformedURLException
     */
    public ClassLoader createCl(String jarFile) throws MalformedURLException {
        int len = extraJars.size();
        int passedJar = jarFile == null ? 0 : 1;
        URL urls[] = new URL[len + passedJar];
        if (jarFile != null) {
            urls[0] = new URL("file:" + jarFile);
        }
        for (int i = 0; i < extraJars.size(); i++) {
            urls[i + passedJar] = extraJars.get(i);
        }
        //return new URLClassLoader(urls, PigMapReduce.class.getClassLoader());
        return new URLClassLoader(urls, PigContext.class.getClassLoader());
    }
   
   
    public static Class resolveClassName(String name) throws IOException{

        for(String prefix: packageImportList) {
            Class c;
            try {
                c = Class.forName(prefix+name,true, LogicalPlanBuilder.classloader);
                return c;
            }
            catch (ClassNotFoundException e) {
                // do nothing
            }
            catch (UnsupportedClassVersionError e) {
                int errCode = 1069;
                String msg = "Problem resolving class version numbers for class " + name;
                throw new ExecException(msg, errCode, PigException.INPUT, e) ;
            }
           
        }

        // create ClassNotFoundException exception and attach to IOException
        // so that we don't need to buble interface changes throughout the code
        int errCode = 1070;
        String msg = "Could not resolve " + name + " using imports: " + packageImportList;
        throw new ExecException(msg, errCode, PigException.INPUT);
    }
   
   
    @SuppressWarnings("unchecked")
    public static Object instantiateFuncFromSpec(FuncSpec funcSpec)  {
        Object ret;
        String className =funcSpec.getClassName();
        String[] args = funcSpec.getCtorArgs();
        Class objClass = null ;

        try {
            objClass = resolveClassName(className);
        }
        catch(IOException ioe) {
            throw new RuntimeException("Cannot instantiate:" + className, ioe) ;
        }

        try {
            // Do normal instantiation
            if (args != null && args.length > 0) {
                Class paramTypes[] = new Class[args.length];
                for (int i = 0; i < paramTypes.length; i++) {
                    paramTypes[i] = String.class;
                }
                Constructor c = objClass.getConstructor(paramTypes);
                ret =  c.newInstance((Object[])args);
            } else {
                ret = objClass.newInstance();
            }
        }
        catch(NoSuchMethodException nme) {
            // Second chance. Try with var arg constructor
            try {
                Constructor c = objClass.getConstructor(String[].class);
                Object[] wrappedArgs = new Object[1] ;
                wrappedArgs[0] = args ;
                ret =  c.newInstance(wrappedArgs);
            }
            catch(Throwable e){
                // bad luck
                StringBuilder sb = new StringBuilder();
                sb.append("could not instantiate '");
                sb.append(className);
                sb.append("' with arguments '");
                sb.append(args);
                sb.append("'");
                throw new RuntimeException(sb.toString(), e);
            }
        }
        catch(Throwable e){
            // bad luck
            StringBuilder sb = new StringBuilder();
            sb.append("could not instantiate '");
            sb.append(className);
            sb.append("' with arguments '");
            sb.append(args);
            sb.append("'");
            throw new RuntimeException(sb.toString(), e);
        }
        return ret;
    }
   
    /**
     * Instantiates a function from its textual spec by wrapping the text in
     * a {@link FuncSpec} and delegating to
     * {@link #instantiateFuncFromSpec(FuncSpec)}.
     *
     * @param funcSpec the textual function spec
     * @return the new instance
     */
    public static Object instantiateFuncFromSpec(String funcSpec)  {
        return instantiateFuncFromSpec(new FuncSpec(funcSpec));
    }
   
   
    public Class getClassForAlias(String alias) throws IOException{
        String className = null;
        FuncSpec funcSpec = null;
        if (definedFunctions != null) {
            funcSpec = definedFunctions.get(alias);
        }
        if (funcSpec != null) {
            className = funcSpec.getClassName();
        }else{
            className = FuncSpec.getClassNameFromSpec(alias);
        }
        return resolveClassName(className);
    }
 
    public Object instantiateFuncFromAlias(String alias) throws IOException {
        FuncSpec funcSpec;
        if (definedFunctions != null && (funcSpec = definedFunctions.get(alias))!=null)
            return instantiateFuncFromSpec(funcSpec);
        else
            return instantiateFuncFromSpec(alias);
    }

    /**
     * Get the {@link StreamingCommand} for the given alias.
     *
     * @param alias the alias for the <code>StreamingCommand</code>
     * @return <code>StreamingCommand</code> for the alias, or null if no
     *         command is registered under that alias
     */
    public StreamingCommand getCommandForAlias(String alias) {
        return definedCommands.get(alias);
    }
   
    /**
     * Sets the execution type. Only the field is changed; file systems and
     * execution engine already set up by {@link #connect()} are left as
     * they are.
     *
     * @param execType the new execution type
     */
    public void setExecType(ExecType execType) {
        this.execType = execType;
    }
   
    /**
     * Create a new {@link ExecutableManager} depending on the ExecType.
     *
     * @return a new {@link ExecutableManager} depending on the ExecType
     * @throws ExecException
     */
    public ExecutableManager createExecutableManager() throws ExecException {
        ExecutableManager executableManager = null;

        switch (execType) {
            case LOCAL:
            {
                executableManager = new ExecutableManager();
            }
            break;
            case MAPREDUCE:
            {
                executableManager = new HadoopExecutableManager();
            }
            break;
            default:
            {
                int errCode = 2040;
                String msg = "Unkown exec type: " + execType;
                throw new ExecException(msg, errCode, PigException.BUG);
            }
        }
       
        return executableManager;
    }

    public FuncSpec getFuncSpecFromAlias(String alias) {
        FuncSpec funcSpec;
        if (definedFunctions != null && (funcSpec = definedFunctions.get(alias))!=null)
            return funcSpec;
        else
            return null;
    }

    /**
     * Add a path to be skipped while automatically shipping binaries for
     * streaming. The path is appended to the defaults registered by the
     * constructor; no check for duplicates is made.
     * 
     * @param path path to be skipped
     */
    public void addPathToSkip(String path) {
        skippedShipPaths.add(path);
    }
   
    /**
     * Get the paths which are to be skipped while automatically shipping
     * binaries for streaming. The internal list itself is returned, not a
     * copy.
     *
     * @return paths which are to be skipped while automatically shipping
     *         binaries for streaming
     */
    public List<String> getPathsToSkip() {
        return skippedShipPaths;
    }
   
    /**
     * Check the execution mode and return the appropriate error source
     *
     * @return error source
     */
    public byte getErrorSource() {
        if(execType == ExecType.LOCAL) {
            return PigException.USER_ENVIRONMENT;
        } else if (execType == ExecType.MAPREDUCE) {
            return PigException.REMOTE_ENVIRONMENT;
        } else {
            return PigException.BUG;
        }       
    }
   
    /**
     * Returns the list of package prefixes used to resolve class names.
     * The shared static list itself is returned, not a copy.
     *
     * @return the package import list
     */
    public static ArrayList<String> getPackageImportList()
    {
        return packageImportList;
    }
    /**
     * Replaces the list of package prefixes used to resolve class names.
     * The list is static, so the change applies to every PigContext.
     *
     * @param list the new package import list
     */
    public static void setPackageImportList(ArrayList<String> list)
    {
        packageImportList = list;
    }
    /**
     * Stores the log4j properties for this context.
     *
     * @param p the log4j properties
     */
    public void setLog4jProperties(Properties p)
    {
        log4jProperties = p;
    }
    /**
     * Returns the log4j properties of this context; an empty set unless
     * {@link #setLog4jProperties(Properties)} was called.
     *
     * @return the log4j properties
     */
    public Properties getLog4jProperties()
    {
        return log4jProperties;
    }
    /**
     * Returns the default log level of this context.
     *
     * @return the default log level, or null if none has been set
     */
    public Level getDefaultLogLevel()
    {
        return defaultLogLevel;
    }
    /**
     * Sets the default log level of this context.
     *
     * @param l the default log level
     */
    public void setDefaultLogLevel(Level l)
    {
        defaultLogLevel = l;
    }
}
TOP

Related Classes of org.apache.pig.impl.PigContext

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.