Package org.apache.pig.impl.util

Source Code of org.apache.pig.impl.util.Utils

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.pig.FileInputLoadFunc;
import org.apache.pig.FuncSpec;
import org.apache.pig.LoadFunc;
import org.apache.pig.PigException;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.io.InterStorage;
import org.apache.pig.impl.io.ReadToEndLoader;
import org.apache.pig.impl.io.TFileStorage;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
import org.apache.pig.newplan.logical.relational.LogicalSchema;
import org.apache.pig.parser.ParserException;
import org.apache.pig.parser.QueryParserDriver;

import com.google.common.collect.Lists;

/**
* Class with utility static methods
*/
public class Utils {
    private static final Log log = LogFactory.getLog(Utils.class);
    /**
     * This method is a helper for classes to implement {@link java.lang.Object#equals(java.lang.Object)}
     * checks if two objects are equals - two levels of checks are
     * made - first if both are null or not null. If either is null,
     * check is made whether both are null.
     * If both are non null, equality also is checked if so indicated
     * @param obj1 first object to be compared
     * @param obj2 second object to be compared
     * @param checkEquality flag to indicate whether object equality should
     * be checked if obj1 and obj2 are non-null
     * @return true if the two objects are equal
     * false otherwise
     */
    public static boolean checkNullEquals(Object obj1, Object obj2, boolean checkEquality) {
        if(obj1 == null || obj2 == null) {
            return obj1 == obj2;
        }
        if(checkEquality) {
            if(!obj1.equals(obj2)) {
                return false;
            }
        }
        return true;
    }


    /**
     * This method is a helper for classes to implement {@link java.lang.Object#equals(java.lang.Object)}
     * The method checks whether the two arguments are both null or both not null and
     * whether they are of the same class
     * @param obj1 first object to compare
     * @param obj2 second object to compare
     * @return true if both objects are null or both are not null
     * and if both are of the same class if not null
     * false otherwise
     */
    public static boolean checkNullAndClass(Object obj1, Object obj2) {
        if(checkNullEquals(obj1, obj2, false)) {
            if(obj1 != null) {
                return obj1.getClass() == obj2.getClass();
            } else {
                return true; // both obj1 and obj2 should be null
            }
        } else {
            return false;
        }
    }

    /**
     * A helper function for retrieving the script schema set by the LOLoad
     * function.
     *
     * @param loadFuncSignature
     * @param conf
     * @return Schema
     * @throws IOException
     */
    public static Schema getScriptSchema(
            String loadFuncSignature,
            Configuration conf) throws IOException {
        Schema scriptSchema = null;
        String scriptField = conf.get(getScriptSchemaKey(loadFuncSignature));

        if (scriptField != null) {
            scriptSchema = (Schema) ObjectSerializer.deserialize(scriptField);
        }

        return scriptSchema;
    }

    public static String getScriptSchemaKey(String loadFuncSignature) {
      return loadFuncSignature + ".scriptSchema";
    }

    public static ResourceSchema getSchema(LoadFunc wrappedLoadFunc, String location, boolean checkExistence, Job job)
    throws IOException {
        Configuration conf = job.getConfiguration();
        if (checkExistence) {
            Path path = new Path(location);
            if (!FileSystem.get(conf).exists(path)) {
                // At compile time in batch mode, the file may not exist
                // (such as intermediate file). Just return null - the
                // same way as we would if we did not get a valid record
                return null;
            }
        }
        ReadToEndLoader loader = new ReadToEndLoader(wrappedLoadFunc, conf, location, 0);
        // get the first record from the input file
        // and figure out the schema from the data in
        // the first record
        Tuple t = loader.getNext();
        if (t == null) {
            // we couldn't get a valid record from the input
            return null;
        }
        int numFields = t.size();
        Schema s = new Schema();
        for (int i = 0; i < numFields; i++) {
            try {
                s.add(DataType.determineFieldSchema(t.get(i)));
            }
            catch (Exception e) {
                int errCode = 2104;
                String msg = "Error while determining schema of SequenceFileStorage data.";
                throw new ExecException(msg, errCode, PigException.BUG, e);
            }
        }
        return new ResourceSchema(s);
    }

    /**
     * @param schemaString a String representation of the Schema <b>without</b>
     *                     any enclosing curly-braces.<b>Not</b> for use with
     *                     <code>Schema#toString</code>
     * @return Schema instance
     * @throws ParserException
     */
    public static Schema getSchemaFromString(String schemaString) throws ParserException {
        LogicalSchema schema = parseSchema(schemaString);
        Schema result = org.apache.pig.newplan.logical.Util.translateSchema(schema);
        Schema.setSchemaDefaultType(result, DataType.BYTEARRAY);
        return result;
    }

    /**
     * getSchemaFromBagSchemaString
     * <b>NOTE: use this call</b> when you need to generate a Schema object
     * from the representation generated by <code>Schema#toString</code>.
     * This call strips the enclosing outer curly braces from the <code>toString</code>
     * representation, which are placed there because the actual representation of
     * the schema data is as a Bag-type relation.
     * @param schemaString a String representation of the Schema to instantiate,
     *                     in the form generated by <code>Schema.toString()</code>
     * @return Schema instance
     * @throws ParserException
     */
    public static Schema getSchemaFromBagSchemaString(String schemaString) throws ParserException {
        String unwrappedSchemaString = schemaString.substring(1, schemaString.length() - 1);
        return getSchemaFromString(unwrappedSchemaString);
    }

    public static LogicalSchema parseSchema(String schemaString) throws ParserException {
        QueryParserDriver queryParser = new QueryParserDriver( new PigContext(),
                "util", new HashMap<String, String>() ) ;
        LogicalSchema schema = queryParser.parseSchema(schemaString);
        return schema;
    }

    /**
     * This method adds FieldSchema of 'input source tag/path' as the first
     * field. This will be called only when PigStorage is invoked with
     * '-tagFile' or '-tagPath' option and the schema file is present to be
     * loaded.
     *
     * @param schema
     * @param fieldName
     * @return ResourceSchema
     */
    public static ResourceSchema getSchemaWithInputSourceTag(ResourceSchema schema, String fieldName) {
        ResourceFieldSchema[] fieldSchemas = schema.getFields();
        ResourceFieldSchema sourceTagSchema = new ResourceFieldSchema(new FieldSchema(fieldName, DataType.CHARARRAY));
        ResourceFieldSchema[] fieldSchemasWithSourceTag = new ResourceFieldSchema[fieldSchemas.length + 1];
        fieldSchemasWithSourceTag[0] = sourceTagSchema;
        for(int j = 0; j < fieldSchemas.length; j++) {
            fieldSchemasWithSourceTag[j + 1] = fieldSchemas[j];
        }
        return schema.setFields(fieldSchemasWithSourceTag);
    }

    public static String getTmpFileCompressorName(PigContext pigContext) {
        if (pigContext == null)
            return InterStorage.class.getName();
        boolean tmpFileCompression = pigContext.getProperties().getProperty("pig.tmpfilecompression", "false").equals("true");
        String codec = pigContext.getProperties().getProperty("pig.tmpfilecompression.codec", "");
        if (tmpFileCompression) {
            if (codec.equals("lzo"))
                pigContext.getProperties().setProperty("io.compression.codec.lzo.class", "com.hadoop.compression.lzo.LzoCodec");
            return TFileStorage.class.getName();
        } else
            return InterStorage.class.getName();
    }

    public static FileInputLoadFunc getTmpFileStorageObject(Configuration conf) throws IOException {
        boolean tmpFileCompression = conf.getBoolean("pig.tmpfilecompression", false);
        return tmpFileCompression ? new TFileStorage() : new InterStorage();
    }

    public static boolean tmpFileCompression(PigContext pigContext) {
        if (pigContext == null)
            return false;
        return pigContext.getProperties().getProperty("pig.tmpfilecompression", "false").equals("true");
    }

    public static String tmpFileCompressionCodec(PigContext pigContext) throws IOException {
        if (pigContext == null)
            return "";
        String codec = pigContext.getProperties().getProperty("pig.tmpfilecompression.codec", "");
        if (codec.equals("gz") || codec.equals("lzo"))
            return codec;
        else
            throw new IOException("Invalid temporary file compression codec ["+codec+"]. Expected compression codecs are gz and lzo");
    }

    public static String getStringFromArray(String[] arr) {
        StringBuilder str = new StringBuilder();
        for(String s: arr) {
            str.append(s);
            str.append(" ");
        }
        return str.toString();
    }

    public static FuncSpec buildSimpleFuncSpec(String className, byte...types) {
        List<Schema.FieldSchema> fieldSchemas = Lists.newArrayListWithExpectedSize(types.length);
        for (byte type : types) {
            fieldSchemas.add(new Schema.FieldSchema(null, type));
        }
        return new FuncSpec(className, new Schema(fieldSchemas));
    }

    /**
     * Replace sequences of two slashes ("\\") with one slash ("\")
     * (not escaping a slash in grunt is disallowed, but a double slash doesn't get converted
     * into a regular slash, so we have to do it instead)
     * @param str
     * @return the resulting string
     */
    public static String slashisize(String str) {
        return str.replace("\\\\", "\\");
    }
   
    @SuppressWarnings("unchecked")
    public static <O> Collection<O> mergeCollection(Collection<O> a, Collection<O> b) {
        if (a==null && b==null)
            return null;
        Collection<O> result = null;
        try {
            if (a!=null)
                result = a.getClass().newInstance();
            else
                result = b.getClass().newInstance();
        } catch (Exception e) {
            // Shall not happen
        }
        if (a==null) {
            result.addAll(b);
        }
        else if (b==null) {
            result.addAll(a);
        }
        else {
            result.addAll(a);
            for (O o : b) {
                if (!result.contains(o)) {
                    result.add(o);
                }
            }
        }
       
        return result;
    }
   
   public static InputStream getCompositeStream(InputStream in, Properties properties) {
       //Load default ~/.pigbootup if not specified by user
        final String bootupFile = properties.getProperty("pig.load.default.statements", System.getProperty("user.home") + "/.pigbootup");
        try {
        final InputStream inputSteam = new FileInputStream(new File(bootupFile));
        return new SequenceInputStream(inputSteam, in);
        } catch(FileNotFoundException fe) {
            log.info("Default bootup file " +bootupFile+ " not found");
            return in;
        }
    }

    /**
     * Returns the total number of bytes for this file, or if a file all files in the directory.
     */
    public static long getPathLength(FileSystem fs, FileStatus status) throws IOException {
        if (!status.isDir()) {
            return status.getLen();
        } else {
            FileStatus[] children = fs.listStatus(status.getPath());
            long size = 0;
            for (FileStatus child : children) {
                size += getPathLength(fs, child);
            }
            return size;
        }
    }

}
TOP

Related Classes of org.apache.pig.impl.util.Utils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.