Package com.netflix.lipstick.util

Source Code of com.netflix.lipstick.util.OutputSampler$SampleOutput

/**
* Copyright 2013 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.netflix.lipstick.util;

import java.lang.Class;
import java.lang.reflect.Field;
import java.lang.IllegalAccessException;
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.LoadFunc;
import org.apache.pig.StoreFuncInterface;
import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
import org.apache.pig.builtin.PigStorage;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.io.InterStorage;
import org.apache.pig.impl.io.ReadToEndLoader;
import org.apache.pig.impl.io.TFileStorage;
import org.apache.pig.tools.pigstats.JobStats;
import org.apache.pig.tools.pigstats.ScriptState;

/**
* Output sampler of intermediate results of Pig jobs.
*
* @author nbates
*
*/
public class OutputSampler {
    public static class SampleOutput {
        private final String schema;
        private final String output;

        /**
         * Constructs a SampleOutput from the given schema/output strings.
         *
         * @param schema
         * @param output
         */
        public SampleOutput(String schema, String output) {
            this.schema = schema;
            this.output = output;
        }

        public String getSchema() {
            return schema;
        }

        public String getOutput() {
            return output;
        }
    }

    /** Logger shared by all OutputSampler instances. */
    private static final Log LOG = LogFactory.getLog(OutputSampler.class);
    /** Separator placed between the fields of each sampled tuple (the \001 control character). */
    public static final String DELIMITER = "\001";
    /** Job statistics whose map/reduce stores this sampler reads. */
    private final JobStats jobStats;

    /**
     * Constructs an OutputSampler from a JobStats object.
     *
     * @param jobStats statistics of the job to sample; kept as-is (no null
     *                 check here) and inspected later by getSampleOutputs
     */
    public OutputSampler(JobStats jobStats) {
        this.jobStats = jobStats;
    }

    /**
     * Check if StoreFunc is safe to sample data from.
     *
     * @param store the StoreFunc
     * @return true if safe to sample, otherwise false
     */
    protected boolean storeFuncValidForReading(StoreFuncInterface store) {
        return store instanceof InterStorage || store instanceof TFileStorage || store.getClass() == PigStorage.class;
    }

    /**
     * Returns a list of sample outputs limited by the maxRowsPerEntry and
     * maxBytesPerEntry.
     *
     * @param maxRowsPerEntry
     * @param maxBytesPerEntry
     * @return
     */
    public List<SampleOutput> getSampleOutputs(int maxRowsPerEntry, int maxBytesPerEntry) {
        List<SampleOutput> sampleOutputs = new LinkedList<SampleOutput>();

        for (POStore storeInfo : getStoreInfo(jobStats)) {
            LOG.info("Sample output: " + storeInfo);
            LOG.info("StoreFunc: " + storeInfo.getStoreFunc().getClass());
            if (storeInfo != null && storeFuncValidForReading(storeInfo.getStoreFunc())) {
                String schema = (storeInfo.getSchema() == null) ? ("") : storeInfo.getSchema().toString();
                sampleOutputs.add(new SampleOutput(schema, getSampleRows(storeInfo, maxRowsPerEntry, maxBytesPerEntry)));
            }
        }
        return sampleOutputs;
    }

    /* For a given object retrieve the value of a named field, regardless
       of what class in the object's inheritance hierarchy the field was
       declared upon, and raises NoSuchFieldException if the field does
       not exist on any class in the hierarchy. */
    public Object getInheritedFieldValue(Object obj, String fieldName) throws IllegalAccessException, NoSuchFieldException {
        return getInheritedFieldValue(obj, obj.getClass(), fieldName);
    }

    protected Object getInheritedFieldValue(Object obj, Class cls, String fieldName) throws IllegalAccessException, NoSuchFieldException {
        try {
            Field f = cls.getDeclaredField(fieldName);
            f.setAccessible(true);
            return f.get(obj);
        } catch (NoSuchFieldException e) {
            Class souper = cls.getSuperclass();
            if (souper != null) {
                return getInheritedFieldValue(obj, souper, fieldName);
            } else {
                /* getSuperclass() returns null if we've gotten all the
                   way up to Object. At this point we've checked every class
                   in the heirarchy so the field must not exist. */
                throw new NoSuchFieldException(fieldName);
            }
        }
    }

    @SuppressWarnings("unchecked")
    private List<POStore> getStoreInfo(JobStats jobStats) {
        List<POStore> storeInfo = new LinkedList<POStore>();

        // Use reflection to get the store info for the jobStats
        // Done b/c the OutputStats from jobStats.getOutputs()
        // doesn't include intermediate (temp) outputs
        List<POStore> mapStores = null;
        List<POStore> reduceStores = null;
        try {
            mapStores = (LinkedList<POStore>) getInheritedFieldValue(jobStats, "mapStores");
        } catch (Exception e) {
            LOG.warn("Failed to get map store information for jobId [" + jobStats.getJobId() + "].", e);
        }

        try {
            reduceStores = (LinkedList<POStore>) getInheritedFieldValue(jobStats, "reduceStores");
        } catch (Exception e) {
            LOG.warn("Failed to get reduce store information for jobId [" + jobStats.getJobId() + "].", e);
        }

        if (mapStores != null) {
            storeInfo.addAll(mapStores);
        } else {
            LOG.info("No map store information for jobId [" + jobStats.getJobId() + "].");
        }

        if (reduceStores != null) {
            storeInfo.addAll(reduceStores);
        } else {
            LOG.info("No reduce store information for jobId [" + jobStats.getJobId() + "].");
        }

        return storeInfo;
    }

    private String getSampleRows(POStore store, int maxRows, int maxBytes) {

        // Load the proper amount of data
        StringBuilder sb = new StringBuilder();

        try {
            LoadFunc loader = getLoader(store);
            if (loader != null) {
                int rowCount = 0;
                Tuple t = loader.getNext();
                while (t != null && rowCount < maxRows) {
                    String strTuple = t.toDelimitedString(DELIMITER);

                    if (strTuple != null) {
                        if (sb.length() + strTuple.length() + DELIMITER.length() > maxBytes) {
                            break;
                        }

                        if (sb.length() > 0) {
                            sb.append('\n');
                        }

                        sb.append(strTuple);
                    }
                    rowCount++;
                    t = loader.getNext();
                }
            }
        } catch (Exception e) {
            String sampleDescription = (sb.length() > 0) ? "full" : "any";
            LOG.warn("Unable to get " + sampleDescription + " sample for: " + store.getSFile(), e);
        }

        return sb.toString();
    }

    protected LoadFunc getLoader(POStore store) {
        // Create a loader from the POStore
        // Sampled from JobStats class
        LoadFunc loader = null;
        PigContext pigContext = ScriptState.get().getPigContext();
        try {
            LoadFunc originalLoadFunc = (LoadFunc) PigContext.instantiateFuncFromSpec(store.getSFile().getFuncSpec());

            loader = new ReadToEndLoader(originalLoadFunc,
                                         ConfigurationUtil.toConfiguration(pigContext.getProperties()),
                                         store.getSFile().getFileName(),
                                         0);

        } catch (Exception e) {
            LOG.warn("Unable to get sample rows for: " + store.getSFile(), e);
        }

        return loader;
    }
}
TOP

Related Classes of com.netflix.lipstick.util.OutputSampler$SampleOutput

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.