Package org.apache.pig.tools.pigstats

Source Code of org.apache.pig.tools.pigstats.JobStats

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.pig.tools.pigstats;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.Counters;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.FileBasedOutputSizeReader;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigStatsOutputSizeReader;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
import org.apache.pig.classification.InterfaceAudience;
import org.apache.pig.classification.InterfaceStability;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.newplan.Operator;
import org.apache.pig.newplan.PlanVisitor;
import org.apache.pig.tools.pigstats.PigStats.JobGraph;

/**
* This class encapsulates the runtime statistics of a MapReduce job.
* Job statistics is collected when job is completed.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public abstract class JobStats extends Operator {

    private static final Log LOG = LogFactory.getLog(JobStats.class);

    public static final String ALIAS = "JobStatistics:alias";
    public static final String ALIAS_LOCATION = "JobStatistics:alias_location";
    public static final String FEATURE = "JobStatistics:feature";

    public static final String SUCCESS_HEADER = null;
    public static final String FAILURE_HEADER = null;

    public static enum JobState { UNKNOWN, SUCCESS, FAILED; }

    protected JobState state = JobState.UNKNOWN;

    protected ArrayList<OutputStats> outputs;

    protected ArrayList<InputStats> inputs;

    protected Configuration conf;

    protected long hdfsBytesRead = 0;
    protected long hdfsBytesWritten = 0;

    private String errorMsg;

    private Exception exception = null;

    protected JobStats(String name, JobGraph plan) {
        super(name, plan);
        outputs = new ArrayList<OutputStats>();
        inputs = new ArrayList<InputStats>();
    }

    public abstract String getJobId();

    public void setConf(Configuration conf) {
        if (conf == null) {
            return;
        }
        this.conf = conf;
    }

    public JobState getState() { return state; }

    public boolean isSuccessful() { return (state == JobState.SUCCESS); }

    public void setSuccessful(boolean isSuccessful) {
        this.state = isSuccessful ? JobState.SUCCESS : JobState.FAILED;
    }

    public String getErrorMessage() { return errorMsg; }

    public Exception getException() { return exception; }

    public List<OutputStats> getOutputs() {
        return Collections.unmodifiableList(outputs);
    }

    public List<InputStats> getInputs() {
        return Collections.unmodifiableList(inputs);
    }

    public String getAlias() {
        return (String)getAnnotation(ALIAS);
    }

    public String getAliasLocation() {
        return (String)getAnnotation(ALIAS_LOCATION);
    }

    public String getFeature() {
        return (String)getAnnotation(FEATURE);
    }

    public long getHdfsBytesRead() {
        return hdfsBytesRead;
    }

    public long getHdfsBytesWritten() {
        return hdfsBytesWritten;
    }

    /**
     * Returns the total bytes written to user specified HDFS
     * locations of this job.
     */
    public long getBytesWritten() {
        long count = 0;
        for (OutputStats out : outputs) {
            long n = out.getBytes();
            if (n > 0) count += n;
        }
        return count;
    }

    /**
     * Returns the total number of records in user specified output
     * locations of this job.
     */
    public long getRecordWrittern() {
        long count = 0;
        for (OutputStats out : outputs) {
            long rec = out.getNumberRecords();
            if (rec > 0) count += rec;
        }
        return count;
    }

    @Override
    public abstract void accept(PlanVisitor v) throws FrontendException;


    @Override
    public boolean isEqual(Operator operator) {
        if (!(operator instanceof JobStats)) return false;
        return name.equalsIgnoreCase(operator.getName());
    }

    public void setErrorMsg(String errorMsg) {
        this.errorMsg = errorMsg;
    }

    public void setBackendException(Exception e) {
        exception = e;
    }

    public abstract String getDisplayString();


    /**
     * Calculate the median value from the given array
     * @param durations
     * @return median value
     */
    protected long calculateMedianValue(List<Long> durations) {
        long median;
        // figure out the median
        Collections.sort(durations);
        int midPoint = durations.size() /2;
        if ((durations.size() & 1) == 1) {
            // odd
            median = durations.get(midPoint);
        } else {
            // even
            median = (durations.get(midPoint-1) + durations.get(midPoint)) / 2;
        }
        return median;
    }

    public boolean isSampler() {
        return getFeature().contains(ScriptState.PIG_FEATURE.SAMPLER.name());
    }

    public boolean isIndexer() {
        return getFeature().contains(ScriptState.PIG_FEATURE.INDEXER.name());
    }

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getNumberMaps} instead.
     */
    @Deprecated
    abstract public int getNumberMaps();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getNumberReduces} instead.
     */
    @Deprecated
    abstract public int getNumberReduces();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMaxMapTime} instead.
     */
    @Deprecated
    abstract public long getMaxMapTime();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMinMapTime} instead.
     */
    @Deprecated
    abstract public long getMinMapTime();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getAvgMapTime} instead.
     */
    @Deprecated
    abstract public long getAvgMapTime();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMaxReduceTime} instead.
     */
    @Deprecated
    abstract public long getMaxReduceTime();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMinReduceTime} instead.
     */
    @Deprecated
    abstract public long getMinReduceTime();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getAvgREduceTime} instead.
     */
    @Deprecated
    abstract public long getAvgREduceTime();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMapInputRecords} instead.
     */
    @Deprecated
    abstract public long getMapInputRecords();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMapOutputRecords} instead.
     */
    @Deprecated
    abstract public long getMapOutputRecords();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getReduceInputRecords} instead.
     */
    @Deprecated
    abstract public long getReduceInputRecords();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getReduceOutputRecords} instead.
     */
    @Deprecated
    abstract public long getReduceOutputRecords();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getSMMSpillCount} instead.
     */
    @Deprecated
    abstract public long getSMMSpillCount();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getProactiveSpillCountObjects} instead.
     */
    @Deprecated
    abstract public long getProactiveSpillCountObjects();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getProactiveSpillCountRecs} instead.
     */
    @Deprecated
    abstract public long getProactiveSpillCountRecs();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getHadoopCounters} instead.
     */
    @Deprecated
    abstract public Counters getHadoopCounters();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMultiStoreCounters} instead.
     */
    @Deprecated
    abstract public Map<String, Long> getMultiStoreCounters();

    /**
     * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats,
     * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMultiInputCounters} instead.
     */
    @Deprecated
    abstract public Map<String, Long> getMultiInputCounters();

    /**
     * Looks up the output size reader from OUTPUT_SIZE_READER_KEY and invokes
     * it to get the size of output. If OUTPUT_SIZE_READER_KEY is not set,
     * defaults to FileBasedOutputSizeReader.
     * @param sto POStore
     * @param conf configuration
     */
    public static long getOutputSize(POStore sto, Configuration conf) {
        PigStatsOutputSizeReader reader = null;
        String readerNames = conf.get(
                PigStatsOutputSizeReader.OUTPUT_SIZE_READER_KEY,
                FileBasedOutputSizeReader.class.getCanonicalName());

        for (String className : readerNames.split(",")) {
            reader = (PigStatsOutputSizeReader) PigContext.instantiateFuncFromSpec(className);
            if (reader.supports(sto, conf)) {
                LOG.info("using output size reader: " + className);
                try {
                    return reader.getOutputSize(sto, conf);
                } catch (FileNotFoundException e) {
                    LOG.warn("unable to find the output file", e);
                    return -1;
                } catch (IOException e) {
                    LOG.warn("unable to get byte written of the job", e);
                    return -1;
                }
            }
        }

        LOG.warn("unable to find an output size reader");
        return -1;
    }

}
TOP

Related Classes of org.apache.pig.tools.pigstats.JobStats

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.