Package com.netflix.lipstick.pigtolipstick

Source Code of com.netflix.lipstick.pigtolipstick.BasicP2LClient

/**
* Copyright 2013 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.netflix.lipstick.pigtolipstick;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapred.Counters.Group;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.JobInProgress;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TaskReport;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.pig.ExecType;
import org.apache.pig.LipstickPigServer;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceOper;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.impl.PigContext;
import org.apache.pig.newplan.Operator;
import org.apache.pig.tools.pigstats.JobStats;
import org.apache.pig.tools.pigstats.PigStats;
import org.apache.pig.tools.pigstats.ScriptState;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.netflix.lipstick.MRPlanCalculator;
import com.netflix.lipstick.P2jPlanGenerator;
import com.netflix.lipstick.model.P2jCounters;
import com.netflix.lipstick.model.P2jWarning;
import com.netflix.lipstick.model.P2jJobStatus;
import com.netflix.lipstick.model.P2jPlanPackage;
import com.netflix.lipstick.model.P2jPlanStatus;
import com.netflix.lipstick.model.P2jPlanStatus.StatusText;
import com.netflix.lipstick.model.P2jSampleOutput;
import com.netflix.lipstick.model.P2jSampleOutputList;
import com.netflix.lipstick.warnings.JobWarnings;
import com.netflix.lipstick.pigstatus.PigStatusClient;
import com.netflix.lipstick.pigstatus.RestfulPigStatusClient;
import com.netflix.lipstick.util.OutputSampler;
import com.netflix.lipstick.util.OutputSampler.SampleOutput;

/**
* A basic implementation of P2LClient.
*
* @author nbates
*
*/
public class BasicP2LClient implements P2LClient {
    private static final Log LOG = LogFactory.getLog(BasicP2LClient.class);

    protected static final String JOB_NAME_PROP = "jobName";
    protected static final String ENABLE_SAMPLE_OUTPUT_PROP = "lipstick.enable.sampleoutput";

    protected boolean planFailed = false;
    protected String planId;
    protected P2jPlanGenerator unopPlanGenerator;
    protected P2jPlanGenerator opPlanGenerator;
    protected LipstickPigServer ps;
    protected PigContext context;
    protected final Set<String> runningJobIds = Sets.newHashSet();
    protected final Map<String, P2jJobStatus> jobIdToJobStatusMap = Maps.newHashMap();

    protected final PigStatusClient psClient;
    protected boolean invalidClient = false;
    protected boolean enableSampleOutput = true;

    /**
     * Instantiates a new BasicP2LClient using RestfulPigStatusClient with serviceUrls.
     *
     * @param serviceUrls One or more comma separated urls for connecting to a Lipstick Server
     */
    public BasicP2LClient(String serviceUrls) {
        this(new RestfulPigStatusClient(serviceUrls));
    }

    public BasicP2LClient(PigStatusClient client) {
        this.psClient = client;
    }

    @Override
    public void setPlanGenerators(P2jPlanGenerator unopPlanGenerator, P2jPlanGenerator opPlanGenerator) {
        this.unopPlanGenerator = unopPlanGenerator;
        this.opPlanGenerator = opPlanGenerator;
    }

    @Override
    public void setPigServer(LipstickPigServer ps) {
        this.ps = ps;
    }

    @Override
    public void setPigContext(PigContext context) {
        this.context = context;
    }

    @Override
    public void setPlanId(String planId) {
        this.planId = planId;
    }

    @Override
    public String getPlanId() {
        return planId;
    }

    @Override
    @SuppressWarnings("unused")
    public void createPlan(MROperPlan plan) {
        if (plan != null && unopPlanGenerator != null && opPlanGenerator != null && context != null) {
            Configuration conf = null;
            for (MapReduceOper job : plan) {
                if (conf == null) {
                    conf = new Configuration();
                    ScriptState.get().addSettingsToConf(job, conf);
                    break;
                }
            }
            try {
                Map<PhysicalOperator, Operator> p2lMap = Maps.newHashMap();
                Map<Operator, PhysicalOperator> l2pMap = context.getExecutionEngine().getLogToPhyMap();
                for (Entry<Operator, PhysicalOperator> i : l2pMap.entrySet()) {
                    p2lMap.put(i.getValue(), i.getKey());
                }

                String script = null;

                // suppress getting script from conf for now - do something smarter later
                if (conf != null && false) {
                    script = new String(Base64.decodeBase64(conf.get("pig.script")));
                }
                if ((script == null || script.length() == 0) && (ps != null)) {
                    script = StringUtils.join(ps.getScriptCache(), '\n');
                }

                MRPlanCalculator opPlan = new MRPlanCalculator(opPlanGenerator.getP2jPlan(), plan, p2lMap, opPlanGenerator.getReverseMap());
                MRPlanCalculator unopPlan = new MRPlanCalculator(unopPlanGenerator.getP2jPlan(), plan, p2lMap, unopPlanGenerator.getReverseMap());

                P2jPlanPackage plans = new P2jPlanPackage(opPlan.getP2jPlan(), unopPlan.getP2jPlan(), script, planId);

                Properties props = context.getProperties();
                plans.setUserName(UserGroupInformation.getCurrentUser().getUserName());
                if (props.containsKey(JOB_NAME_PROP)) {
                    plans.setJobName(props.getProperty(JOB_NAME_PROP));
                } else {
                    plans.setJobName("unknown");
                }

                if(props.containsKey(ENABLE_SAMPLE_OUTPUT_PROP)) {
                    String strProp = props.getProperty(ENABLE_SAMPLE_OUTPUT_PROP).toLowerCase();
                    if(strProp.equals("f") || strProp.equals("false")) {
                        enableSampleOutput = false;
                        LOG.warn("Sample Output has been disabled.");
                    }
                }

                plans.getStatus().setStartTime();
                plans.getStatus().setStatusText(StatusText.running);
                invalidClient = (psClient.savePlan(plans) == null);

            } catch (Exception e) {
                LOG.error("Caught unexpected exception generating json plan.", e);
                invalidClient = true;
            }
        } else {
            LOG.warn("Not saving plan, missing necessary objects to do so");
            invalidClient = true;
        }

        if(invalidClient) {
            LOG.error("Failed to properly create lipstick client and save plan.  Lipstick will be disabled.");
        }
    }

    @Override
    public void updateProgress(int progress) {
        if(invalidClient) {
            return;
        }

        P2jPlanStatus planStatus = new P2jPlanStatus();
        planStatus.setProgress(progress);

        // toArray() done to avoid concurrent access errors during iteration
        for (String jobId : runningJobIds.toArray(new String[0])) {
            updatePlanStatusForJobId(planStatus, jobId);
        }

        psClient.saveStatus(planId, planStatus);
    }

    @Override
    public void jobStarted(String jobId) {
        if(invalidClient) {
            return;
        }

        PigStats.JobGraph jobGraph = PigStats.get().getJobGraph();
        LOG.debug("jobStartedNotification - jobId " + jobId + ", jobGraph:\n" + jobGraph);

        // for each job in the graph, check if the stats for a job with this
        // name is found. If so, look up it's scope and bind the jobId to
        // the DAGNode with the same scope.
        for (JobStats jobStats : jobGraph) {
            if (jobId.equals(jobStats.getJobId())) {
                LOG.info("jobStartedNotification - scope " + jobStats.getName() + " is jobId " + jobId);
                P2jJobStatus jobStatus = new P2jJobStatus();
                jobStatus.setJobId(jobId);
                jobStatus.setStartTime(System.currentTimeMillis());
                jobStatus.setScope(jobStats.getName());
                jobIdToJobStatusMap.put(jobId, jobStatus);
                runningJobIds.add(jobId);
            }
        }
        P2jPlanStatus planStatus = new P2jPlanStatus();
        updatePlanStatusForJobId(planStatus, jobId);
        psClient.saveStatus(planId, planStatus);
    }

    @Override
    public void jobFinished(JobStats jobStats) {
        if(invalidClient) {
            return;
        }

        // Remove jobId from runningSet b/c it's now complete
        String jobId = jobStats.getJobId();
        if (!runningJobIds.remove(jobId)) {
            LOG.error("Internal Error.  Job finished with no record of running jobId: " + jobId);
        }

        // Update the status of this job
        P2jPlanStatus planStatus = new P2jPlanStatus();
        jobIdToJobStatusMap.get(jobId).setFinishTime(System.currentTimeMillis());
        if (context.getExecType() == ExecType.LOCAL) {
            jobIdToJobStatusMap.get(jobId).setMapProgress(1);
            jobIdToJobStatusMap.get(jobId).setReduceProgress(1);
        }
        jobIdToJobStatusMap.get(jobId).setBytesWritten(jobStats.getBytesWritten());
        jobIdToJobStatusMap.get(jobId).setRecordsWritten(jobStats.getRecordWrittern());

        updatePlanStatusForCompletedJobId(planStatus, jobId);

        /* Set the completed job warnings after calling updatePlanStatusForCompletedJobId() otherwise
           we end up overwriting the warning field with the running job warnings (which are included
           with the completed job warnings). */
        JobClient jobClient = PigStats.get().getJobClient();
        jobIdToJobStatusMap.get(jobId).setWarnings(getCompletedJobWarnings(jobClient, jobStats));

       

        psClient.saveStatus(planId, planStatus);

        if(enableSampleOutput) {
            // Get sample output for the job
            try {
                P2jSampleOutputList sampleOutputList = new P2jSampleOutputList();
                OutputSampler os = new OutputSampler(jobStats);
                // The 10 & 1024 params (maxRows and maxBytes)
                // should be configurable via properties
                for (SampleOutput schemaOutputPair : os.getSampleOutputs(10, 1024)) {
                    P2jSampleOutput sampleOutput = new P2jSampleOutput();
                    sampleOutput.setSchemaString(schemaOutputPair.getSchema());
                    sampleOutput.setSampleOutput(schemaOutputPair.getOutput());
                    sampleOutputList.add(sampleOutput);
                }
                psClient.saveSampleOutput(planId,
                                          jobIdToJobStatusMap.get(jobStats.getJobId()).getScope(),
                                          sampleOutputList);
            } catch (Exception e) {
                LOG.error("Unable to get sample output from job with id [" + jobStats.getJobId() + "]. ", e);
            }
        }
    }

    @Override
    public void jobFailed(JobStats jobStats) {
        if(invalidClient) {
            return;
        }

        planFailed = true;
    }

    @Override
    public void planCompleted() {
        if(invalidClient) {
            return;
        }

        if (planFailed) {
            planEndedWithStatusText(StatusText.failed);
        } else {
            planEndedWithStatusText(StatusText.finished);
        }
    }

    /**
     * Set the planStatus as ended with status statusText and saveStatus to the client.
     *
     * @param statusText job state at completition
     */
    protected void planEndedWithStatusText(StatusText statusText) {
        P2jPlanStatus planStatus = new P2jPlanStatus();
        planStatus.setEndTime();
        planStatus.setStatusText(statusText);
        psClient.saveStatus(planId, planStatus);
    }

    /**
     * Update planStatus with status for a map/reduce job.
     *
     * @param planStatus the P2jPlanStatus object to update
     * @param jobId the map/reduce job id
     */
    protected void updatePlanStatusForJobId(P2jPlanStatus planStatus, String jobId) {
        P2jJobStatus status = buildJobStatusMap(jobId);
        if (status != null) {
            planStatus.updateWith(status);
        }
    }

    protected void updatePlanStatusForCompletedJobId(P2jPlanStatus planStatus, String jobId) {
        LOG.info("Updating plan status for completed job " + jobId);
        updatePlanStatusForJobId(planStatus, jobId);
        JobClient jobClient = PigStats.get().getJobClient();
        JobID jobID = JobID.forName(jobId);
        long startTime = Long.MAX_VALUE;
        long finishTime = Long.MIN_VALUE;
        /* The JobClient doesn't expose a way to get the Start and Finish time
           of the over all job[1] sadly, so we're pulling out the min task start
           time and max task finish time and using these to approximate.

           [1] - Which is really dumb.  The data obviously exists, it gets rendered
           in the job tracker via the JobInProgress but sadly this is internal
           to the remote job tracker so we don't have access to this
           information. */
        try {
            if (!jobClient.getConf().getBoolean("pig.stats.notaskreport", false)) {
                List<TaskReport> reports = Lists.newArrayList();
                reports.addAll(Arrays.asList(jobClient.getMapTaskReports(jobID)));
                reports.addAll(Arrays.asList(jobClient.getReduceTaskReports(jobID)));
                for(TaskReport rpt : reports) {
                    /* rpt.getStartTime() sometimes returns zero meaning it does
                       not know what time it started so we need to prevent using
                       this or we'll lose the actual lowest start time */
                    long taskStartTime = rpt.getStartTime();
                    if (0 != taskStartTime) {
                        startTime = Math.min(startTime, taskStartTime);
                    }
                    finishTime = Math.max(finishTime, rpt.getFinishTime());
                }
                P2jJobStatus jobStatus = jobIdToJobStatusMap.get(jobId);
                if (startTime < Long.MAX_VALUE) {
                    jobStatus.setStartTime(startTime);
                }
                if (finishTime > Long.MIN_VALUE) {
                    jobStatus.setFinishTime(finishTime);
                }
                LOG.info("Determined start and finish times for job " + jobId);
            } else {
                LOG.info("Skipping determining start and finish times for job " + jobId);
            }
        } catch (IOException e) {
            LOG.error("Error getting job info.", e);
        }

    }

    /**
     * Build a P2jJobStatus object for the map/reduce job with id jobId.
     *
     * @param jobId the id of the map/reduce job
     * @return the newly created P2jJobStatus
     */
    @SuppressWarnings("deprecation")
    protected P2jJobStatus buildJobStatusMap(String jobId) {
        JobClient jobClient = PigStats.get().getJobClient();
        P2jJobStatus js = jobIdToJobStatusMap.get(jobId);

        try {
            RunningJob rj = jobClient.getJob(jobId);
            if (rj == null) {
                LOG.warn("Couldn't find job status for jobId=" + jobId);
                return js;
            }

            JobID jobID = rj.getID();
            Counters counters = rj.getCounters();
            js.setCounters(buildCountersMap(counters));
            js.setWarnings(getRunningJobWarnings(jobClient, jobID));

            js.setJobName(rj.getJobName());
            js.setTrackingUrl(rj.getTrackingURL());
            js.setIsComplete(rj.isComplete());
            js.setIsSuccessful(rj.isSuccessful());
            js.setMapProgress(rj.mapProgress());
            js.setReduceProgress(rj.reduceProgress());
            js.setTotalMappers((int)counters.findCounter(JobInProgress.Counter.TOTAL_LAUNCHED_MAPS).getCounter());
            js.setTotalReducers((int)counters.findCounter(JobInProgress.Counter.TOTAL_LAUNCHED_REDUCES).getCounter());
            return js;
        } catch (IOException e) {
            LOG.error("Error getting job info.", e);
        }

        return null;
    }

    public Map<String, P2jCounters> buildCountersMap(Counters counters) {
        Map<String, P2jCounters> cMap = Maps.newHashMap();
        for (Group g : counters) {
            P2jCounters countersObj = new P2jCounters();
            cMap.put(g.getDisplayName(), countersObj);
            for (Counter c : g) {
                countersObj.getCounters().put(c.getDisplayName(), c.getValue());
            }
        }
        return cMap;
    }

    public Map<String, P2jWarning> getCompletedJobWarnings(JobClient jobClient, JobStats jobStats) {
        if (context.getExecType() == ExecType.LOCAL) {
            Map<String, P2jWarning> warnings = Maps.newHashMap();
            return warnings;
        } else {
            JobWarnings jw = new JobWarnings();
            return jw.findCompletedJobWarnings(jobClient, jobStats);
        }
    }

    public Map<String, P2jWarning> getRunningJobWarnings(JobClient jobClient, JobID jobId) {
        if (context.getExecType() == ExecType.LOCAL) {
            Map<String, P2jWarning> warnings = Maps.newHashMap();
            return warnings;
        } else {
            JobWarnings jw = new JobWarnings();
            return jw.findRunningJobWarnings(jobClient, jobId.toString());
        }
    }
}
TOP

Related Classes of com.netflix.lipstick.pigtolipstick.BasicP2LClient

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.