Package org.apache.airavata.gfac.core.monitor

Examples of org.apache.airavata.gfac.core.monitor.MonitorID


    private AuthenticationInfo authenticationInfo;


    public ResourceConnection(HostMonitorData hostMonitorData,AuthenticationInfo authInfo) throws SSHApiException {
        MonitorID monitorID = hostMonitorData.getMonitorIDs().get(0);
        try {
            GSISecurityContext securityContext = (GSISecurityContext) monitorID.getJobExecutionContext().getSecurityContext(GSISecurityContext.GSI_SECURITY_CONTEXT);
            if(securityContext != null) {
                cluster = (PBSCluster) securityContext.getPbsCluster();
            }else {
                SSHSecurityContext sshSecurityContext = (SSHSecurityContext) monitorID.getJobExecutionContext().getSecurityContext(SSHSecurityContext.SSH_SECURITY_CONTEXT);
                cluster = (PBSCluster)sshSecurityContext.getPbsCluster();
            }

            // We only take the cluster configuration from the incoming request and construct a new cluster, because for
            // monitoring we use our own credentials rather than doing everything through a single user's account.
View Full Code Here


            log.error("Error reading data from job ExecutionContext");
        }
    }

    public ResourceConnection(HostMonitorData hostMonitorData) throws SSHApiException {
        MonitorID monitorID = hostMonitorData.getMonitorIDs().get(0);
        try {
            GSISecurityContext securityContext = (GSISecurityContext) monitorID.getJobExecutionContext().getSecurityContext(GSISecurityContext.GSI_SECURITY_CONTEXT);
            cluster = (PBSCluster) securityContext.getPbsCluster();

            // We only take the cluster configuration from the incoming request and construct a new cluster, because for
            // monitoring we use our own credentials rather than doing everything through a single user's account.
            cluster = new PBSCluster(cluster.getServerInfo(), authenticationInfo, cluster.getJobManagerConfiguration());
View Full Code Here

    }

    public void invoke(JobExecutionContext jobExecutionContext) throws GFacHandlerException {
        super.invoke(jobExecutionContext);
        hpcPullMonitor.setGfac(jobExecutionContext.getGfac());
        MonitorID monitorID = new HPCMonitorID(getAuthenticationInfo(), jobExecutionContext);
        try {
            CommonUtils.addMonitortoQueue(hpcPullMonitor.getQueue(), monitorID);
        } catch (AiravataMonitorException e) {
            logger.error("Error adding monitorID object to the queue with experiment ", monitorID.getExperimentID());
        }
    }
View Full Code Here

        // at the tail of the queue
        //todo this polling will not work with multiple usernames but with single user
        // and multiple hosts, currently monitoring will work
        UserMonitorData take = null;
        JobStatusChangeRequest jobStatus = new JobStatusChangeRequest();
        MonitorID currentMonitorID = null;
        HostDescription currentHostDescription = null;
        try {
            take = this.queue.take();
            List<MonitorID> completedJobs = new ArrayList<MonitorID>();
            List<HostMonitorData> hostMonitorData = take.getHostMonitorData();
            for (HostMonitorData iHostMonitorData : hostMonitorData) {
                if (iHostMonitorData.getHost().getType() instanceof GsisshHostType
                        || iHostMonitorData.getHost().getType() instanceof SSHHostType) {
                    currentHostDescription = iHostMonitorData.getHost();
                    String hostName =  iHostMonitorData.getHost().getType().getHostAddress();
                    ResourceConnection connection = null;
                    if (connections.containsKey(hostName)) {
                        logger.debug("We already have this connection so not going to create one");
                        connection = connections.get(hostName);
                    } else {
                        connection = new ResourceConnection(iHostMonitorData,getAuthenticationInfo());
                        connections.put(hostName, connection);
                    }
                    List<MonitorID> monitorID = iHostMonitorData.getMonitorIDs();
                    Map<String, JobState> jobStatuses = connection.getJobStatuses(monitorID);
                    for (MonitorID iMonitorID : monitorID) {
                        currentMonitorID = iMonitorID;
                        iMonitorID.setStatus(jobStatuses.get(iMonitorID.getJobID()));    //IMPORTANT this is not a simple setter we have a logic
                        jobStatus = new JobStatusChangeRequest(iMonitorID);
                        // we have this JobStatus class to handle amqp monitoring

                        publisher.publish(jobStatus);
                        // if the job is completed we do not have to put the job to the queue again
                        iMonitorID.setLastMonitored(new Timestamp((new Date()).getTime()));

                        // After successful monitoring perform following actions to cleanup the queue, if necessary
                        if (jobStatus.getState().equals(JobState.COMPLETE)) {
                            completedJobs.add(iMonitorID);
                            try {
                                gfac.invokeOutFlowHandlers(iMonitorID.getJobExecutionContext());
                            } catch (GFacException e) {
                              publisher.publish(new TaskStatusChangeRequest(new TaskIdentity(iMonitorID.getExperimentID(), iMonitorID.getWorkflowNodeID(),
                    iMonitorID.getTaskID()), TaskState.FAILED));
                              publisher.publish(new ExperimentStatusChangeRequest(new ExperimentIdentity(iMonitorID.getExperimentID()),
                    ExperimentState.FAILED));
                                logger.info(e.getLocalizedMessage(), e);
                            }
                        } else if (iMonitorID.getFailedCount() > 2) {
                            logger.error("Tried to monitor the job with ID " + iMonitorID.getJobID() + " But failed 3 times, so skip this Job from Monitor");
                            iMonitorID.setLastMonitored(new Timestamp((new Date()).getTime()));
                            completedJobs.add(iMonitorID);
                            try {
                                logger.error("Launching outflow handlers to check output are genereated or not");
                                gfac.invokeOutFlowHandlers(iMonitorID.getJobExecutionContext());
                            } catch (GFacException e) {
                                publisher.publish(new TaskStatusChangeRequest(new TaskIdentity(iMonitorID.getExperimentID(), iMonitorID.getWorkflowNodeID(),
                                        iMonitorID.getTaskID()), TaskState.FAILED));
                                publisher.publish(new ExperimentStatusChangeRequest(new ExperimentIdentity(iMonitorID.getExperimentID()),
                                        ExperimentState.FAILED));
                                logger.info(e.getLocalizedMessage(), e);
                            }
                        } else {
                            // Evey
                            iMonitorID.setLastMonitored(new Timestamp((new Date()).getTime()));
                            // if the job is complete we remove it from the Map, if any of these maps
                            // get empty this userMonitorData will get delete from the queue
                        }
                    }
                } else {
                    logger.debug("Qstat Monitor doesn't handle non-gsissh hosts");
                }
            }
            // We have finished all the HostMonitorData object in userMonitorData, now we need to put it back
            // now the userMonitorData goes back to the tail of the queue
            queue.put(take);
            // cleaning up the completed jobs, this method will remove some of the userMonitorData from the queue if
            // they become empty
            for (MonitorID completedJob : completedJobs) {
                CommonUtils.removeMonitorFromQueue(queue, completedJob);
            }
        } catch (InterruptedException e) {
            if (!this.queue.contains(take)) {
                try {
                    this.queue.put(take);
                } catch (InterruptedException e1) {
                    e1.printStackTrace()//To change body of catch statement use File | Settings | File Templates.
                }
            }
            logger.error("Error handling the job with Job ID:" + currentMonitorID.getJobID());
            throw new AiravataMonitorException(e);
        } catch (SSHApiException e) {
            logger.error(e.getMessage());
            if (e.getMessage().contains("Unknown Job Id Error")) {
                // in this case job is finished or may be the given job ID is wrong
                jobStatus.setState(JobState.UNKNOWN);
                publisher.publish(jobStatus);
            } else if (e.getMessage().contains("illegally formed job identifier")) {
                logger.error("Wrong job ID is given so dropping the job from monitoring system");
            } else if (!this.queue.contains(take)) {   // we put the job back to the queue only if its state is not unknown
                if (currentMonitorID == null) {
                    logger.error("Monitoring the jobs failed, for user: " + take.getUserName()
                            + " in Host: " + currentHostDescription.getType().getHostAddress());
                } else {
                    if (currentMonitorID != null) {
                        if (currentMonitorID.getFailedCount() < 2) {
                            try {
                                currentMonitorID.setFailedCount(currentMonitorID.getFailedCount() + 1);
                                this.queue.put(take);
                            } catch (InterruptedException e1) {
                                e1.printStackTrace();
                            }
                        } else {
                            logger.error(e.getMessage());
                            logger.error("Tried to monitor the job 3 times, so dropping of the the Job with ID: " + currentMonitorID.getJobID());
                        }
                    }
                }
            }
            throw new AiravataMonitorException("Error retrieving the job status", e);
        } catch (Exception e) {
            if (currentMonitorID != null) {
                if (currentMonitorID.getFailedCount() < 3) {
                    try {
                        currentMonitorID.setFailedCount(currentMonitorID.getFailedCount() + 1);
                        this.queue.put(take);
                        // if we get a wrong status we wait for a while and request again
                        Thread.sleep(10000);
                    } catch (InterruptedException e1) {
                        e1.printStackTrace();
                    }
                } else {
                    logger.error(e.getMessage());
                    logger.error("Tryied to monitor the job 3 times, so dropping of the the Job with ID: " + currentMonitorID.getJobID());
                }
            }
            throw new AiravataMonitorException("Error retrieving the job status", e);
        }
View Full Code Here

        }
    }
    public static boolean isTheLastJobInQueue(BlockingQueue<MonitorID> queue,MonitorID monitorID){
        Iterator<MonitorID> iterator = queue.iterator();
        while(iterator.hasNext()){
            MonitorID next = iterator.next();
            if(monitorID.getUserName().equals(next.getUserName()) && CommonUtils.isEqual(monitorID.getHost(), next.getHost())){
                return false;
            }
        }
        return true;
    }
View Full Code Here

    public boolean submitJob(JobExecutionContext jobExecutionContext) throws GFacException {
        // We need to check whether this job is submitted as a part of a large workflow. If yes,
        // we need to setup workflow tracking listerner.
        try {
            int stateVal = GFacUtils.getZKExperimentStateValue(zk, jobExecutionContext);   // this is the original state came, if we query again it might be different,so we preserve this state in the environment
            monitorPublisher.publish(new GfacExperimentStateChangeRequest(new MonitorID(jobExecutionContext)
                    , GfacExperimentState.ACCEPTED));                  // immediately we get the request we update the status
            String workflowInstanceID = null;
            if ((workflowInstanceID = (String) jobExecutionContext.getProperty(Constants.PROP_WORKFLOW_INSTANCE_ID)) != null) {
                // This mean we need to register workflow tracking listener.
                //todo implement WorkflowTrackingListener properly
View Full Code Here

                log.info("ExperimentId: " + experimentID + " taskId: " + jobExecutionContext.getTaskData().getTaskID());
            }
        } catch (Exception e) {
            try {
                // we make the experiment as failed due to exception scenario
                monitorPublisher.publish(new GfacExperimentStateChangeRequest(new MonitorID(jobExecutionContext), GfacExperimentState.FAILED));
                monitorPublisher.publish(new
                        ExperimentStatusChangeRequest(new ExperimentIdentity(jobExecutionContext.getExperimentID()),
                        ExperimentState.FAILED));
                // Updating the task status if there's any task associated
                monitorPublisher.publish(new TaskStatusChangeRequest(
                        new TaskIdentity(jobExecutionContext.getExperimentID(),
                                jobExecutionContext.getWorkflowNodeDetails().getNodeInstanceId(),
                                jobExecutionContext.getTaskData().getTaskID()), TaskState.FAILED
                ));
                monitorPublisher.publish(new JobStatusChangeRequest(new MonitorID(jobExecutionContext),
                        new JobIdentity(jobExecutionContext.getExperimentID(),
                                jobExecutionContext.getWorkflowNodeDetails().getNodeInstanceId(),
                                jobExecutionContext.getTaskData().getTaskID(), jobExecutionContext.getJobDetails().getJobID()), JobState.FAILED
                ));
            } catch (NullPointerException e1) {
View Full Code Here

            // We get the provider instance and execute it.
            invokeProvider(jobExecutionContext);
        } catch (Exception e) {
            try {
                // we make the experiment as failed due to exception scenario
                monitorPublisher.publish(new GfacExperimentStateChangeRequest(new MonitorID(jobExecutionContext), GfacExperimentState.FAILED));
                monitorPublisher.publish(new
                        ExperimentStatusChangeRequest(new ExperimentIdentity(jobExecutionContext.getExperimentID()),
                        ExperimentState.FAILED));
                // Updating the task status if there's any task associated
                monitorPublisher.publish(new TaskStatusChangeRequest(
                        new TaskIdentity(jobExecutionContext.getExperimentID(),
                                jobExecutionContext.getWorkflowNodeDetails().getNodeInstanceId(),
                                jobExecutionContext.getTaskData().getTaskID()), TaskState.FAILED
                ));
                monitorPublisher.publish(new JobStatusChangeRequest(new MonitorID(jobExecutionContext),
                        new JobIdentity(jobExecutionContext.getExperimentID(),
                                jobExecutionContext.getWorkflowNodeDetails().getNodeInstanceId(),
                                jobExecutionContext.getTaskData().getTaskID(), jobExecutionContext.getJobDetails().getJobID()), JobState.FAILED
                ));
            } catch (NullPointerException e1) {
View Full Code Here

    }

    private void invokeProvider(JobExecutionContext jobExecutionContext) throws GFacException, ApplicationSettingsException, InterruptedException, KeeperException {
        GFacProvider provider = jobExecutionContext.getProvider();
        if (provider != null) {
            monitorPublisher.publish(new GfacExperimentStateChangeRequest(new MonitorID(jobExecutionContext), GfacExperimentState.PROVIDERINVOKING));
            GFacUtils.createPluginZnode(zk, jobExecutionContext, provider.getClass().getName());
            initProvider(provider, jobExecutionContext);
            executeProvider(provider, jobExecutionContext);
            disposeProvider(provider, jobExecutionContext);
            GFacUtils.updatePluginState(zk, jobExecutionContext, provider.getClass().getName(), GfacPluginState.COMPLETED);
            monitorPublisher.publish(new GfacExperimentStateChangeRequest(new MonitorID(jobExecutionContext), GfacExperimentState.PROVIDERINVOKED));
        }
        if (GFacUtils.isSynchronousMode(jobExecutionContext)) {
            invokeOutFlowHandlers(jobExecutionContext);
        }
    }
View Full Code Here

    }

    private void reInvokeProvider(JobExecutionContext jobExecutionContext) throws GFacException, GFacProviderException, ApplicationSettingsException, InterruptedException, KeeperException {
        GFacProvider provider = jobExecutionContext.getProvider();
        if (provider != null) {
            monitorPublisher.publish(new GfacExperimentStateChangeRequest(new MonitorID(jobExecutionContext), GfacExperimentState.PROVIDERINVOKING));
            String plState = GFacUtils.getPluginState(zk, jobExecutionContext, provider.getClass().getName());
            if (Integer.valueOf(plState) >= GfacPluginState.INVOKED.getValue()) {    // this will make sure if a plugin crashes it will not launch from the scratch, but plugins have to save their invoked state
                if (provider instanceof GFacRecoverableProvider) {
                    GFacUtils.createPluginZnode(zk, jobExecutionContext, provider.getClass().getName());
                    ((GFacRecoverableProvider) provider).recover(jobExecutionContext);
                    GFacUtils.updatePluginState(zk, jobExecutionContext, provider.getClass().getName(), GfacPluginState.COMPLETED);
                }
            } else {
                GFacUtils.createPluginZnode(zk, jobExecutionContext, provider.getClass().getName());
                initProvider(provider, jobExecutionContext);
                executeProvider(provider, jobExecutionContext);
                disposeProvider(provider, jobExecutionContext);
                GFacUtils.updatePluginState(zk, jobExecutionContext, provider.getClass().getName(), GfacPluginState.COMPLETED);
            }
            monitorPublisher.publish(new GfacExperimentStateChangeRequest(new MonitorID(jobExecutionContext), GfacExperimentState.PROVIDERINVOKED));
        }

        if (GFacUtils.isSynchronousMode(jobExecutionContext))

        {
View Full Code Here

TOP

Related Classes of org.apache.airavata.gfac.core.monitor.MonitorID

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.