/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.test.cancelling;
import static junit.framework.Assert.fail;
import java.util.Iterator;
import junit.framework.Assert;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.Level;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import eu.stratosphere.api.common.Plan;
import eu.stratosphere.client.minicluster.NepheleMiniCluster;
import eu.stratosphere.compiler.DataStatistics;
import eu.stratosphere.compiler.PactCompiler;
import eu.stratosphere.compiler.plan.OptimizedPlan;
import eu.stratosphere.compiler.plantranslate.NepheleJobGraphGenerator;
import eu.stratosphere.nephele.client.AbstractJobResult;
import eu.stratosphere.nephele.client.JobCancelResult;
import eu.stratosphere.nephele.client.JobClient;
import eu.stratosphere.nephele.client.JobProgressResult;
import eu.stratosphere.nephele.client.JobSubmissionResult;
import eu.stratosphere.nephele.event.job.AbstractEvent;
import eu.stratosphere.nephele.event.job.JobEvent;
import eu.stratosphere.nephele.jobgraph.JobGraph;
import eu.stratosphere.nephele.jobgraph.JobStatus;
import eu.stratosphere.util.LogUtils;
import eu.stratosphere.util.StringUtils;
/**
 * Base class for tests that submit a job to a local {@link NepheleMiniCluster}, cancel it after a given delay and
 * verify that the job reaches the {@code CANCELED} state within a bounded amount of time.
 * <p>
 * A fresh mini cluster is started before every test method and stopped afterwards.
 */
public abstract class CancellingTestBase {

    private static final Log LOG = LogFactory.getLog(CancellingTestBase.class);

    /** Heap size (in megabytes) that the failure message of {@link #verifyJvmOptions()} recommends via -Xmx. */
    private static final int MINIMUM_HEAP_SIZE_MB = 192;

    /**
     * Defines the number of milliseconds after which an issued cancel request is expected to have taken effect
     * (i.e. the job is canceled), starting from the point in time when the cancel request is issued.
     */
    private static final int DEFAULT_CANCEL_FINISHED_INTERVAL = 10 * 1000;

    // --------------------------------------------------------------------------------------------

    /** The mini cluster the jobs are submitted to; created in {@link #startCluster()}. */
    protected NepheleMiniCluster executor;

    // --------------------------------------------------------------------------------------------

    /**
     * Asserts that the JVM running the test has been given enough heap space.
     */
    private void verifyJvmOptions() {
        // maxMemory() is in bytes; shifting by 20 converts it to megabytes
        final long heap = Runtime.getRuntime().maxMemory() >> 20;
        // The check tolerates a reported heap of up to 50 MB below the recommended minimum
        // (presumably because the JVM reports less than the configured -Xmx value - TODO confirm).
        Assert.assertTrue("Insufficient java heap space " + heap + "mb - set JVM option: -Xmx" + MINIMUM_HEAP_SIZE_MB
            + "m", heap > MINIMUM_HEAP_SIZE_MB - 50);
    }

    /**
     * Restricts console logging to the ERROR level for all tests of the class.
     */
    @BeforeClass
    public static void initLogging() {
        // suppress warnings because this test prints cancel warnings
        LogUtils.initializeDefaultConsoleLogger(Level.ERROR);
    }

    /**
     * Verifies the JVM settings and starts a new mini cluster that overwrites existing files by default.
     *
     * @throws Exception
     *         if the mini cluster cannot be started
     */
    @Before
    public void startCluster() throws Exception {
        verifyJvmOptions();
        this.executor = new NepheleMiniCluster();
        this.executor.setDefaultOverwriteFiles(true);
        this.executor.start();
    }

    /**
     * Stops the mini cluster (if one was started) and releases the resources held by the test.
     *
     * @throws Exception
     *         if stopping the cluster or closing the file systems fails
     */
    @After
    public void stopCluster() throws Exception {
        if (this.executor != null) {
            try {
                this.executor.stop();
            } finally {
                // Run the remaining cleanup even if stop() failed, so that a broken cluster
                // is not kept around and file systems do not stay open.
                this.executor = null;
                FileSystem.closeAll();
                System.gc();
            }
        }
    }

    // --------------------------------------------------------------------------------------------

    /**
     * Runs the given plan and cancels it after the given delay, allowing the default interval
     * ({@link #DEFAULT_CANCEL_FINISHED_INTERVAL} milliseconds) for the cancellation to take effect.
     *
     * @param plan
     *        the plan to execute
     * @param msecsTillCanceling
     *        the time in milliseconds, measured from just before job submission, after which the cancel request
     *        is issued
     * @throws Exception
     *         declared for subclasses; problems inside the method are reported as JUnit failures
     */
    public void runAndCancelJob(Plan plan, int msecsTillCanceling) throws Exception {
        runAndCancelJob(plan, msecsTillCanceling, DEFAULT_CANCEL_FINISHED_INTERVAL);
    }

    /**
     * Runs the given plan, cancels it after the given delay and waits until the job reports the
     * {@code CANCELED} status.
     * <p>
     * The test fails if the job finishes or fails on its own, if a request to the job client is unsuccessful,
     * if the calling thread is interrupted, or if the job is not canceled within {@code maxTimeTillCanceled}
     * milliseconds after the cancel request has been issued.
     *
     * @param plan
     *        the plan to execute
     * @param msecsTillCanceling
     *        the time in milliseconds, measured from just before job submission, after which the cancel request
     *        is issued
     * @param maxTimeTillCanceled
     *        the maximum time in milliseconds the job may take to become canceled after the cancel request
     * @throws Exception
     *         declared for subclasses; problems inside the method are reported as JUnit failures
     */
    public void runAndCancelJob(Plan plan, int msecsTillCanceling, int maxTimeTillCanceled) throws Exception {
        try {
            // submit job
            final JobGraph jobGraph = getJobGraph(plan);
            final long startingTime = System.currentTimeMillis();
            // negative value means that no cancel request has been issued yet
            long cancelTime = -1L;

            final JobClient client = this.executor.getJobClient(jobGraph);
            final JobSubmissionResult submissionResult = client.submitJob();
            if (submissionResult.getReturnCode() != AbstractJobResult.ReturnCode.SUCCESS) {
                throw new IllegalStateException(submissionResult.getDescription());
            }

            // the recommended polling interval is multiplied by 1000 to obtain the sleep time in milliseconds
            final int interval = client.getRecommendedPollingInterval();
            final long sleep = interval * 1000L;
            Thread.sleep(sleep / 2);

            long lastProcessedEventSequenceNumber = -1L;

            while (true) {
                // isInterrupted() (unlike Thread.interrupted()) keeps the interrupt status set,
                // so code further up the stack can still observe the interruption
                if (Thread.currentThread().isInterrupted()) {
                    throw new IllegalStateException("Job client has been interrupted");
                }

                final long now = System.currentTimeMillis();

                if (cancelTime < 0L) {
                    // Cancel job
                    if (startingTime + msecsTillCanceling < now) {
                        LOG.info("Issuing cancel request");

                        final JobCancelResult jcr = client.cancelJob();
                        if (jcr == null) {
                            throw new IllegalStateException("Return value of cancelJob is null!");
                        }
                        if (jcr.getReturnCode() != AbstractJobResult.ReturnCode.SUCCESS) {
                            throw new IllegalStateException(jcr.getDescription());
                        }

                        // Save when the cancel request has been issued
                        cancelTime = now;
                    }
                } else {
                    // Job has already been canceled
                    if (cancelTime + maxTimeTillCanceled < now) {
                        throw new IllegalStateException("Cancelling of job took " + (now - cancelTime)
                            + " milliseconds, only " + maxTimeTillCanceled + " milliseconds are allowed");
                    }
                }

                final JobProgressResult jobProgressResult = client.getJobProgress();
                if (jobProgressResult == null) {
                    throw new IllegalStateException("Returned job progress is unexpectedly null!");
                }
                if (jobProgressResult.getReturnCode() == AbstractJobResult.ReturnCode.ERROR) {
                    throw new IllegalStateException("Could not retrieve job progress: "
                        + jobProgressResult.getDescription());
                }

                boolean exitLoop = false;
                final Iterator<AbstractEvent> it = jobProgressResult.getEvents();
                while (it.hasNext()) {
                    final AbstractEvent event = it.next();

                    // Did we already process that event?
                    if (lastProcessedEventSequenceNumber >= event.getSequenceNumber()) {
                        continue;
                    }
                    lastProcessedEventSequenceNumber = event.getSequenceNumber();

                    // Check if we can exit the loop
                    if (event instanceof JobEvent) {
                        final JobEvent jobEvent = (JobEvent) event;
                        final JobStatus jobStatus = jobEvent.getCurrentJobStatus();

                        switch (jobStatus) {
                        case FINISHED:
                            throw new IllegalStateException("Job finished successfully");
                        case FAILED:
                            throw new IllegalStateException("Job failed");
                        case CANCELED:
                            exitLoop = true;
                            break;
                        case SCHEDULED: // okay
                        case RUNNING:
                            break;
                        default:
                            throw new IllegalStateException("Bug: Unrecognized Job Status.");
                        }
                    }

                    if (exitLoop) {
                        break;
                    }
                }

                if (exitLoop) {
                    break;
                }

                Thread.sleep(sleep);
            }
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Thread.sleep() cleared the interrupt status when it threw; restore it
                Thread.currentThread().interrupt();
            }
            // Pass the exception as the throwable argument so that the stack trace is logged as well
            LOG.error("Running and cancelling the job failed", e);
            fail(StringUtils.stringifyException(e));
        }
    }

    /**
     * Compiles the given plan into an optimized plan and translates the result into a Nephele job graph.
     *
     * @param plan
     *        the plan to compile
     * @return the job graph generated for the plan
     * @throws Exception
     *         if compiling the plan or generating the job graph fails
     */
    private JobGraph getJobGraph(final Plan plan) throws Exception {
        final PactCompiler pc = new PactCompiler(new DataStatistics());
        final OptimizedPlan op = pc.compile(plan);
        final NepheleJobGraphGenerator jgg = new NepheleJobGraphGenerator();
        return jgg.compileJobGraph(op);
    }
}