/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.yarn;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Writer;
import java.nio.ByteBuffer;
import java.security.PrivilegedAction;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.client.api.NMClient;
import org.apache.hadoop.yarn.util.Records;
import com.google.common.base.Preconditions;
import eu.stratosphere.configuration.ConfigConstants;
import eu.stratosphere.configuration.GlobalConfiguration;
import eu.stratosphere.nephele.jobmanager.JobManager;
public class ApplicationMaster {
private static final Log LOG = LogFactory.getLog(ApplicationMaster.class);
private static final int HEAP_LIMIT_CAP = 500;
private void run() throws Exception {
//Utils.logFilesInCurrentDirectory(LOG);
// Initialize clients to ResourceManager and NodeManagers
Configuration conf = Utils.initializeYarnConfiguration();
FileSystem fs = FileSystem.get(conf);
Map<String, String> envs = System.getenv();
final String currDir = envs.get(Environment.PWD.key());
final String logDirs = envs.get(Environment.LOG_DIRS.key());
final String ownHostname = envs.get(Environment.NM_HOST.key());
final String appId = envs.get(Client.ENV_APP_ID);
final String clientHomeDir = envs.get(Client.ENV_CLIENT_HOME_DIR);
final String applicationMasterHost = envs.get(Environment.NM_HOST.key());
final String remoteStratosphereJarPath = envs.get(Client.STRATOSPHERE_JAR_PATH);
final String shipListString = envs.get(Client.ENV_CLIENT_SHIP_FILES);
final String yarnClientUsername = envs.get(Client.ENV_CLIENT_USERNAME);
final int taskManagerCount = Integer.valueOf(envs.get(Client.ENV_TM_COUNT));
final int memoryPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_MEMORY));
final int coresPerTaskManager = Integer.valueOf(envs.get(Client.ENV_TM_CORES));
int heapLimit = (int)((float)memoryPerTaskManager*0.85);
if( (memoryPerTaskManager - heapLimit) > HEAP_LIMIT_CAP) {
heapLimit = memoryPerTaskManager-HEAP_LIMIT_CAP;
}
if(currDir == null) {
throw new RuntimeException("Current directory unknown");
}
if(ownHostname == null) {
throw new RuntimeException("Own hostname ("+Environment.NM_HOST+") not set.");
}
LOG.info("Working directory "+currDir);
// load Stratosphere configuration.
Utils.getStratosphereConfiguration(currDir);
final String localWebInterfaceDir = currDir+"/resources/"+ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME;
// Update yaml conf -> set jobManager address to this machine's address.
FileInputStream fis = new FileInputStream(currDir+"/stratosphere-conf.yaml");
BufferedReader br = new BufferedReader(new InputStreamReader(fis));
Writer output = new BufferedWriter(new FileWriter(currDir+"/stratosphere-conf-modified.yaml"));
String line ;
while ( (line = br.readLine()) != null) {
if(line.contains(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY)) {
output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY+": "+ownHostname+"\n");
} else if(line.contains(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY)) {
output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY+": "+"\n");
} else {
output.append(line+"\n");
}
}
// just to make sure.
output.append(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY+": "+ownHostname+"\n");
output.append(ConfigConstants.JOB_MANAGER_WEB_ROOT_PATH_KEY+": "+localWebInterfaceDir+"\n");
output.append(ConfigConstants.JOB_MANAGER_WEB_LOG_PATH_KEY+": "+logDirs+"\n");
output.close();
br.close();
File newConf = new File(currDir+"/stratosphere-conf-modified.yaml");
if(!newConf.exists()) {
LOG.warn("modified yaml does not exist!");
}
Utils.copyJarContents("resources/"+ConfigConstants.DEFAULT_JOB_MANAGER_WEB_PATH_NAME,
ApplicationMaster.class.getProtectionDomain().getCodeSource().getLocation().getPath());
JobManager jm;
{
String pathToNepheleConfig = currDir+"/stratosphere-conf-modified.yaml";
String[] args = {"-executionMode","cluster", "-configDir", pathToNepheleConfig};
// start the job manager
jm = JobManager.initialize( args );
// Start info server for jobmanager
jm.startInfoServer();
}
AMRMClient<ContainerRequest> rmClient = AMRMClient.createAMRMClient();
rmClient.init(conf);
rmClient.start();
NMClient nmClient = NMClient.createNMClient();
nmClient.init(conf);
nmClient.start();
// Register with ResourceManager
LOG.info("registering ApplicationMaster");
rmClient.registerApplicationMaster(applicationMasterHost, 0, "http://"+applicationMasterHost+":"+GlobalConfiguration.getString(ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, "undefined"));
// Priority for worker containers - priorities are intra-application
Priority priority = Records.newRecord(Priority.class);
priority.setPriority(0);
// Resource requirements for worker containers
Resource capability = Records.newRecord(Resource.class);
capability.setMemory(memoryPerTaskManager);
capability.setVirtualCores(coresPerTaskManager);
// Make container requests to ResourceManager
for (int i = 0; i < taskManagerCount; ++i) {
ContainerRequest containerAsk = new ContainerRequest(capability,
null, null, priority);
LOG.info("Requesting TaskManager container " + i);
rmClient.addContainerRequest(containerAsk);
}
LocalResource stratosphereJar = Records.newRecord(LocalResource.class);
LocalResource stratosphereConf = Records.newRecord(LocalResource.class);
// register Stratosphere Jar with remote HDFS
final Path remoteJarPath = new Path(remoteStratosphereJarPath);
Utils.registerLocalResource(fs, remoteJarPath, stratosphereJar);
// register conf with local fs.
Path remoteConfPath = Utils.setupLocalResource(conf, fs, appId, new Path("file://"+currDir+"/stratosphere-conf-modified.yaml"), stratosphereConf, new Path(clientHomeDir));
LOG.info("Prepared localresource for modified yaml: "+stratosphereConf);
boolean hasLog4j = new File(currDir+"/log4j.properties").exists();
// prepare the files to ship
LocalResource[] remoteShipRsc = null;
String[] remoteShipPaths = shipListString.split(",");
if(!shipListString.isEmpty()) {
remoteShipRsc = new LocalResource[remoteShipPaths.length];
{ // scope for i
int i = 0;
for(String remoteShipPathStr : remoteShipPaths) {
if(remoteShipPathStr == null || remoteShipPathStr.isEmpty()) {
continue;
}
remoteShipRsc[i] = Records.newRecord(LocalResource.class);
Path remoteShipPath = new Path(remoteShipPathStr);
Utils.registerLocalResource(fs, remoteShipPath, remoteShipRsc[i]);
i++;
}
}
}
// respect custom JVM options in the YAML file
final String javaOpts = GlobalConfiguration.getString(ConfigConstants.STRATOSPHERE_JVM_OPTIONS, "");
// Obtain allocated containers and launch
int allocatedContainers = 0;
int completedContainers = 0;
while (allocatedContainers < taskManagerCount) {
AllocateResponse response = rmClient.allocate(0);
for (Container container : response.getAllocatedContainers()) {
LOG.info("Got new Container for TM "+container.getId()+" on host "+container.getNodeId().getHost());
++allocatedContainers;
// Launch container by create ContainerLaunchContext
ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
String tmCommand = "$JAVA_HOME/bin/java -Xmx"+heapLimit+"m " + javaOpts ;
if(hasLog4j) {
tmCommand += " -Dlog.file=\""+ApplicationConstants.LOG_DIR_EXPANSION_VAR +"/taskmanager-log4j.log\" -Dlog4j.configuration=file:log4j.properties";
}
tmCommand += " eu.stratosphere.yarn.YarnTaskManagerRunner -configDir . "
+ " 1>"
+ ApplicationConstants.LOG_DIR_EXPANSION_VAR
+ "/taskmanager-stdout.log"
+ " 2>"
+ ApplicationConstants.LOG_DIR_EXPANSION_VAR
+ "/taskmanager-stderr.log";
ctx.setCommands(Collections.singletonList(tmCommand));
LOG.info("Starting TM with command="+tmCommand);
// copy resources to the TaskManagers.
Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(2);
localResources.put("stratosphere.jar", stratosphereJar);
localResources.put("stratosphere-conf.yaml", stratosphereConf);
// add ship resources
if(!shipListString.isEmpty()) {
Preconditions.checkNotNull(remoteShipRsc);
for( int i = 0; i < remoteShipPaths.length; i++) {
localResources.put(new Path(remoteShipPaths[i]).getName(), remoteShipRsc[i]);
}
}
ctx.setLocalResources(localResources);
// Setup CLASSPATH for Container (=TaskTracker)
Map<String, String> containerEnv = new HashMap<String, String>();
Utils.setupEnv(conf, containerEnv); //add stratosphere.jar to class path.
containerEnv.put(Client.ENV_CLIENT_USERNAME, yarnClientUsername);
ctx.setEnvironment(containerEnv);
UserGroupInformation user = UserGroupInformation.getCurrentUser();
try {
Credentials credentials = user.getCredentials();
DataOutputBuffer dob = new DataOutputBuffer();
credentials.writeTokenStorageToStream(dob);
ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(),
0, dob.getLength());
ctx.setTokens(securityTokens);
} catch (IOException e) {
LOG.warn("Getting current user info failed when trying to launch the container"
+ e.getMessage());
}
LOG.info("Launching container " + allocatedContainers);
nmClient.startContainer(container, ctx);
}
for (ContainerStatus status : response.getCompletedContainersStatuses()) {
++completedContainers;
LOG.info("Completed container (while allocating) "+status.getContainerId()+". Total Completed:" + completedContainers);
LOG.info("Diagnostics "+status.getDiagnostics());
}
Thread.sleep(100);
}
// Now wait for containers to complete
while (completedContainers < taskManagerCount) {
AllocateResponse response = rmClient.allocate(completedContainers
/ taskManagerCount);
for (ContainerStatus status : response.getCompletedContainersStatuses()) {
++completedContainers;
LOG.info("Completed container "+status.getContainerId()+". Total Completed:" + completedContainers);
LOG.info("Diagnostics "+status.getDiagnostics());
}
Thread.sleep(5000);
}
LOG.info("Shutting down JobManager");
jm.shutdown();
// Un-register with ResourceManager
rmClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, "", "");
}
public static void main(String[] args) throws Exception {
final String yarnClientUsername = System.getenv(Client.ENV_CLIENT_USERNAME);
LOG.info("YARN daemon runs as '"+UserGroupInformation.getCurrentUser().getShortUserName()+"' setting"
+ " user to execute Stratosphere ApplicationMaster/JobManager to '"+yarnClientUsername+"'");
UserGroupInformation ugi = UserGroupInformation.createRemoteUser(yarnClientUsername);
for(Token<? extends TokenIdentifier> toks : UserGroupInformation.getCurrentUser().getTokens()) {
ugi.addToken(toks);
}
ugi.doAs(new PrivilegedAction<Object>() {
@Override
public Object run() {
try {
new ApplicationMaster().run();
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
});
}
}