// Load the cluster description: the cluster directory is identified by the
// cluster URI held in the service arguments (the "cd" argument).
String sliderClusterDir = serviceArgs.getSliderClusterURI();
URI sliderClusterURI = new URI(sliderClusterDir);
Path clusterDirPath = new Path(sliderClusterURI);
SliderFileSystem fs = getClusterFS();
// build up information about the running application -this
// will be passed down to the cluster status
MapOperations appInformation = new MapOperations();
// Read the instance definition from the cluster directory in its
// unresolved form; resolution happens when it is handed to the app state.
AggregateConf instanceDefinition =
InstanceIO.loadInstanceDefinitionUnresolved(fs, clusterDirPath);
// Name the definition after the cluster before it is logged and published.
instanceDefinition.setName(clustername);
log.info("Deploying cluster {}:", instanceDefinition);
stateForProviders.setApplicationName(clustername);
// triggers resolution and snapshotting in agent
appState.updateInstanceDefinition(instanceDefinition);
// Locate the local configuration directory. When it is not an existing
// directory, log that fact together with a listing of its parent directory.
File confDir = getLocalConfDir();
if (!(confDir.exists() && confDir.isDirectory())) {
  log.info("Conf dir {} does not exist.", confDir);
  File confParent = confDir.getParentFile();
  log.info("Parent dir {}:\n{}", confParent, SliderUtils.listDir(confParent));
}
Configuration serviceConf = getConfig();
// Install the Slider AM filter initializer and tell the IP filter which
// web-service context roots exist, so that static resources are filtered
// properly when served through the YARN proxy.
serviceConf.set(HADOOP_HTTP_FILTER_INITIALIZERS, SliderAmFilterInitializer.NAME);
serviceConf.set(
    SliderAmIpFilter.WS_CONTEXT_ROOT,
    WS_CONTEXT_ROOT + "|" + WS_AGENT_CONTEXT_ROOT);
// Get our provider: its type name is a mandatory entry in the global
// internal options of the instance definition.
MapOperations globalInternalOptions =
instanceDefinition.getInternalOperations().getGlobalOptions();
String providerType = globalInternalOptions.getMandatoryOption(
OptionKeys.INTERNAL_PROVIDER_NAME);
log.info("Cluster provider type is {}", providerType);
// Build the factory for that provider type and ask it for the server-side provider.
SliderProviderFactory factory =
SliderProviderFactory.createSliderProviderFactory(
providerType);
providerService = factory.createServerProvider();
// init the provider BUT DO NOT START IT YET
initAndAddService(providerService);
// create a slider AM provider
sliderAMProvider = new SliderAMProviderService();
initAndAddService(sliderAMProvider);
// Look up the RM scheduler address in the service configuration and log it.
InetSocketAddress address = SliderUtils.getRmSchedulerAddress(serviceConf);
log.info("RM is at {}", address);
yarnRPC = YarnRPC.create(serviceConf);
/*
* Extract the container ID. This is then
* turned into an (incomplete) container
*/
// The ID is read from the mandatory CONTAINER_ID environment variable.
appMasterContainerID = ConverterUtils.toContainerId(
SliderUtils.mandatoryEnvVariable(
ApplicationConstants.Environment.CONTAINER_ID.name())
);
// Derive the application attempt ID and application ID from the container ID.
appAttemptID = appMasterContainerID.getApplicationAttemptId();
ApplicationId appid = appAttemptID.getApplicationId();
log.info("AM for ID {}", appid.getId());
// Publish the container, application and attempt IDs in the application information.
appInformation.put(StatusKeys.INFO_AM_CONTAINER_ID,
appMasterContainerID.toString());
appInformation.put(StatusKeys.INFO_AM_APP_ID,
appid.toString());
appInformation.put(StatusKeys.INFO_AM_ATTEMPT_ID,
appAttemptID.toString());
// Obtain the current user's credentials; their serialized form is stored
// in allTokens.
UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
Credentials credentials =
    currentUser.getCredentials();
// Remove the AM->RM token BEFORE serializing, so that containers cannot
// access it. Serializing first would leave the token inside the buffer
// wrapped by allTokens and turn the removal into a no-op.
Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
while (iter.hasNext()) {
  Token<?> token = iter.next();
  log.info("Token {}", token.getKind());
  if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
    iter.remove();
  }
}
// Serialize the filtered credentials. The backing array may be larger than
// the data written, so wrap only the first getLength() bytes.
DataOutputBuffer dob = new DataOutputBuffer();
credentials.writeTokenStorageToStream(dob);
dob.close();
allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
// Create the client-to-AM token secret manager for this attempt; no master
// key is supplied at construction time.
secretManager = new ClientToAMTokenSecretManager(appAttemptID, null);
// Work out the service user name. On a secure cluster it is the current
// user's name; otherwise it is taken from the HADOOP_USER_NAME environment
// variable, which is also remembered so it can be propagated to workers.
if (UserGroupInformation.isSecurityEnabled()) {
  service_user_name = UserGroupInformation.getCurrentUser().getUserName();
} else {
  hadoop_user_name = System.getenv(HADOOP_USER_NAME);
  service_user_name = hadoop_user_name;
  log.info(HADOOP_USER_NAME + "='{}'", hadoop_user_name);
}
// Environment variables to set in every container; assigned at the end of
// the synchronized section below.
Map<String, String> envVars;
/**
* It is critical this section is synchronized, to stop async AM events
* arriving while registering a restarting AM.
*/
synchronized (appState) {
int heartbeatInterval = HEARTBEAT_INTERVAL;
//add the RM client -this brings the callbacks in
asyncRMClient = AMRMClientAsync.createAMRMClientAsync(heartbeatInterval,
this);
addService(asyncRMClient);
//wrap it for the app state model
rmOperationHandler = new AsyncRMOperationHandler(asyncRMClient);
//now bring it up
deployChildService(asyncRMClient);
//nmclient relays callbacks back to this class
nmClientAsync = new NMClientAsyncImpl("nmclient", this);
deployChildService(nmClientAsync);
//bring up the Slider RPC service
startSliderRPCServer();
// record the host and port the RPC service is reachable on
rpcServiceAddress = rpcService.getConnectAddress();
appMasterHostname = rpcServiceAddress.getHostName();
appMasterRpcPort = rpcServiceAddress.getPort();
// the tracking URL is only assigned once the web application has been started
appMasterTrackingUrl = null;
log.info("AM Server is listening at {}:{}", appMasterHostname,
appMasterRpcPort);
appInformation.put(StatusKeys.INFO_AM_HOSTNAME, appMasterHostname);
appInformation.set(StatusKeys.INFO_AM_RPC_PORT, appMasterRpcPort);
//registry
registry = startRegistrationService();
//build the role map: a copy of the provider's roles plus the AM client provider's roles
List<ProviderRole> providerRoles =
new ArrayList<>(providerService.getRoles());
providerRoles.addAll(SliderAMClientProvider.ROLES);
// Start up the WebApp and track the URL for it
// The root certificate is initialised from the AM component's section of
// the application configuration before the agent web app is started.
certificateManager = new CertificateManager();
certificateManager.initRootCert(
instanceDefinition.getAppConfOperations()
.getComponent(SliderKeys.COMPONENT_AM));
startAgentWebApp(appInformation, serviceConf);
webApp = new SliderAMWebApp(registry);
WebApps.$for(SliderAMWebApp.BASE_PATH, WebAppApi.class,
new WebAppApiImpl(this,
stateForProviders,
providerService,
certificateManager),
RestPaths.WS_CONTEXT)
.with(serviceConf)
.start(webApp);
// The tracking URL is built from the RPC hostname and the web app's port;
// the scheme is hard-coded to http.
appMasterTrackingUrl = "http://" + appMasterHostname + ":" + webApp.port();
// wrap the already-started web app in a service so it is tracked with the other child services
WebAppService<SliderAMWebApp> webAppService =
new WebAppService<>("slider", webApp);
webAppService.init(serviceConf);
webAppService.start();
addService(webAppService);
appInformation.put(StatusKeys.INFO_AM_WEB_URL, appMasterTrackingUrl + "/");
appInformation.set(StatusKeys.INFO_AM_WEB_PORT, webApp.port());
// Register self with ResourceManager
// This will start heartbeating to the RM
// address = SliderUtils.getRmSchedulerAddress(asyncRMClient.getConfig());
// NOTE(review): the first placeholder is labelled "RM at" but is given the
// AM's own RPC port, not an RM address -- confirm the intended message.
log.info("Connecting to RM at {},address tracking URL={}",
appMasterRpcPort, appMasterTrackingUrl);
RegisterApplicationMasterResponse response = asyncRMClient
.registerApplicationMaster(appMasterHostname,
appMasterRpcPort,
appMasterTrackingUrl);
// remember the maximum container size reported in the registration response
Resource maxResources =
response.getMaximumResourceCapability();
containerMaxMemory = maxResources.getMemory();
containerMaxCores = maxResources.getVirtualCores();
appState.setContainerLimits(maxResources.getMemory(),
maxResources.getVirtualCores());
// set the RM-defined maximum cluster values
appInformation.put(ResourceKeys.YARN_CORES, Integer.toString(containerMaxCores));
appInformation.put(ResourceKeys.YARN_MEMORY, Integer.toString(containerMaxMemory));
// On a secure cluster only: install the client-to-AM master key from the
// response, keep the application ACLs and refresh the RPC server's service ACL.
boolean securityEnabled = UserGroupInformation.isSecurityEnabled();
if (securityEnabled) {
secretManager.setMasterKey(
response.getClientToAMTokenMasterKey().array());
applicationACLs = response.getApplicationACLs();
//tell the server what the ACLs are
rpcService.getServer().refreshServiceAcl(serviceConf,
new SliderAMPolicyProvider());
}
// extract container list: containers the response reports from previous attempts
List<Container> liveContainers =
response.getContainersFromPreviousAttempts();
//now validate the installation
Configuration providerConf =
providerService.loadProviderConfigurationInformation(confDir);
providerService.validateApplicationConfiguration(instanceDefinition,
confDir,
securityEnabled);
//determine the location for the role history data
Path historyDir = new Path(clusterDirPath, HISTORY_DIR_NAME);
//build the instance
appState.buildInstance(instanceDefinition,
providerConf,
providerRoles,
fs.getFileSystem(),
historyDir,
liveContainers,
appInformation);
// add the AM to the list of nodes in the cluster
appState.buildAppMasterNode(appMasterContainerID,
appMasterHostname,
webApp.port(),
appMasterHostname + ":" + webApp.port());
// build up environment variables that the AM wants set in every container
// irrespective of provider and role.
envVars = new HashMap<>();
// hadoop_user_name is only added when it is non-null
if (hadoop_user_name != null) {
envVars.put(HADOOP_USER_NAME, hadoop_user_name);
}
}
// Create the launcher's temporary directory: a "roles" subdirectory named
// after the AM container ID, under the AM temp dir given by the mandatory
// internal option.
String rolesTmpSubdir = appMasterContainerID.toString() + "/roles";
String amTmpDir = globalInternalOptions.getMandatoryOption(OptionKeys.INTERNAL_AM_TMP_DIR);
Path tmpDirPath = new Path(amTmpDir);
Path launcherTmpDirPath = new Path(tmpDirPath, rolesTmpSubdir);
// NOTE(review): the value returned by mkdirs() is not checked here -- confirm
// a failure to create the directory is detected elsewhere.
fs.getFileSystem().mkdirs(launcherTmpDirPath);
//launcher service
launchService = new RoleLaunchService(this,
providerService,
fs,