// Create the two per-job helper actors. Both actor names are suffixed with
// directorJobUuid; the monitor is constructed with managerList, directorJobUuid
// and jobStatus, the collector with jobInfo.
ActorRef monitor = system.actorOf(Props.create(Monitor.class, managerList, directorJobUuid, jobStatus), "ProgressMonitor" + directorJobUuid);
ActorRef collector = system.actorOf(Props.create(ResponseCollector.class, jobInfo), "ResultCollector" + directorJobUuid);
// One 3600-second budget, used both as the ask timeout and as the
// Await.result limit whenever responses are gathered from the collector.
final FiniteDuration gatherResponseDuration = Duration.create(
3600, TimeUnit.SECONDS);
/**
* Gather results: ask the collector for the batch of responses, passing it
* the monitor and the expected number of jobs (totJobNum). The ask only
* returns a future here; nothing blocks on it in these lines.
*/
Future<Object> totResponse = Patterns.ask(collector, new gatherResponse(monitor, totJobNum), new Timeout(gatherResponseDuration));
// Remains null until a reply has been awaited successfully.
BatchResponseFromManager responseFromCollecter = null;
// Wait for the collector's reply, run fail-over rounds while fewer than
// totJobNum responses have arrived, then push every collected response into
// dataStore. Any exception thrown inside this try ends up in the single
// catch at the bottom.
try {
// Blocks for at most gatherResponseDuration; a timeout surfaces as an exception.
responseFromCollecter = (BatchResponseFromManager) Await.result(totResponse, gatherResponseDuration);
System.out.println("Gather Result Back! : " + responseFromCollecter.responseMap.size());
/**
* Fail-over: while responses are missing, re-dispatch the outstanding
* nodes to freshly created managers and gather again.
*/
// failCount starts at 3 and the loop continues while failCount >= 0, so up
// to four fail-over rounds can run.
// NOTE(review): confirm whether three or four rounds were intended.
int failCount = 3;
while (responseFromCollecter.responseMap.size() < totJobNum && failCount >= 0) {
System.out.println("Response less than request, fail over @@");
failCount -- ;
// Presumably the node entries that have no response yet — verify against
// gatherFailOverData, which is defined outside this view.
Map<String, NodeData> failOverMap = gatherFailOverData(nodeDataMapValidSafe, responseFromCollecter);
List<Address> failOverNodeList = new ArrayList<Address>();
int failOverTot = failOverMap.size();
// Select nodes that are still registered in ClusterState.memberLoad. Each
// selected node subtracts 2000 from the remaining count and selection stops
// once the count goes negative.
// NOTE(review): 2000 here versus the 1000 cap on the group size below —
// confirm the two constants are meant to differ.
for (Address m : nodeList) {
if (ClusterState.memberLoad.containsKey(m)) {
failOverNodeList.add(m);
failOverTot -= 2000;
if (failOverTot < 0)
break;
}
}
List<ActorRef> failOverManagerList = new ArrayList<ActorRef>();
// Holds the index of each manager within failOverManagerList.
Queue<Integer> failOverJobQ = new ConcurrentLinkedQueue<Integer>();
if (localMode || failOverNodeList.size()==0) {
// Local path: a single LocalManager created without a remote deploy scope.
agentCommandManager = system.actorOf(
Props.create(LocalManager.class),"AgentCommandManager-" + UUID.randomUUID().toString()
);
failOverJobQ.offer(failOverManagerList.size());
failOverManagerList.add(agentCommandManager);
// Also registered in managerList so it is included wherever that list is used later.
managerList.add(agentCommandManager);
// This assignment is never reverted inside the loop, so later rounds and
// the code after the loop also see localMode == true.
localMode = true;
}
else {
// Remote path: one LocalManager deployed onto each selected node address.
for (Address m : failOverNodeList) {
agentCommandManager = system.actorOf(
Props.create(LocalManager.class).withDeploy(
new Deploy(
new RemoteScope(
m
)
)),
"AgentCommandManager-" + UUID.randomUUID().toString()
);
failOverJobQ.offer(failOverManagerList.size());
failOverManagerList.add(agentCommandManager);
managerList.add(agentCommandManager);
}
}
if (!localMode) {
// Reaching this branch means the remote path above ran, i.e.
// failOverNodeList is non-empty (an empty list would have set localMode).
// NOTE(review): the size()==0 arm of the ternary below therefore looks
// unreachable.
// Second argument is min(entries / nodes + 1, 1000); presumably the
// per-group size — verify against partDataStore.
List<Map<String, NodeData>> failOverJobGroupList = partDataStore(failOverMap, failOverNodeList.size()==0?
failOverMap.size():Math.min(failOverMap.size()/failOverNodeList.size()+1, 1000));
List<ActorRef> failOverDispatcherList = new ArrayList<ActorRef>();
int failOverRequestChunckSize = failOverJobGroupList.size()/failOverManagerList.size() + 1; // Last one do less
// At most three dispatchers are created, all sharing the same manager
// list, job-group list and job queue.
for (int i=0; i<Math.min(3, failOverManagerList.size()) ; i++) {
failOverDispatcherList.add(
system.actorOf(
Props.create(JobDispatcher.class, failOverManagerList, failOverJobGroupList, failOverJobQ, failOverRequestChunckSize,
nodeGroupType, agentCommandType, directorJobUuid, maxConcNum)
)
);
}
// Dispatchers are started with a plain string message and a null sender.
for (ActorRef failOverDispatcher : failOverDispatcherList) {
failOverDispatcher.tell("start dispatching", null);
}
} else {
// Local path: wrap the outstanding entries in a one-entry data store keyed
// by nodeGroupType and hand it to the single local manager.
Map<String, NodeGroupDataMap> failOverDataStore = new HashMap<String, NodeGroupDataMap>();
failOverDataStore.put(nodeGroupType, new NodeGroupDataMap(nodeGroupType));
failOverDataStore.get(nodeGroupType).setNodeDataMapValid(failOverMap);
// The initial request is sent as an ask and its reply awaited for up to 15 seconds.
Future<Object> ackFuture = Patterns.ask(failOverManagerList.get(0), new InitialRequestToManager(nodeGroupType,
agentCommandType, directorJobUuid, failOverDataStore, true, false, maxConcNum), new Timeout(Duration.create(
15, TimeUnit.SECONDS)));
try {
Await.result(ackFuture, Duration.create(
15, TimeUnit.SECONDS));
} catch (Exception e) {
// A failed or timed-out ack is only printed; execution continues and
// endOfRequest is still sent below.
// NOTE(review): confirm this best-effort handling is intended.
e.printStackTrace();
}
failOverManagerList.get(0).tell(new endOfRequest(), null);
}
jobInfo.state = State.processing;
// Ask the collector again with the same totJobNum and the same duration.
// An exception from this await is not handled here; it propagates to the
// outer catch.
Future<Object> futureFailOverResponse = Patterns.ask(collector, new gatherResponse(monitor, totJobNum), new Timeout(gatherResponseDuration));
BatchResponseFromManager failOverResponse = (BatchResponseFromManager) Await.result(futureFailOverResponse, gatherResponseDuration);
System.out.println("FailOver Result Size" + failOverResponse.responseMap.size());
// Merge this round into the accumulated map; an existing key is overwritten.
for (Entry<String, GenericAgentResponse> e : failOverResponse.responseMap.entrySet()) {
responseFromCollecter.responseMap.put(e.getKey(), e.getValue());
}
}
// Feed every collected response to AgentDataProviderHelper together with
// dataStore. This loop is skipped entirely if anything above threw.
for (Entry<String, GenericAgentResponse> e: responseFromCollecter.getResponseMap().entrySet()) {
AgentDataProviderHelper.getInstance()
.updateResponseFromAgentGenericResponse(nodeGroupType,
agentCommandType, e.getValue(), dataStore);
}
} catch (Exception e) {
// Catches every Exception from the block above, not only timeouts, yet
// always prints the timeout message and does not report the exception.
// responseFromCollecter is replaced by a newly constructed instance, so
// responses gathered before the failure are no longer reachable through it.
// NOTE(review): confirm that discarding partial results is intended.
System.out.println("Response Collector Timeout");
responseFromCollecter = new BatchResponseFromManager();
}
// Record the end timestamp and the elapsed time since finishedNotAggregatedTime,
// divided by 1000.0 (seconds, assuming that field is also in epoch millis),
// then mark the job as gathered.
jobInfo.endTime = System.currentTimeMillis();
jobInfo.aggregationTime = (jobInfo.endTime - jobInfo.finishedNotAggregatedTime) / 1000.0;
jobInfo.state = State.gathered;

// Stop the monitor, the collector and every manager held in managerList.
System.out.println("Clear actors.");
system.stop(monitor);
system.stop(collector);
for (ActorRef managerToStop : managerList) {
    system.stop(managerToStop);
}

// Subtract maxConcNum from the local load counter when running locally or
// when there are no nodes at all.
if (localMode || nodeList.size() == 0) {
    localLoad.addAndGet(-maxConcNum);
}

// Subtract maxConcNum from the counter of each node that still has an
// entry in ClusterState.memberLoad; nodes without an entry are skipped.
for (Address releasedNode : nodeList) {
    if (!ClusterState.memberLoad.containsKey(releasedNode)) {
        continue;
    }
    ClusterState.memberLoad.get(releasedNode).addAndGet(-maxConcNum);
}
final FiniteDuration durationLogWorker = Duration.create(
VarUtils.TIMEOUT_ASK_LOGWORKER_SCONDS, TimeUnit.SECONDS);
ActorRef logWorker = ActorConfig.getActorSystem().actorOf(
Props.create(LogWorker.class,
nodeGroupType, agentCommandType, dataStore, directorJobUuid),