Package com.opengamma.engine.calcnode

Source Code of com.opengamma.engine.calcnode.RemoteNodeJobInvoker

/**
* Copyright (C) 2009 - present by OpenGamma Inc. and the OpenGamma group of companies
*
* Please see distribution for license.
*/
package com.opengamma.engine.calcnode;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;

import org.fudgemsg.FudgeContext;
import org.fudgemsg.FudgeMsgEnvelope;
import org.fudgemsg.MutableFudgeMsg;
import org.fudgemsg.mapping.FudgeDeserializer;
import org.fudgemsg.mapping.FudgeSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.opengamma.OpenGammaRuntimeException;
import com.opengamma.engine.cache.AbstractIdentifierMap;
import com.opengamma.engine.cache.IdentifierMap;
import com.opengamma.engine.calcnode.msg.Cancel;
import com.opengamma.engine.calcnode.msg.Execute;
import com.opengamma.engine.calcnode.msg.Failure;
import com.opengamma.engine.calcnode.msg.Invocations;
import com.opengamma.engine.calcnode.msg.IsAlive;
import com.opengamma.engine.calcnode.msg.Ready;
import com.opengamma.engine.calcnode.msg.RemoteCalcNodeMessage;
import com.opengamma.engine.calcnode.msg.RemoteCalcNodeMessageVisitor;
import com.opengamma.engine.calcnode.msg.Result;
import com.opengamma.engine.calcnode.msg.Scaling;
import com.opengamma.engine.calcnode.stats.FunctionCosts;
import com.opengamma.engine.calcnode.stats.FunctionInvocationStatisticsReceiver;
import com.opengamma.engine.function.NoOpFunction;
import com.opengamma.engine.function.blacklist.FunctionBlacklistMaintainer;
import com.opengamma.engine.function.blacklist.FunctionBlacklistQuery;
import com.opengamma.engine.view.ExecutionLogMode;
import com.opengamma.transport.FudgeConnection;
import com.opengamma.transport.FudgeConnectionStateListener;
import com.opengamma.transport.FudgeMessageReceiver;
import com.opengamma.transport.FudgeMessageSender;

/**
* A JobInvoker for invoking a job on a remote node connected by a FudgeConnection.
*/
/* package */class RemoteNodeJobInvoker implements JobInvoker, FudgeMessageReceiver, FudgeConnectionStateListener {

  private static final Logger s_logger = LoggerFactory.getLogger(RemoteNodeJobInvoker.class);

  private static final class JobInfo {

    /**
     * The callback to receive notification of the job completion.
     */
    private final JobInvocationReceiver _receiver;

    /**
     * The calculation job.
     */
    private final CalculationJob _job;

    public JobInfo(final JobInvocationReceiver receiver, final CalculationJob job) {
      _receiver = receiver;
      _job = job;
    }

    public JobInvocationReceiver getReceiver() {
      return _receiver;
    }

    public int getLaunchDelta() {
      return (_job.getTail() != null) ? _job.getTail().size() - 1 : -1;
    }

    public CalculationJob getJob() {
      return _job;
    }

  }

  private final ConcurrentMap<CalculationJobSpecification, JobInfo> _pendingJobs = new ConcurrentHashMap<CalculationJobSpecification, JobInfo>();
  private final ExecutorService _executorService;
  private final FudgeMessageSender _fudgeMessageSender;
  private final CapabilitySet _capabilitySet = new CapabilitySet();
  private volatile int _capacity;
  private final AtomicInteger _launched = new AtomicInteger();
  private final AtomicReference<JobInvokerRegister> _dispatchCallback = new AtomicReference<JobInvokerRegister>();
  private final IdentifierMap _identifierMap;
  private final FunctionCosts _functionCosts;
  private final FunctionBlacklistQuery _blacklistQuery;
  private final FunctionBlacklistMaintainer _blacklistUpdate;
  private volatile String _invokerId;
  private final RemoteCalcNodeMessageVisitor _messageVisitor = new RemoteCalcNodeMessageVisitor() {

    @Override
    protected void visitUnexpectedMessage(final RemoteCalcNodeMessage message) {
      s_logger.warn("Unexpected message - {}", message);
    }

    @Override
    protected void visitFailureMessage(final Failure message) {
      s_logger.info("Received failure for job {}", message.getJob());
      if (message.getReady() != null) {
        message.getReady().accept(this);
      }
      // We decrement the count (and re-register) before processing the data as the remote node is already available if it's sent us its data.
      final JobInfo job = getPendingJobs().remove(message.getJob());
      if (job == null) {
        s_logger.warn("Duplicate or failure for cancelled callback {} received", message.getJob());
        return;
      }
      if (_launched.addAndGet(job.getLaunchDelta()) < _capacity) {
        // We check for below capacity. We can get "equal" here, but that means there is an invoke taking place which will be dealt with
        // by the notifyWhenAvailable that gets called to reschedule the invoker
        if (registerIfRequired(true)) {
          s_logger.debug("Notified dispatcher of capacity available");
        }
      }
      s_logger.debug("Failed job on {} with message {}", message.getComputeNodeId(), message.getErrorMessage());
      jobFailed(job, message.getComputeNodeId(), new OpenGammaRuntimeException(message.getErrorMessage()));
    }

    @Override
    protected void visitInvocationsMessage(final Invocations message) {
      s_logger.info("Received invocation statistics");
      final Scaling scaling = FunctionInvocationStatisticsReceiver.messageReceived(getFunctionCosts(), message);
      if (scaling != null) {
        s_logger.debug("Sending scaling message ", scaling);
        final MutableFudgeMsg scalingMessage = getFudgeMessageSender().getFudgeContext().newMessage();
        FudgeSerializer.addClassHeader(scalingMessage, scaling.getClass(), RemoteCalcNodeMessage.class);
        scaling.toFudgeMsg(new FudgeSerializer(getFudgeMessageSender().getFudgeContext()), scalingMessage);
        getFudgeMessageSender().send(scalingMessage);
      }
    }

    @Override
    protected void visitReadyMessage(final Ready message) {
      s_logger.debug("Remote invoker ready message - {}", message);
      getCapabilitySet().setParameterCapability(PlatformCapabilities.NODE_COUNT, message.getCapacity());
      // [ENG-42] this is where we'd detect any other capability changes
      _capacity = message.getCapacity();
      final int launched = _launched.get();
      if (launched < 0) {
        // An additional decrement can happen if there is an error in the original job dispatch
        _launched.incrementAndGet();
      } else if (launched < _capacity) {
        if (registerIfRequired(true)) {
          s_logger.info("Remote invoker ready for use by dispatcher, capacity {}", message.getCapacity());
        }
      } else {
        s_logger.info("Remote invoker over capacity {} with {} jobs", message.getCapacity(), launched);
      }
    }

    @Override
    protected void visitResultMessage(final Result message) {
      s_logger.info("Received result for job {}", message.getResult().getSpecification());
      if (message.getReady() != null) {
        message.getReady().accept(this);
      }
      // We decrement the count (and re-register) before processing the data as the remote node is already available if it's sent us its data.
      final JobInfo job = getPendingJobs().remove(message.getResult().getSpecification());
      if (job == null) {
        s_logger.warn("Duplicate or result for cancelled callback {} received", message.getResult().getSpecification());
        return;
      }
      if (_launched.addAndGet(job.getLaunchDelta()) < _capacity) {
        // We check for below capacity. We can get "equal" here, but that means there is an invoke taking place which will be dealt with
        // by the notifyWhenAvailable that gets called to reschedule the invoker
        if (registerIfRequired(true)) {
          s_logger.debug("Notified dispatcher of capacity available");
        }
      }
      final CalculationJobResult result = message.getResult();
      AbstractIdentifierMap.resolveIdentifiers(getIdentifierMap(), result);
      job.getReceiver().jobCompleted(result);
    }

  };

  public RemoteNodeJobInvoker(
      final ExecutorService executorService, final Ready initialMessage, final FudgeConnection fudgeConnection,
      final IdentifierMap identifierMap, final FunctionCosts functionCosts, final FunctionBlacklistQuery blacklistQuery,
      final FunctionBlacklistMaintainer blacklistUpdate) {
    _executorService = executorService;
    _fudgeMessageSender = fudgeConnection.getFudgeMessageSender();
    _identifierMap = identifierMap;
    _invokerId = initialMessage.getHostId();
    _functionCosts = functionCosts;
    _blacklistQuery = blacklistQuery;
    _blacklistUpdate = blacklistUpdate;
    fudgeConnection.setFudgeMessageReceiver(this);
    fudgeConnection.setConnectionStateListener(this);
    initialMessage.accept(_messageVisitor);
    s_logger.info("Remote node invoker created with capacity {}", _capacity);
  }

  private CapabilitySet getCapabilitySet() {
    return _capabilitySet;
  }

  protected void addCapabilities(final Collection<Capability> capabilities) {
    getCapabilitySet().addCapabilities(capabilities);
  }

  @Override
  public Collection<Capability> getCapabilities() {
    return getCapabilitySet().getCapabilities();
  }

  private ConcurrentMap<CalculationJobSpecification, JobInfo> getPendingJobs() {
    return _pendingJobs;
  }

  private FudgeMessageSender getFudgeMessageSender() {
    return _fudgeMessageSender;
  }

  private ExecutorService getExecutorService() {
    return _executorService;
  }

  private IdentifierMap getIdentifierMap() {
    return _identifierMap;
  }

  private FunctionCosts getFunctionCosts() {
    return _functionCosts;
  }

  private FunctionBlacklistQuery getBlacklistQuery() {
    return _blacklistQuery;
  }

  private FunctionBlacklistMaintainer getBlacklistUpdate() {
    return _blacklistUpdate;
  }

  protected void sendMessage(final RemoteCalcNodeMessage message) {
    final FudgeSerializer serializer = new FudgeSerializer(getFudgeMessageSender().getFudgeContext());
    getFudgeMessageSender().send(FudgeSerializer.addClassHeader(serializer.objectToFudgeMsg(message), message.getClass(), RemoteCalcNodeMessage.class));
  }

  private void jobFailed(final JobInvocationReceiver receiver, final CalculationJob job, final String nodeId, final Exception e) {
    receiver.jobFailed(this, nodeId, e);
    if (job.getTail() == null) {
      if (job.getRequiredJobIds() == null) {
        final Collection<CalculationJobItem> items = job.getJobItems();
        if (items.size() <= 1) {
          getBlacklistUpdate().failedJobItems(items);
        }
      }
    }
  }

  private void jobFailed(final JobInfo job, final String nodeId, final Exception e) {
    jobFailed(job.getReceiver(), job.getJob(), nodeId, e);
  }

  /**
   * Replaces any blacklisted job items with no-op functions. This keeps the shape of the job the same and may allow continuation of dependent jobs that can operate on missing inputs.
   */
  /* package */static CalculationJob blacklist(final FunctionBlacklistQuery query, final CalculationJob job) {
    if (query.isEmpty()) {
      return job;
    }
    final List<CalculationJobItem> originalItems = job.getJobItems();
    final int size = originalItems.size();
    for (int i = 0; i < size; i++) {
      CalculationJobItem item = originalItems.get(i);
      if (query.isBlacklisted(item)) {
        final List<CalculationJobItem> newItems = new ArrayList<CalculationJobItem>(size);
        for (int j = 0; j < i; j++) {
          newItems.add(originalItems.get(j));
        }
        newItems.add(new CalculationJobItem(
            NoOpFunction.UNIQUE_ID, item.getFunctionParameters(), item.getComputationTargetSpecification(),
            item.getInputIdentifiers(), item.getOutputIdentifiers(), ExecutionLogMode.INDICATORS));
        for (int j = i + 1; j < size; j++) {
          item = originalItems.get(i);
          if (query.isBlacklisted(item)) {
            newItems.add(new CalculationJobItem(
                NoOpFunction.UNIQUE_ID, item.getFunctionParameters(), item.getComputationTargetSpecification(),
                item.getInputIdentifiers(), item.getOutputIdentifiers(), ExecutionLogMode.INDICATORS));
          } else {
            newItems.add(item);
          }
        }
        return new CalculationJob(job.getSpecification(), job.getFunctionInitializationIdentifier(), job.getResolverVersionCorrection(), job.getRequiredJobIds(), newItems, job.getCacheSelectHint());
      }
    }
    return job;
  }

  @Override
  public boolean invoke(final CalculationJob rootJob, final JobInvocationReceiver receiver) {
    while (_launched.incrementAndGet() > _capacity) {
      if (_launched.decrementAndGet() >= _capacity) {
        s_logger.debug("Capacity reached");
        return false;
      }
    }
    s_logger.info("Dispatching job {}", rootJob.getSpecification());
    // Don't block the dispatcher with outgoing serialization and I/O
    getExecutorService().execute(new Runnable() {

      private void sendJob(final CalculationJob job) throws Exception {
        getPendingJobs().put(job.getSpecification(), new JobInfo(receiver, job));
        AbstractIdentifierMap.convertIdentifiers(getIdentifierMap(), job);
        sendMessage(new Execute(blacklist(getBlacklistQuery(), job)));
      }

      @Override
      public void run() {
        // Breadth first sending of jobs, just in case some can start before we've sent everything
        try {
          sendJob(rootJob);
          if (rootJob.getTail() != null) {
            final Queue<CalculationJob> jobs = new LinkedList<CalculationJob>(rootJob.getTail());
            CalculationJob job = jobs.poll();
            while (job != null) {
              sendJob(job);
              if (job.getTail() != null) {
                jobs.addAll(job.getTail());
              }
              job = jobs.poll();
            }
          }
        } catch (Exception e) {
          s_logger.warn("Error sending job {}", rootJob.getSpecification().getJobId());
          jobFailed(receiver, rootJob, "node on " + getInvokerId(), e);
          // Not knowing where the failure occurred, we may get an additional decrement if any of the jobs started completing. This may have
          // broken the whole connection which will not be a problem. Otherwise We'll check, and adjust, for this when "Ready" messages
          // arrive.
          if (_launched.decrementAndGet() < _capacity) {
            if (registerIfRequired(true)) {
              s_logger.debug("Notified dispatcher of capacity available");
            }
          }
        }
      }
    });
    return true;
  }

  @Override
  public void cancel(final Collection<CalculationJobSpecification> jobs) {
    s_logger.info("Cancelling {} jobs at {}", jobs.size(), getInvokerId());
    sendMessage(new Cancel(jobs));
  }

  @Override
  public void cancel(final CalculationJobSpecification job) {
    s_logger.info("Cancelling {} at {}", job, getInvokerId());
    sendMessage(new Cancel(Collections.singleton(job)));
  }

  /**
   * Returns true with the remote client generating failure messages if anything is not alive.
   */
  @Override
  public boolean isAlive(final Collection<CalculationJobSpecification> jobs) {
    s_logger.info("Querying {} jobs at {}", jobs.size(), getInvokerId());
    sendMessage(new IsAlive(jobs));
    return true;
  }

  @Override
  public boolean isAlive(final CalculationJobSpecification job) {
    s_logger.info("Querying {} at {}", job.getJobId(), getInvokerId());
    sendMessage(new IsAlive(Collections.singleton(job)));
    return true;
  }

  @Override
  public boolean notifyWhenAvailable(final JobInvokerRegister callback) {
    _dispatchCallback.set(callback);
    if (_launched.get() < _capacity) {
      if (registerIfRequired(false)) {
        s_logger.debug("Capacity available at notify");
        return true;
      }
    }
    return false;
  }

  private boolean registerIfRequired(final boolean invokeCallback) {
    final JobInvokerRegister callback = _dispatchCallback.getAndSet(null);
    if (callback != null) {
      if (invokeCallback) {
        callback.registerJobInvoker(this);
      }
      return true;
    } else {
      return false;
    }
  }

  @Override
  public void messageReceived(final FudgeContext fudgeContext, final FudgeMsgEnvelope msgEnvelope) {
    final FudgeDeserializer deserializer = new FudgeDeserializer(fudgeContext);
    final RemoteCalcNodeMessage message = deserializer.fudgeMsgToObject(RemoteCalcNodeMessage.class, msgEnvelope.getMessage());
    message.accept(_messageVisitor);
  }

  @Override
  public void connectionFailed(final FudgeConnection connection, final Exception cause) {
    s_logger.warn("Client connection {} dropped", connection, cause);
    _launched.addAndGet(_capacity); // Force over capacity to prevent any new submissions
    final String invokerId = _invokerId;
    _invokerId = null;
    for (CalculationJobSpecification jobSpec : getPendingJobs().keySet()) {
      final JobInfo job = getPendingJobs().remove(jobSpec);
      // There could still be late messages arriving from a buffer even though the connection has now failed
      if (job != null) {
        s_logger.debug("Cancelling pending operation {}", jobSpec);
        jobFailed(job, "node on " + invokerId, cause);
      }
    }
  }

  @Override
  public void connectionReset(final FudgeConnection connection) {
    s_logger.info("Connection reset by client");
    // We're the server end of a connection, so this isn't going to happen with the socket implementation
  }

  @Override
  public String toString() {
    return _fudgeMessageSender.toString();
  }

  @Override
  public String getInvokerId() {
    return _invokerId;
  }

}
TOP

Related Classes of com.opengamma.engine.calcnode.RemoteNodeJobInvoker

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.