// Source code of com.spotify.helios.agent.QueueingHistoryWriter

/*
 * Copyright (c) 2014 Spotify AB.
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */


package com.spotify.helios.agent;


import com.google.common.base.Function;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.AbstractIdleService;
import com.google.common.util.concurrent.MoreExecutors;


import com.fasterxml.jackson.core.type.TypeReference;
import com.spotify.helios.common.descriptors.JobId;
import com.spotify.helios.common.descriptors.TaskStatus;
import com.spotify.helios.common.descriptors.TaskStatusEvent;
import com.spotify.helios.servicescommon.PersistentAtomicReference;
import com.spotify.helios.servicescommon.coordination.Paths;
import com.spotify.helios.servicescommon.coordination.ZooKeeperClient;


import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.ConnectionLossException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


import java.io.IOException;
import java.nio.channels.ClosedByInterruptException;
import java.nio.file.Path;
import java.util.Collections;
import java.util.Deque;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;


import static com.google.common.base.Preconditions.checkState;
import static java.util.concurrent.TimeUnit.SECONDS;


/**
 * Writes task history to ZK, and attempts to gracefully handle the case where ZK is down, and tries
 * to lose the right things if it has to lose stuff.
 *
 * Just some breadcrumbs so next time, the person that follows me can understand why things are
 * the way they are.
 *
 * Theory of operation:
 * 1. saveHistoryItem should never block for any significant amount of time.  Specifically, it
 *    should not block on ZK being in any particular state, and ideally not while a file write is
 *    occurring, as the file may get large if ZK has been away for a long time.
 * 2. We limit each job to max 30 events in memory (and in ZK for that matter)
 * 3. Maximum of 600 total events, so as not to consume all available memory.
 */
public class QueueingHistoryWriter extends AbstractIdleService implements Runnable {
  private static final Logger log = LoggerFactory.getLogger(QueueingHistoryWriter.class);


  public static final int MAX_NUMBER_STATUS_EVENTS_TO_RETAIN = 30;
  private static final int MAX_QUEUE_SIZE = 30;
  private static final int MAX_TOTAL_SIZE = 600;


  private final ConcurrentMap<JobId, Deque<TaskStatusEvent>> items;
  private final ScheduledExecutorService zkWriterExecutor =
      MoreExecutors.getExitingScheduledExecutorService(
          (ScheduledThreadPoolExecutor) Executors.newScheduledThreadPool(1), 0, SECONDS);
  private final String hostname;
  private final AtomicInteger count;
  private final ZooKeeperClient client;
  private final PersistentAtomicReference<ConcurrentMap<JobId, Deque<TaskStatusEvent>>>
      backingStore;


  public QueueingHistoryWriter(final String hostname, final ZooKeeperClient client,
                               final Path backingFile) throws IOException, InterruptedException {
    this.hostname = hostname;
    this.client = client;
    this.backingStore = PersistentAtomicReference.create(backingFile,
        new TypeReference<ConcurrentMap<JobId, Deque<TaskStatusEvent>>>(){},
        new Supplier<ConcurrentMap<JobId, Deque<TaskStatusEvent>>>() {
          @Override public ConcurrentMap<JobId, Deque<TaskStatusEvent>> get() {
            return Maps.newConcurrentMap();
          }
        });
    this.items = backingStore.get();


    // Clean out any errant null values.  Normally shouldn't have any, but we did have a few
    // where it happened, and this will make sure we can get out of a bad state if we get into it.
    final ImmutableSet<JobId> curKeys = ImmutableSet.copyOf(this.items.keySet());
    for (JobId key : curKeys) {
      if (this.items.get(key) == null) {
        this.items.remove(key);
      }
    }


    int itemCount = 0;
    for (Deque<TaskStatusEvent> deque : items.values()) {
      itemCount += deque.size();
    }
    this.count = new AtomicInteger(itemCount);
  }


  @Override
  protected void startUp() throws Exception {
    zkWriterExecutor.scheduleAtFixedRate(this, 1, 1, TimeUnit.SECONDS);
  }


  @Override
  protected void shutDown() throws Exception {
    zkWriterExecutor.shutdownNow();
    zkWriterExecutor.awaitTermination(1, TimeUnit.MINUTES);
  }


  private void add(TaskStatusEvent item) throws InterruptedException {
    // If too many "globally", toss them
    while (count.get() >= MAX_TOTAL_SIZE) {
      getNext();
    }


    final JobId key = item.getStatus().getJob().getId();
    final Deque<TaskStatusEvent> deque = getDeque(key);


    synchronized (deque) {
      // if too many in the particular deque, toss them
      while (deque.size() >= MAX_QUEUE_SIZE) {
        deque.remove();
        count.decrementAndGet();
      }
      deque.add(item);
      count.incrementAndGet();
    }


    try {
      backingStore.set(items);
    } catch (ClosedByInterruptException e) {
      log.debug("Writing task status event to backing store was interrupted");
    } catch (IOException e) { // We are best effort after all...
      log.warn("Failed to write task status event to backing store", e);
    }
  }


  private Deque<TaskStatusEvent> getDeque(final JobId key) {
    synchronized (items) {
      final Deque<TaskStatusEvent> deque = items.get(key);
      if (deque == null) {  // try more assertively to get a deque
        final ConcurrentLinkedDeque<TaskStatusEvent> newDeque =
            new ConcurrentLinkedDeque<TaskStatusEvent>();
        items.put(key, newDeque);
        return newDeque;
      }
      return deque;
    }
  }


  public void saveHistoryItem(final JobId jobId, final TaskStatus status)
      throws InterruptedException {
    saveHistoryItem(jobId, status, System.currentTimeMillis());
  }


  public void saveHistoryItem(final JobId jobId, final TaskStatus status, long timestamp)
      throws InterruptedException {
    add(new TaskStatusEvent(status, timestamp, hostname));
  }


  private TaskStatusEvent getNext() {
    // Some explanation: We first find the eldest event from amongst the queues (ok, they're
    // deques, but we really use it as a put back queue), and only then to we try to get
    // a lock on the relevant queue from whence we got the event.  Assuming that all worked
    // *and* that the event we have wasn't rolled off due to max-size limitations, we then
    // pull the item off the queue and return it.  We're basically doing optimistic concurrency,
    // and skewing things so that adding to this should be cheap.


    while (true) {
      final TaskStatusEvent current = findEldestEvent();


      // Didn't find anything that needed processing?
      if (current == null) {
        return null;
      }


      final JobId id = current.getStatus().getJob().getId();
      final Deque<TaskStatusEvent> deque = items.get(id);
      if (deque == null) {
        // shouldn't happen because we should be the only one pulling items off, but....
        continue;
      }


      synchronized (deque) {
        if (!deque.peek().equals(current)) {
          // item got rolled off, try again
          continue;
        }


        // Pull it off the queue and be paranoid.
        final TaskStatusEvent newCurrent = deque.poll();
        count.decrementAndGet();
        checkState(current.equals(newCurrent), "current should equal newCurrent");
        // Safe because this is the *only* place we hold these two locks at the same time.
        synchronized (items) {
          // Extra paranoia: curDeque should always == deque
          final Deque<TaskStatusEvent> curDeque = items.get(id);
          if (curDeque != null && curDeque.isEmpty()) {
            items.remove(id);
          }
        }
        return current;
      }
    }
  }


  public boolean isEmpty() {
    return count.get() == 0;
  }


  private void putBack(TaskStatusEvent event) {
    final JobId key = event.getStatus().getJob().getId();
    final Deque<TaskStatusEvent> queue = getDeque(key);
    synchronized (queue) {
      if (queue.size() >= MAX_QUEUE_SIZE) {
        // already full, just toss the event
        return;
      }
      queue.push(event);
      count.incrementAndGet();
    }
  }


  private TaskStatusEvent findEldestEvent() {
    // We don't lock anything because in the worst case, we just put things in out of order which
    // while not perfect, won't cause any actual harm.  Out of order meaning between jobids, not
    // within the same job id.  Whether this is the best strategy (as opposed to fullest deque)
    // is arguable.
    TaskStatusEvent current = null;
    for (Deque<TaskStatusEvent> queue : items.values()) {
      if (queue == null) {
        continue;
      }
      final TaskStatusEvent item = queue.peek();
      if (current == null || (item.getTimestamp() < current.getTimestamp())) {
        current = item;
      }
    }
    return current;
  }


  @Override
  public void run() {
    while (true) {
      final TaskStatusEvent item = getNext();
      if (item == null) {
        return;
      }


      try {
        final JobId jobId = item.getStatus().getJob().getId();
        final String historyPath = Paths.historyJobHostEventsTimestamp(
            jobId, hostname, item.getTimestamp());
        log.debug("writing queued item to zookeeper {} {}", item.getStatus().getJob().getId(),
            item.getTimestamp());
        client.ensurePath(historyPath, true);
        client.createAndSetData(historyPath, item.getStatus().toJsonBytes());


        // See if too many
        final List<String> events = client.getChildren(Paths.historyJobHostEvents(jobId, hostname));
        if (events.size() > MAX_NUMBER_STATUS_EVENTS_TO_RETAIN) {
          trimStatusEvents(events, jobId);
        }
      } catch (NodeExistsException e) {
        // Ahh, the two generals problem...  We handle by doing nothing since the thing
        // we wanted in, is in.
        log.debug("item we wanted in is already there");
      } catch (ConnectionLossException e) {
        log.warn("Connection lost while putting item into zookeeper, will retry");
        putBack(item);
        break;
      } catch (KeeperException e) {
        log.error("Error putting item into zookeeper, will retry", e);
        putBack(item);
        break;
      }
    }
  }


  private void trimStatusEvents(List<String> events, JobId jobId) {
    // CleanupExecutor only has one thread so can assume no others are fiddling as we do this.
    // All this to sort numerically instead of lexically....
    final List<Long> eventsAsLongs = Lists.newArrayList(Iterables.transform(events,
      new Function<String, Long>() {
      @Override
      public Long apply(String name) {
        return Long.valueOf(name);
      }
    }));
    Collections.sort(eventsAsLongs);


    for (int i = 0; i < (eventsAsLongs.size() - MAX_NUMBER_STATUS_EVENTS_TO_RETAIN); i++) {
      try {
        client.delete(Paths.historyJobHostEventsTimestamp(jobId, hostname, eventsAsLongs.get(i)));
      } catch (KeeperException e) {
        log.warn("failure deleting overflow of status items - we're hoping a later"
            + " execution will fix", e);
      }
    }
  }
}
// Source code of com.spotify.helios.agent.QueueingHistoryWriter

// Related classes of com.spotify.helios.agent.QueueingHistoryWriter