// Copyright 2011 Google Inc. All Rights Reserved.

package com.google.appengine.tools.mapreduce;

import static com.google.common.base.Preconditions.checkNotNull;

import com.google.appengine.api.files.AppEngineFile;
import com.google.appengine.tools.mapreduce.impl.AbstractWorkerController;
import com.google.appengine.tools.mapreduce.impl.CountersImpl;
import com.google.appengine.tools.mapreduce.impl.DeleteFilesJob;
import com.google.appengine.tools.mapreduce.impl.IntermediateOutput;
import com.google.appengine.tools.mapreduce.impl.MapShardTask;
import com.google.appengine.tools.mapreduce.impl.ReduceShardTask;
import com.google.appengine.tools.mapreduce.impl.ResultAndCounters;
import com.google.appengine.tools.mapreduce.impl.ShuffleJob;
import com.google.appengine.tools.mapreduce.impl.ShuffleResult;
import com.google.appengine.tools.mapreduce.impl.Util;
import com.google.appengine.tools.mapreduce.impl.WorkerResult;
import com.google.appengine.tools.mapreduce.impl.WorkerShardTask;
import com.google.appengine.tools.mapreduce.impl.handlers.MapReduceServletImpl;
import com.google.appengine.tools.mapreduce.impl.shardedjob.ShardedJobServiceFactory;
import com.google.appengine.tools.mapreduce.impl.shardedjob.ShardedJobSettings;
import com.google.appengine.tools.pipeline.FutureValue;
import com.google.appengine.tools.pipeline.Job0;
import com.google.appengine.tools.pipeline.Job1;
import com.google.appengine.tools.pipeline.Job2;
import com.google.appengine.tools.pipeline.NoSuchObjectException;
import com.google.appengine.tools.pipeline.OrphanedObjectException;
import com.google.appengine.tools.pipeline.PipelineService;
import com.google.appengine.tools.pipeline.PipelineServiceFactory;
import com.google.appengine.tools.pipeline.PromisedValue;
import com.google.appengine.tools.pipeline.Value;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;

/**
* A Pipeline job that runs a MapReduce.
*
* @author ohler@google.com (Christian Ohler)
*
* @param <I> type of input values
* @param <K> type of intermediate keys
* @param <V> type of intermediate values
* @param <O> type of output values
* @param <R> type of final result
*/
public class MapReduceJob<I, K, V, O, R>
    extends Job2<MapReduceResult<R>, MapReduceSpecification<I, K, V, O, R>, MapReduceSettings> {
  private static final long serialVersionUID = 723635736794527552L;

  @SuppressWarnings("unused")
  private static final Logger log = Logger.getLogger(MapReduceJob.class.getName());

  /**
   * Starts a {@link MapReduceJob} with the given parameters in a new Pipeline.
   * Returns the pipeline id.
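   *
   * <p>Illustrative use (a sketch; {@code spec} and {@code settings} are assumed to be a
   * fully configured {@link MapReduceSpecification} and {@link MapReduceSettings}):
   * <pre>{@code
   * String pipelineId = MapReduceJob.start(spec, settings);
   * // Use the pipeline id to look up the job's status and result later.
   * }</pre>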
   */
  public static <I, K, V, O, R> String start(MapReduceSpecification<I, K, V, O, R> specification,
      MapReduceSettings settings) {
    PipelineService pipelineService = PipelineServiceFactory.newPipelineService();
    return pipelineService.startNewPipeline(new MapReduceJob<I, K, V, O, R>(),
        specification, settings, Util.jobSettings(settings));
  }

  public MapReduceJob() {
  }

  @Override public String toString() {
    return getClass().getSimpleName() + "()";
  }

  private static ShardedJobSettings makeShardedJobSettings(MapReduceSettings mrSettings) {
    return new ShardedJobSettings()
        .setControllerPath(mrSettings.getBaseUrl() + MapReduceServletImpl.CONTROLLER_PATH)
        .setWorkerPath(mrSettings.getBaseUrl() + MapReduceServletImpl.WORKER_PATH)
        .setControllerBackend(mrSettings.getBackend())
        .setWorkerBackend(mrSettings.getBackend())
        .setControllerQueueName(mrSettings.getControllerQueueName())
        .setWorkerQueueName(mrSettings.getWorkerQueueName());
  }

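  /**
   * A separate pipeline job that delivers a value to a promise handle.  Used by
   * {@link WorkerController#completed} because filling the promise from the job that
   * created it has proven unreliable (see the HACK note there).
   */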
  private static class FillPromiseJob extends Job2<Void, String, Object> {
    private static final long serialVersionUID = 850701484460334898L;

    FillPromiseJob() {
    }

    @Override public Value<Void> run(String promiseHandle, Object value) {
      try {
        PipelineServiceFactory.newPipelineService()
            .submitPromisedValue(promiseHandle, value);
      } catch (OrphanedObjectException e) {
        throw new RuntimeException(
            promiseHandle + ": Object orphaned, can't submit result " + value, e);
      } catch (NoSuchObjectException e) {
        // TODO(ohler): This exception is retryable; we should retry here rather
        // than through the task queue.
        throw new RuntimeException(
            promiseHandle + ": Handle not found, can't submit result " + value, e);
      }
      return immediate(null);
    }
  }

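  /**
   * Receives the combined result of all worker shards, finalizes the {@link Output},
   * merges the counters, and delivers the overall result through the given promise
   * handle via a {@link FillPromiseJob}.
   */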
  private static class WorkerController<I, O, R, C extends WorkerContext>
      extends AbstractWorkerController<WorkerShardTask<I, O, C>, O> {
    private static final long serialVersionUID = 931651840864967980L;

    private final CountersImpl initialCounters;
    private final Output<O, R> output;
    private final String resultPromiseHandle;

    WorkerController(String shardedJobName, CountersImpl initialCounters,
        Output<O, R> output,
        String resultPromiseHandle) {
      super(shardedJobName);
      this.initialCounters = checkNotNull(initialCounters, "Null initialCounters");
      this.output = checkNotNull(output, "Null output");
      this.resultPromiseHandle = checkNotNull(resultPromiseHandle, "Null resultPromiseHandle");
    }

    @Override public void completed(WorkerResult<O> finalCombinedResult) {
      Map<Integer, OutputWriter<O>> closedWriterMap = finalCombinedResult.getClosedWriters();
      ImmutableList.Builder<OutputWriter<O>> closedWriters = ImmutableList.builder();
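      // Reassemble the closed writers in shard order; every shard index from 0 to
      // size - 1 must be present, otherwise the output would be incomplete.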
      for (int i = 0; i < closedWriterMap.size(); i++) {
        Preconditions.checkState(closedWriterMap.containsKey(i), "%s: Missing closed writer %s: %s",
            this, i, closedWriterMap);
        closedWriters.add(closedWriterMap.get(i));
      }
      R outputResult;
      try {
        outputResult = output.finish(closedWriters.build());
      } catch (IOException e) {
        throw new RuntimeException(output + ".finish() threw IOException", e);
      }
      CountersImpl totalCounters = new CountersImpl();
      totalCounters.addAll(initialCounters);
      totalCounters.addAll(finalCombinedResult.getCounters());
      ResultAndCounters<R> result = new ResultAndCounters<R>(outputResult, totalCounters);
      // HACK(ohler): It seems that pipeline doesn't always allow a promise to
      // be filled in the job that created the promise.  We attempt that in the
      // case where there are 0 shards, like in one of our test cases.  Or maybe
      // it's our lack of retries on NoSuchObjectException.  Need to investigate
      // further.
      PipelineServiceFactory.newPipelineService().startNewPipeline(new FillPromiseJob(),
          resultPromiseHandle, result);
    }
  }

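  /**
   * Creates the per-shard worker task for a given reader/writer pair; lets the map and
   * reduce phases share {@code startShardedJob}.
   */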
  private interface TaskCreator<I, O, C extends WorkerContext> {
    WorkerShardTask<I, O, C> createTask(int shard, int shardCount,
        InputReader<I> input, OutputWriter<O> output);
  }

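  /**
   * Creates one worker task per reader/writer pair and starts the sharded job.  The
   * {@link WorkerController} delivers the finished output and counters through
   * {@code resultPromiseHandle}.
   */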
  private static <I, O, R, C extends WorkerContext> void startShardedJob(
      String shardedJobName, String shardedJobId, CountersImpl initialCounters,
      List<? extends InputReader<I>> readers,
      Output<O, R> output,
      List<? extends OutputWriter<O>> writers,
      TaskCreator<I, O, C> taskCreator,
      String resultPromiseHandle,
      MapReduceSettings settings) {
    Preconditions.checkArgument(readers.size() == writers.size(), "%s: %s readers, %s writers",
        shardedJobName, readers.size(), writers.size());
    ImmutableList.Builder<WorkerShardTask<I, O, C>> initialTasks = ImmutableList.builder();
    for (int i = 0; i < readers.size(); i++) {
      initialTasks.add(taskCreator.createTask(i, readers.size(), readers.get(i), writers.get(i)));
    }
    ShardedJobServiceFactory.getShardedJobService().startJob(
        shardedJobId,
        initialTasks.build(),
        new WorkerController<I, O, R, C>(
            shardedJobName, initialCounters, output, resultPromiseHandle),
        makeShardedJobSettings(settings));
  }

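  /**
   * Runs the map phase as a sharded job.  The result carries the intermediate output
   * files together with the map counters, delivered through a promised value once all
   * map shards have completed.
   */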
  private static class MapJob<I, K, V> extends Job0<ResultAndCounters<List<AppEngineFile>>> {
    private static final long serialVersionUID = 274712180795282822L;

    private final String mrJobId;
    private final MapReduceSpecification<I, K, V, ?, ?> mrSpec;
    private final MapReduceSettings settings;

    private MapJob(String mrJobId,
        MapReduceSpecification<I, K, V, ?, ?> mrSpec,
        MapReduceSettings settings) {
      this.mrJobId = checkNotNull(mrJobId, "Null mrJobId");
      this.mrSpec = checkNotNull(mrSpec, "Null mrSpec");
      this.settings = checkNotNull(settings, "Null settings");
    }

    @Override public String toString() {
      return getClass().getSimpleName() + "(" + mrJobId + ")";
    }

    @Override public Value<ResultAndCounters<List<AppEngineFile>>> run() {
      @SuppressWarnings("unchecked")
      PromisedValue<ResultAndCounters<List<AppEngineFile>>> result =
          (PromisedValue) newPromise(ResultAndCounters.class);
      String shardedJobId = mrJobId + "-map";
      List<? extends InputReader<I>> readers = Util.createReaders(mrSpec.getInput());
      Output<KeyValue<K, V>, List<AppEngineFile>> output = new IntermediateOutput<K, V>(
          mrJobId,
          readers.size(),
          mrSpec.getIntermediateKeyMarshaller(),
          mrSpec.getIntermediateValueMarshaller());
      startShardedJob(mrSpec.getJobName() + " (map phase)",
          shardedJobId,
          new CountersImpl(),
          readers, output, Util.createWriters(output),
          new TaskCreator<I, KeyValue<K, V>, MapperContext<K, V>>() {
            @Override public WorkerShardTask<I, KeyValue<K, V>, MapperContext<K, V>> createTask(
                int shard, int shardCount,
                InputReader<I> reader, OutputWriter<KeyValue<K, V>> writer) {
              return new MapShardTask<I, K, V>(mrJobId, shard, shardCount,
                  reader, mrSpec.getMapper(), writer,
                  settings.getMillisPerSlice());
            }
          },
          result.getHandle(),
          settings);
      setStatusConsoleUrl(settings.getBaseUrl() + "detail?mapreduce_id=" + shardedJobId);
      return result;
    }
  }

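  /**
   * Runs the reduce phase as a sharded job, reading the shuffled intermediate data and
   * writing to the user-supplied {@link Output}.  Counters from the map phase are
   * carried forward into the final result.
   */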
  private static class ReduceJob<K, V, O, R> extends Job2<
      MapReduceResult<R>, ResultAndCounters<List<AppEngineFile>>, ShuffleResult<K, V, O>> {
    private static final long serialVersionUID = 590237832617368335L;

    private final String mrJobId;
    private final MapReduceSpecification<?, K, V, O, R> mrSpec;
    private final MapReduceSettings settings;

    private ReduceJob(String mrJobId,
        MapReduceSpecification<?, K, V, O, R> mrSpec,
        MapReduceSettings settings) {
      this.mrJobId = checkNotNull(mrJobId, "Null mrJobId");
      this.mrSpec = checkNotNull(mrSpec, "Null mrSpec");
      this.settings = checkNotNull(settings, "Null settings");
    }

    @Override public String toString() {
      return getClass().getSimpleName() + "(" + mrJobId + ")";
    }

    @Override public Value<MapReduceResult<R>> run(
        ResultAndCounters<List<AppEngineFile>> mapResult,
        ShuffleResult<K, V, O> shuffleResult) {
      @SuppressWarnings("unchecked")
      PromisedValue<MapReduceResult<R>> result = (PromisedValue) newPromise(MapReduceResult.class);
      String shardedJobId = mrJobId + "-reduce";
      startShardedJob(mrSpec.getJobName() + " (reduce phase)",
          shardedJobId,
          mapResult.getCounters(),
          shuffleResult.getReducerReaders(),
          mrSpec.getOutput(),
          shuffleResult.getReducerWriters(),
          new TaskCreator<KeyValue<K, ReducerInput<V>>, O, ReducerContext<O>>() {
            @Override public WorkerShardTask<KeyValue<K, ReducerInput<V>>, O, ReducerContext<O>>
                createTask(int shard, int shardCount,
                    InputReader<KeyValue<K, ReducerInput<V>>> reader, OutputWriter<O> writer) {
              return new ReduceShardTask<K, V, O>(mrJobId, shard, shardCount,
                  reader, mrSpec.getReducer(), writer,
                  settings.getMillisPerSlice());
            }
          },
          result.getHandle(),
          settings);
      setStatusConsoleUrl(settings.getBaseUrl() + "detail?mapreduce_id=" + shardedJobId);
      return result;
    }
  }

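  /**
   * Deletes the intermediate files written by the map phase once the shuffle no longer
   * needs them (the caller waits for the shuffle before running this job).
   */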
  private static class IntermediateCleanupJob
      extends Job1<Void, ResultAndCounters<List<AppEngineFile>>> {
    private static final long serialVersionUID = 354137030664235135L;

    private final String mrJobId;
    private final MapReduceSettings settings;

    private IntermediateCleanupJob(String mrJobId,
        MapReduceSettings settings) {
      this.mrJobId = checkNotNull(mrJobId, "Null mrJobId");
      this.settings = checkNotNull(settings, "Null settings");
    }

    @Override public String toString() {
      return getClass().getSimpleName() + "(" + mrJobId + ")";
    }

    @Override public Value<Void> run(ResultAndCounters<List<AppEngineFile>> mapResult) {
      futureCall(new DeleteFilesJob("" + this), immediate(mapResult.getOutputResult()),
          Util.jobSettings(settings));
      return immediate(null);
    }
  }

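  /**
   * Deletes the reducer input files produced by the shuffle once the reduce phase has
   * completed (the caller waits for the reduce result before running this job).
   */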
  private static class FinalCleanupJob<K, V, O> extends Job1<Void, ShuffleResult<K, V, O>> {
    private static final long serialVersionUID = 121832907494231026L;

    private final String mrJobId;
    private final MapReduceSettings settings;

    private FinalCleanupJob(String mrJobId,
        MapReduceSettings settings) {
      this.mrJobId = checkNotNull(mrJobId, "Null mrJobId");
      this.settings = checkNotNull(settings, "Null settings");
    }

    @Override public String toString() {
      return getClass().getSimpleName() + "(" + mrJobId + ")";
    }

    @Override public Value<Void> run(ShuffleResult<K, V, O> shuffleResult) {
      futureCall(new DeleteFilesJob("" + this),
          immediate(shuffleResult.getReducerInputFiles()),
          Util.jobSettings(settings));
      return immediate(null);
    }
  }

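  // Wires up the pipeline: map -> shuffle -> reduce, with cleanup jobs that delete
  // intermediate files once the following stage no longer needs them.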
  @Override public Value<MapReduceResult<R>> run(MapReduceSpecification<I, K, V, O, R> mrSpec,
      MapReduceSettings settings) {
    String mrJobId = getJobKey().getName();
    FutureValue<ResultAndCounters<List<AppEngineFile>>> mapResult = futureCall(
        new MapJob<I, K, V>(mrJobId, mrSpec, settings), Util.jobSettings(settings));
    FutureValue<ShuffleResult<K, V, O>> shuffleResult = futureCall(
        new ShuffleJob<K, V, O>(mrJobId, mrSpec, settings), mapResult, Util.jobSettings(settings));
    futureCall(new IntermediateCleanupJob(mrJobId, settings), mapResult,
        Util.jobSettings(settings, waitFor(shuffleResult)));
    FutureValue<MapReduceResult<R>> reduceResult = futureCall(
        new ReduceJob<K, V, O, R>(mrJobId, mrSpec, settings),
        mapResult, shuffleResult, Util.jobSettings(settings));
    futureCall(new FinalCleanupJob<K, V, O>(mrJobId, settings), shuffleResult,
        Util.jobSettings(settings, waitFor(reduceResult)));
    return reduceResult;
  }

}