Package eu.stratosphere.nephele.jobgraph

Examples of eu.stratosphere.nephele.jobgraph.JobTaskVertex


 
  /**
   * Begins building the task vertex named "Map (Select nearest center)".
   *
   * <p>The visible part of the body creates the vertex through
   * {@code JobGraphUtils.createTask} with {@code IterationIntermediatePactTask} as the task
   * class, wraps the vertex configuration in a {@code TaskConfig}, sets the iteration id to
   * {@code ITERATION_ID}, selects {@code CollectorMapDriver} with the
   * {@code COLLECTOR_MAP} strategy and registers input group 0.
   *
   * <p>NOTE(review): this excerpt ends before the method does; the serializer and comparator
   * parameters are not referenced in the visible lines and the return statement is not shown.
   *
   * @param jobGraph the job graph handed to {@code JobGraphUtils.createTask}
   * @param numSubTasks passed as both trailing arguments of {@code JobGraphUtils.createTask}
   *        (presumably degree of parallelism and subtasks per instance - confirm against
   *        {@code JobGraphUtils})
   * @param inputSerializer not used in the visible part of the body
   * @param broadcastVarSerializer not used in the visible part of the body
   * @param outputSerializer not used in the visible part of the body
   * @param outputComparator not used in the visible part of the body
   */
  private static JobTaskVertex createMapper(JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> inputSerializer,
      TypeSerializerFactory<?> broadcastVarSerializer, TypeSerializerFactory<?> outputSerializer,
      TypeComparatorFactory<?> outputComparator)
  {
    // the vertex runs as an intermediate task of the iteration
    JobTaskVertex mapper = JobGraphUtils.createTask(IterationIntermediatePactTask.class,
      "Map (Select nearest center)", jobGraph, numSubTasks, numSubTasks);
   
    // all settings below are written through this wrapper around the vertex configuration
    TaskConfig intermediateConfig = new TaskConfig(mapper.getConfiguration());
    intermediateConfig.setIterationId(ITERATION_ID);
   
    // driver: collector-based map, reading from input group 0
    intermediateConfig.setDriver(CollectorMapDriver.class);
    intermediateConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
    intermediateConfig.addInputToGroup(0);
View Full Code Here


  /**
   * Begins building the task vertex named "Reduce / Iteration Tail".
   *
   * <p>The visible part of the body creates the vertex through
   * {@code JobGraphUtils.createTask} with {@code IterationTailPactTask} as the task class,
   * wraps its configuration in a {@code TaskConfig}, sets the iteration id to
   * {@code ITERATION_ID}, flags the task as a workset update and selects
   * {@code GroupReduceDriver} as the driver.
   *
   * <p>NOTE(review): this excerpt ends before the method does; the serializer and comparator
   * parameters are not referenced in the visible lines and the return statement is not shown.
   *
   * @param jobGraph the job graph handed to {@code JobGraphUtils.createTask}
   * @param numSubTasks passed as both trailing arguments of {@code JobGraphUtils.createTask}
   *        (presumably degree of parallelism and subtasks per instance - confirm against
   *        {@code JobGraphUtils})
   * @param inputSerializer not used in the visible part of the body
   * @param inputComparator not used in the visible part of the body
   * @param outputSerializer not used in the visible part of the body
   */
  private static JobTaskVertex createReducer(JobGraph jobGraph, int numSubTasks, TypeSerializerFactory<?> inputSerializer,
      TypeComparatorFactory<?> inputComparator, TypeSerializerFactory<?> outputSerializer)
  {
    // ---------------- the tail (group reduce) --------------------
   
    JobTaskVertex tail = JobGraphUtils.createTask(IterationTailPactTask.class, "Reduce / Iteration Tail", jobGraph,
      numSubTasks, numSubTasks);
   
    // wrapper around the vertex configuration; marks this tail as the workset update
    TaskConfig tailConfig = new TaskConfig(tail.getConfiguration());
    tailConfig.setIterationId(ITERATION_ID);
    tailConfig.setIsWorksetUpdate();
   
    // inputs and driver
    tailConfig.setDriver(GroupReduceDriver.class);
View Full Code Here

    // -- vertices ---------------------------------------------------------------------------------------------
    // Two input vertices (points, centers), the iteration head, the mapper and the reducer,
    // plus three output vertices (fake tail output, sync, final output).
    JobInputVertex points = createPointsInput(jobGraph, pointsPath, numSubTasks, serializer);
    JobInputVertex centers = createCentersInput(jobGraph, centersPath, numSubTasks, serializer);
   
    JobTaskVertex head = createIterationHead(jobGraph, numSubTasks, serializer);
    // the same serializer is passed for the mapper's input, broadcast variable and output
    JobTaskVertex mapper = createMapper(jobGraph, numSubTasks, serializer, serializer, serializer, int0Comparator);
   
    // int0Comparator is both the mapper's output comparator and the reducer's input comparator
    JobTaskVertex reducer = createReducer(jobGraph, numSubTasks, serializer, int0Comparator, serializer);
   
    JobOutputVertex fakeTailOutput = JobGraphUtils.createFakeOutput(jobGraph, "FakeTailOutput", numSubTasks, numSubTasks);
   
    JobOutputVertex sync = createSync(jobGraph, numIterations, numSubTasks);
   
    JobOutputVertex output = createOutput(jobGraph, resultPath, numSubTasks, serializer);

    // -- edges ------------------------------------------------------------------------------------------------
    // every edge uses a NETWORK channel; only the distribution pattern differs
    JobGraphUtils.connect(points, mapper, ChannelType.NETWORK, DistributionPattern.POINTWISE);
   
    JobGraphUtils.connect(centers, head, ChannelType.NETWORK, DistributionPattern.POINTWISE);
   
    JobGraphUtils.connect(head, mapper, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
    // Each of the next three statements wraps mapper.getConfiguration() in a fresh TaskConfig.
    // NOTE(review): presumably all three write into the same underlying configuration object -
    // confirm that TaskConfig does not copy the configuration it is given.
    new TaskConfig(mapper.getConfiguration()).setBroadcastGateIterativeWithNumberOfEventsUntilInterrupt(0, numSubTasks);
    new TaskConfig(mapper.getConfiguration()).setInputCached(0, true);
    new TaskConfig(mapper.getConfiguration()).setInputMaterializationMemory(0, MEMORY_PER_CONSUMER * JobGraphUtils.MEGABYTE);

    JobGraphUtils.connect(mapper, reducer, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
    // gate 0 of the reducer is marked iterative with numSubTasks events until interrupt
    new TaskConfig(reducer.getConfiguration()).setGateIterativeWithNumberOfEventsUntilInterrupt(0, numSubTasks);
   
    JobGraphUtils.connect(reducer, fakeTailOutput, ChannelType.NETWORK, DistributionPattern.POINTWISE);
   
    JobGraphUtils.connect(head, output, ChannelType.NETWORK, DistributionPattern.POINTWISE);
   
    JobGraphUtils.connect(head, sync, ChannelType.NETWORK, DistributionPattern.BIPARTITE);

    // -- instance sharing -------------------------------------------------------------------------------------
    // every vertex other than the final output is told to share instances with the final output
    points.setVertexToShareInstancesWith(output);
    centers.setVertexToShareInstancesWith(output);
    head.setVertexToShareInstancesWith(output);
    mapper.setVertexToShareInstancesWith(output);
    reducer.setVertexToShareInstancesWith(output);
    fakeTailOutput.setVertexToShareInstancesWith(output);
    sync.setVertexToShareInstancesWith(output);

    // hand the fully wired graph back to the caller
    return jobGraph;
  }
View Full Code Here

    adjacencyListInputConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    adjacencyListInputConfig.setOutputSerializer(vertexWithAdjacencyListSerializer);
    adjacencyListInputConfig.setOutputComparator(vertexWithAdjacencyListComparator, 0);

    // --------------- the head ---------------------
    // The head vertex runs IterationHeadPactTask; everything below is written into its
    // configuration through the headConfig wrapper.
    JobTaskVertex head = JobGraphUtils.createTask(IterationHeadPactTask.class, "IterationHead", jobGraph,
      degreeOfParallelism, numSubTasksPerInstance);
    TaskConfig headConfig = new TaskConfig(head.getConfiguration());
    headConfig.setIterationId(ITERATION_ID);
   
    // initial input / partial solution
    // input 0 is declared as the partial solution / workset input and is sorted locally
    // with the given comparator, memory budget, file handle count and spilling threshold
    headConfig.addInputToGroup(0);
    headConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
    headConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    headConfig.setInputComparator(vertexWithRankAndDanglingComparator, 0);
    headConfig.setInputLocalStrategy(0, LocalStrategy.SORT);
    headConfig.setMemoryInput(0, minorConsumer * JobGraphUtils.MEGABYTE);
    headConfig.setFilehandlesInput(0, NUM_FILE_HANDLES_PER_SORT);
    headConfig.setSpillingThresholdInput(0, SORT_SPILL_THRESHOLD);
   
    // back channel / iterations
    // same size as the sort memory of input 0 above
    headConfig.setBackChannelMemory(minorConsumer * JobGraphUtils.MEGABYTE);
   
    // output into iteration
    // FORWARD is registered twice on purpose-looking lines; NOTE(review): presumably one entry
    // per output that stays inside the iteration - confirm against the wiring of this job
    headConfig.setOutputSerializer(vertexWithRankAndDanglingSerializer);
    headConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    headConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
   
    // final output
    // described by a separate TaskConfig backed by a fresh Configuration, then attached to the head
    TaskConfig headFinalOutConfig = new TaskConfig(new Configuration());
    headFinalOutConfig.setOutputSerializer(vertexWithRankAndDanglingSerializer);
    headFinalOutConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    headConfig.setIterationHeadFinalOutputConfig(headFinalOutConfig);
   
    // the sync
    // NOTE(review): index 3 presumably has to match the position of the head -> sync
    // connection among the head's outgoing connections - verify when changing the wiring
    headConfig.setIterationHeadIndexOfSyncOutput(3);
    headConfig.setNumberOfIterations(numIterations);
   
    // the driver
    // collector map driver running CustomCompensatingMap; numeric parameters are passed as strings
    headConfig.setDriver(CollectorMapDriver.class);
    headConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
    headConfig.setStubWrapper(new UserCodeClassWrapper<CustomCompensatingMap>(CustomCompensatingMap.class));
    headConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    headConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    headConfig.setStubParameter("compensation.failingIteration", String.valueOf(failingIteration));
    headConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));
    // aggregator registered under the name published by CustomCompensatableDotProductCoGroup
    headConfig.addIterationAggregator(CustomCompensatableDotProductCoGroup.AGGREGATOR_NAME, PageRankStatsAggregator.class);

    // --------------- the join ---------------------
   
    JobTaskVertex intermediate = JobGraphUtils.createTask(IterationIntermediatePactTask.class,
      "IterationIntermediate", jobGraph, degreeOfParallelism, numSubTasksPerInstance);
    TaskConfig intermediateConfig = new TaskConfig(intermediate.getConfiguration());
    intermediateConfig.setIterationId(ITERATION_ID);
//    intermediateConfig.setDriver(RepeatableHashjoinMatchDriverWithCachedBuildside.class);
    intermediateConfig.setDriver(BuildSecondCachedMatchDriver.class);
    intermediateConfig.setDriverStrategy(DriverStrategy.HYBRIDHASH_BUILD_SECOND);
    intermediateConfig.setMemoryDriver(matchMemory * JobGraphUtils.MEGABYTE);
    intermediateConfig.addInputToGroup(0);
    intermediateConfig.addInputToGroup(1);
    intermediateConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    intermediateConfig.setInputSerializer(vertexWithAdjacencyListSerializer, 1);
    intermediateConfig.setDriverComparator(vertexWithRankAndDanglingComparator, 0);
    intermediateConfig.setDriverComparator(vertexWithAdjacencyListComparator, 1);
    intermediateConfig.setDriverPairComparator(matchComparator);
   
    intermediateConfig.setOutputSerializer(vertexWithRankSerializer);
    intermediateConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    intermediateConfig.setOutputComparator(vertexWithRankComparator, 0);
   
    intermediateConfig.setStubWrapper(new UserCodeClassWrapper<CustomCompensatableDotProductMatch>(CustomCompensatableDotProductMatch.class));
    intermediateConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    intermediateConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    intermediateConfig.setStubParameter("compensation.failingIteration", String.valueOf(failingIteration));
    intermediateConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));

    // ---------------- the tail (co group) --------------------
   
    JobTaskVertex tail = JobGraphUtils.createTask(IterationTailPactTask.class, "IterationTail", jobGraph,
      degreeOfParallelism, numSubTasksPerInstance);
    TaskConfig tailConfig = new TaskConfig(tail.getConfiguration());
    tailConfig.setIterationId(ITERATION_ID);
        tailConfig.setIsWorksetUpdate();
    // TODO we need to combine!
   
    // inputs and driver
    tailConfig.setDriver(CoGroupDriver.class);
    tailConfig.setDriverStrategy(DriverStrategy.CO_GROUP);
    tailConfig.addInputToGroup(0);
    tailConfig.addInputToGroup(1);
    tailConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    tailConfig.setInputSerializer(vertexWithRankSerializer, 1);
    tailConfig.setDriverComparator(vertexWithRankAndDanglingComparator, 0);
    tailConfig.setDriverComparator(vertexWithRankComparator, 1);
    tailConfig.setDriverPairComparator(coGroupComparator);
    tailConfig.setInputAsynchronouslyMaterialized(0, true);
    tailConfig.setInputMaterializationMemory(0, minorConsumer * JobGraphUtils.MEGABYTE);
    tailConfig.setInputLocalStrategy(1, LocalStrategy.SORT);
    tailConfig.setInputComparator(vertexWithRankComparator, 1);
    tailConfig.setMemoryInput(1, coGroupSortMemory * JobGraphUtils.MEGABYTE);
    tailConfig.setFilehandlesInput(1, NUM_FILE_HANDLES_PER_SORT);
    tailConfig.setSpillingThresholdInput(1, SORT_SPILL_THRESHOLD);
   
    // output
    tailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    tailConfig.setOutputSerializer(vertexWithRankAndDanglingSerializer);
   
    // the stub
    tailConfig.setStubWrapper(new UserCodeClassWrapper<CustomCompensatableDotProductCoGroup>(CustomCompensatableDotProductCoGroup.class));
    tailConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    tailConfig.setStubParameter("pageRank.numDanglingVertices", String.valueOf(numDanglingVertices));
    tailConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    tailConfig.setStubParameter("compensation.failingIteration", String.valueOf(failingIteration));
    tailConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));
   
    // --------------- the output ---------------------

    JobOutputVertex output = JobGraphUtils.createFileOutput(jobGraph, "FinalOutput", degreeOfParallelism,
      numSubTasksPerInstance);
    TaskConfig outputConfig = new TaskConfig(output.getConfiguration());
    outputConfig.addInputToGroup(0);
    outputConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    outputConfig.setStubWrapper(new UserCodeClassWrapper<CustomPageWithRankOutFormat>(CustomPageWithRankOutFormat.class));
    outputConfig.setStubParameter(FileOutputFormat.FILE_PARAMETER_KEY, outputPath);
   
    // --------------- the auxiliaries ---------------------
   
    JobOutputVertex fakeTailOutput = JobGraphUtils.createFakeOutput(jobGraph, "FakeTailOutput",
      degreeOfParallelism, numSubTasksPerInstance);

    JobOutputVertex sync = JobGraphUtils.createSync(jobGraph, degreeOfParallelism);
    TaskConfig syncConfig = new TaskConfig(sync.getConfiguration());
    syncConfig.setNumberOfIterations(numIterations);
    syncConfig.addIterationAggregator(CustomCompensatableDotProductCoGroup.AGGREGATOR_NAME, PageRankStatsAggregator.class);
    syncConfig.setConvergenceCriterion(CustomCompensatableDotProductCoGroup.AGGREGATOR_NAME, DiffL1NormConvergenceCriterion.class);
    syncConfig.setIterationId(ITERATION_ID);
   
    // --------------- the wiring ---------------------
    // Connections leaving the head are made in this order: intermediate, tail, output, sync.

    // initial ranks enter the head over the network
    JobGraphUtils.connect(pageWithRankInput, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);

    // head -> join: in-memory, pointwise; gate 0 of the join is iterative with 1 event until interrupt
    JobGraphUtils.connect(head, intermediate, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
    intermediateConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
   
    // the adjacency lists are connected to the join without marking that gate as iterative
    JobGraphUtils.connect(adjacencyListInput, intermediate, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
   
    // the tail receives the head's output (gate 0, pointwise) and the join's output (gate 1, bipartite)
    JobGraphUtils.connect(head, tail, ChannelType.NETWORK, DistributionPattern.POINTWISE);
    JobGraphUtils.connect(intermediate, tail, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
    // gate 0 is configured with 1 event, gate 1 with degreeOfParallelism events;
    // NOTE(review): presumably one event per sending subtask of the respective connection - confirm
    tailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
    tailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(1, degreeOfParallelism);

    // final output of the head and the fake output behind the tail, both in-memory and pointwise
    JobGraphUtils.connect(head, output, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
    JobGraphUtils.connect(tail, fakeTailOutput, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);

    // last connection leaving the head
    JobGraphUtils.connect(head, sync, ChannelType.NETWORK, DistributionPattern.POINTWISE);
   
    // instance sharing: fakeTailOutput shares with tail, every other vertex shares with head
    fakeTailOutput.setVertexToShareInstancesWith(tail);
    tail.setVertexToShareInstancesWith(head);
    pageWithRankInput.setVertexToShareInstancesWith(head);
    adjacencyListInput.setVertexToShareInstancesWith(head);
    intermediate.setVertexToShareInstancesWith(head);
    output.setVertexToShareInstancesWith(head);
    sync.setVertexToShareInstancesWith(head);
View Full Code Here

      // input vertex: reads inputFile with FileLineReader
      final JobFileInputVertex i1 = new JobFileInputVertex("Input 1", jg);
      i1.setFileInputClass(FileLineReader.class);
      i1.setFilePath(new Path(inputFile.toURI()));

      // task vertex
      final JobTaskVertex t1 = new JobTaskVertex("Task 1", jg);
      t1.setTaskClass(ForwardTask1Input1Output.class);

      // output vertex: writes with FileLineWriter to a file name obtained from ServerTestUtils
      final JobFileOutputVertex o1 = new JobFileOutputVertex("Output 1", jg);
      o1.setFileOutputClass(FileLineWriter.class);
      o1.setFilePath(new Path(new File(ServerTestUtils.getRandomFilename()).toURI()));

      // instance sharing chain: o1 shares with i1, i1 shares with t1
      o1.setVertexToShareInstancesWith(i1);
      i1.setVertexToShareInstancesWith(t1);

      // connect vertices (no channel type given: whatever connectTo(vertex) defaults to)
      i1.connectTo(t1);
      t1.connectTo(o1);

      // register the job with an empty list of required libraries
      LibraryCacheManager.register(jobID, new String[0]);

      // now convert job graph to execution graph
      final ExecutionGraph eg = new ExecutionGraph(jg, INSTANCE_MANAGER);
View Full Code Here

      // input vertex: reads inputFile with FileLineReader
      final JobFileInputVertex i1 = new JobFileInputVertex("Input 1", jg);
      i1.setFileInputClass(FileLineReader.class);
      i1.setFilePath(new Path(inputFile.toURI()));

      // task vertex
      final JobTaskVertex t1 = new JobTaskVertex("Task 1", jg);
      t1.setTaskClass(ForwardTask1Input1Output.class);

      // output vertex: writes with FileLineWriter to a file name obtained from ServerTestUtils
      final JobFileOutputVertex o1 = new JobFileOutputVertex("Output 1", jg);
      o1.setFileOutputClass(FileLineWriter.class);
      o1.setFilePath(new Path(new File(ServerTestUtils.getRandomFilename()).toURI()));

      // connect vertices, explicitly requesting in-memory channels on both edges;
      // no instance sharing is configured in the visible lines
      i1.connectTo(t1, ChannelType.IN_MEMORY);
      t1.connectTo(o1, ChannelType.IN_MEMORY);

      // register the job with an empty list of required libraries
      LibraryCacheManager.register(jobID, new String[0]);

      // now convert job graph to execution graph
      final ExecutionGraph eg = new ExecutionGraph(jg, INSTANCE_MANAGER);
View Full Code Here

      // second input vertex (i1 and i2 are declared above this excerpt); 2 subtasks
      i2.setFileInputClass(FileLineReader.class);
      i2.setFilePath(new Path(inputFile2.toURI()));
      i2.setNumberOfSubtasks(2);

      // task vertex
      // t1 and t2 are single-input forwarders, t3 takes two inputs; each has 2 subtasks
      final JobTaskVertex t1 = new JobTaskVertex("Task 1", jg);
      t1.setTaskClass(ForwardTask1Input1Output.class);
      t1.setNumberOfSubtasks(2);
      final JobTaskVertex t2 = new JobTaskVertex("Task 2", jg);
      t2.setTaskClass(ForwardTask1Input1Output.class);
      t2.setNumberOfSubtasks(2);
      final JobTaskVertex t3 = new JobTaskVertex("Task 3", jg);
      t3.setTaskClass(ForwardTask2Inputs1Output.class);
      t3.setNumberOfSubtasks(2);

     
      // output vertex
      final JobFileOutputVertex o1 = new JobFileOutputVertex("Output 1", jg);
      o1.setFileOutputClass(FileLineWriter.class);
      o1.setFilePath(new Path(outputFile.toURI()));
      o1.setNumberOfSubtasks(2);
      // instance sharing chains: i1 -> t1 -> t3, i2 -> t2 -> t3, and t3 -> o1
      i1.setVertexToShareInstancesWith(t1);
      t1.setVertexToShareInstancesWith(t3);
      i2.setVertexToShareInstancesWith(t2);
      t2.setVertexToShareInstancesWith(t3);
      t3.setVertexToShareInstancesWith(o1);

      // connect vertices: each input feeds its own task, both tasks feed t3, t3 feeds the output
      i1.connectTo(t1);
      i2.connectTo(t2);
      t1.connectTo(t3);
      t2.connectTo(t3);
      t3.connectTo(o1);

      // register the job with an empty list of required libraries
      LibraryCacheManager.register(jobID, new String[0]);

      // now convert job graph to execution graph
      final ExecutionGraph eg = new ExecutionGraph(jg, INSTANCE_MANAGER);
View Full Code Here

      // second input vertex (i1 and i2 are declared above this excerpt): 4 subtasks, 2 per instance
      i2.setFileInputClass(FileLineReader.class);
      i2.setFilePath(new Path(inputFile2.toURI()));
      i2.setNumberOfSubtasks(4);
      i2.setNumberOfSubtasksPerInstance(2);
      // task vertex
      // t1 and t2: 4 subtasks, 2 per instance; t3 and t4: 8 subtasks, 4 per instance
      final JobTaskVertex t1 = new JobTaskVertex("Task 1", jg);
      t1.setTaskClass(ForwardTask1Input1Output.class);
      t1.setNumberOfSubtasks(4);
      t1.setNumberOfSubtasksPerInstance(2);
      final JobTaskVertex t2 = new JobTaskVertex("Task 2", jg);
      t2.setTaskClass(ForwardTask1Input1Output.class);
      t2.setNumberOfSubtasks(4);
      t2.setNumberOfSubtasksPerInstance(2);
      final JobTaskVertex t3 = new JobTaskVertex("Task 3", jg);
      t3.setTaskClass(ForwardTask2Inputs1Output.class);
      t3.setNumberOfSubtasks(8);
      t3.setNumberOfSubtasksPerInstance(4);
      final JobTaskVertex t4 = new JobTaskVertex("Task 4", jg);
      t4.setTaskClass(ForwardTask1Input2Outputs.class);
      t4.setNumberOfSubtasks(8);
      t4.setNumberOfSubtasksPerInstance(4);
      // output vertex
      // two file outputs, each with 4 subtasks and 2 per instance
      final JobFileOutputVertex o1 = new JobFileOutputVertex("Output 1", jg);
      o1.setFileOutputClass(FileLineWriter.class);
      o1.setFilePath(new Path(outputFile1.toURI()));
      o1.setNumberOfSubtasks(4);
      o1.setNumberOfSubtasksPerInstance(2);
      final JobFileOutputVertex o2 = new JobFileOutputVertex("Output 2", jg);
      o2.setFileOutputClass(FileLineWriter.class);
      o2.setFilePath(new Path(outputFile2.toURI()));
      o2.setNumberOfSubtasks(4);
      o2.setNumberOfSubtasksPerInstance(2);
      // the only instance sharing set in the visible lines: o1 shares with o2
      o1.setVertexToShareInstancesWith(o2);

      // connect vertices
      // in-memory edges name POINTWISE explicitly; network edges pass no distribution pattern
      i1.connectTo(t1, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
      i2.connectTo(t2, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
      t1.connectTo(t3, ChannelType.NETWORK);
      t2.connectTo(t3, ChannelType.NETWORK);
      t3.connectTo(t4, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
      t4.connectTo(o1, ChannelType.NETWORK);
      t4.connectTo(o2, ChannelType.NETWORK);

      // register the job with an empty list of required libraries
      LibraryCacheManager.register(jobID, new String[0]);

      // now convert job graph to execution graph
      final ExecutionGraph eg = new ExecutionGraph(jg, INSTANCE_MANAGER);
View Full Code Here

    adjacencyListInputConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    adjacencyListInputConfig.setOutputSerializer(vertexWithAdjacencyListSerializer);
    adjacencyListInputConfig.setOutputComparator(vertexWithAdjacencyListComparator, 0);

    // --------------- the head ---------------------
    JobTaskVertex head = JobGraphUtils.createTask(IterationHeadPactTask.class, "IterationHead", jobGraph,
      degreeOfParallelism, numSubTasksPerInstance);
    TaskConfig headConfig = new TaskConfig(head.getConfiguration());
    headConfig.setIterationId(ITERATION_ID);
   
    // initial input / partial solution
    headConfig.addInputToGroup(0);
    headConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
    headConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    headConfig.setInputComparator(vertexWithRankAndDanglingComparator, 0);
    headConfig.setInputLocalStrategy(0, LocalStrategy.SORT);
    headConfig.setMemoryInput(0, minorConsumer * JobGraphUtils.MEGABYTE);
    headConfig.setFilehandlesInput(0, NUM_FILE_HANDLES_PER_SORT);
    headConfig.setSpillingThresholdInput(0, SORT_SPILL_THRESHOLD);
   
    // back channel / iterations
    headConfig.setBackChannelMemory(minorConsumer * JobGraphUtils.MEGABYTE);
   
    // output into iteration
    headConfig.setOutputSerializer(vertexWithRankAndDanglingSerializer);
    headConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    headConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
   
    // final output
    TaskConfig headFinalOutConfig = new TaskConfig(new Configuration());
    headFinalOutConfig.setOutputSerializer(vertexWithRankAndDanglingSerializer);
    headFinalOutConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    headConfig.setIterationHeadFinalOutputConfig(headFinalOutConfig);
   
    // the sync
    headConfig.setIterationHeadIndexOfSyncOutput(3);
    headConfig.setNumberOfIterations(numIterations);
   
    // the driver
    headConfig.setDriver(CollectorMapDriver.class);
    headConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
    headConfig.setStubWrapper(new UserCodeClassWrapper<CustomCompensatingMap>(CustomCompensatingMap.class));
    headConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    headConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    headConfig.setStubParameter("compensation.failingIteration", String.valueOf(failingIteration));
    headConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));
    headConfig.addIterationAggregator(CustomCompensatableDotProductCoGroup.AGGREGATOR_NAME, PageRankStatsAggregator.class);

    // --------------- the join ---------------------
   
    JobTaskVertex intermediate = JobGraphUtils.createTask(IterationIntermediatePactTask.class,
      "IterationIntermediate", jobGraph, degreeOfParallelism, numSubTasksPerInstance);
    TaskConfig intermediateConfig = new TaskConfig(intermediate.getConfiguration());
    intermediateConfig.setIterationId(ITERATION_ID);
//    intermediateConfig.setDriver(RepeatableHashjoinMatchDriverWithCachedBuildside.class);
    intermediateConfig.setDriver(BuildSecondCachedMatchDriver.class);
    intermediateConfig.setDriverStrategy(DriverStrategy.HYBRIDHASH_BUILD_SECOND);
    intermediateConfig.setMemoryDriver(matchMemory * JobGraphUtils.MEGABYTE);
    intermediateConfig.addInputToGroup(0);
    intermediateConfig.addInputToGroup(1);
    intermediateConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    intermediateConfig.setInputSerializer(vertexWithAdjacencyListSerializer, 1);
    intermediateConfig.setDriverComparator(vertexWithRankAndDanglingComparator, 0);
    intermediateConfig.setDriverComparator(vertexWithAdjacencyListComparator, 1);
    intermediateConfig.setDriverPairComparator(matchComparator);
   
    intermediateConfig.setOutputSerializer(vertexWithRankSerializer);
    intermediateConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
   
    intermediateConfig.setStubWrapper(new UserCodeClassWrapper<CustomCompensatableDotProductMatch>(CustomCompensatableDotProductMatch.class));
    intermediateConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    intermediateConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    intermediateConfig.setStubParameter("compensation.failingIteration", String.valueOf(failingIteration));
    intermediateConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));
   
    // the combiner and the output
    // The combiner gets its own TaskConfig on a fresh Configuration and is attached to the
    // join as a chained task named "Combiner" (last statement of this section).
    TaskConfig combinerConfig = new TaskConfig(new Configuration());
    // single input, using the same serializer that the join declares for its output
    combinerConfig.addInputToGroup(0);
    combinerConfig.setInputSerializer(vertexWithRankSerializer, 0);
    // sorted group combine with the memory budget given by coGroupSortMemory (scaled by MEGABYTE)
    combinerConfig.setDriverStrategy(DriverStrategy.SORTED_GROUP_COMBINE);
    combinerConfig.setDriverComparator(vertexWithRankComparator, 0);
    combinerConfig.setMemoryDriver(coGroupSortMemory * JobGraphUtils.MEGABYTE);
    // the hash partitioning is declared here, on the combiner's output; the join itself ships FORWARD
    combinerConfig.setOutputSerializer(vertexWithRankSerializer);
    combinerConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
    combinerConfig.setOutputComparator(vertexWithRankComparator, 0);
    combinerConfig.setStubWrapper(new UserCodeClassWrapper<CustomRankCombiner>(CustomRankCombiner.class));
    intermediateConfig.addChainedTask(SynchronousChainedCombineDriver.class, combinerConfig, "Combiner");

    // ---------------- the tail (co group) --------------------
   
    JobTaskVertex tail = JobGraphUtils.createTask(IterationTailPactTask.class, "IterationTail", jobGraph,
      degreeOfParallelism, numSubTasksPerInstance);
    TaskConfig tailConfig = new TaskConfig(tail.getConfiguration());
    tailConfig.setIterationId(ITERATION_ID);
        tailConfig.setIsWorksetUpdate();
   
    // inputs and driver
    tailConfig.setDriver(CoGroupDriver.class);
    tailConfig.setDriverStrategy(DriverStrategy.CO_GROUP);
    tailConfig.addInputToGroup(0);
    tailConfig.addInputToGroup(1);
    tailConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    tailConfig.setInputSerializer(vertexWithRankSerializer, 1);
    tailConfig.setDriverComparator(vertexWithRankAndDanglingComparator, 0);
    tailConfig.setDriverComparator(vertexWithRankComparator, 1);
    tailConfig.setDriverPairComparator(coGroupComparator);
    tailConfig.setInputAsynchronouslyMaterialized(0, true);
    tailConfig.setInputMaterializationMemory(0, minorConsumer * JobGraphUtils.MEGABYTE);
    tailConfig.setInputLocalStrategy(1, LocalStrategy.SORT);
    tailConfig.setInputComparator(vertexWithRankComparator, 1);
    tailConfig.setMemoryInput(1, coGroupSortMemory * JobGraphUtils.MEGABYTE);
    tailConfig.setFilehandlesInput(1, NUM_FILE_HANDLES_PER_SORT);
    tailConfig.setSpillingThresholdInput(1, SORT_SPILL_THRESHOLD);
    tailConfig.addIterationAggregator(CustomCompensatableDotProductCoGroup.AGGREGATOR_NAME, PageRankStatsAggregator.class);
   
    // output
    tailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
    tailConfig.setOutputSerializer(vertexWithRankAndDanglingSerializer);
   
    // the stub
    tailConfig.setStubWrapper(new UserCodeClassWrapper<CustomCompensatableDotProductCoGroup>(CustomCompensatableDotProductCoGroup.class));
    tailConfig.setStubParameter("pageRank.numVertices", String.valueOf(numVertices));
    tailConfig.setStubParameter("pageRank.numDanglingVertices", String.valueOf(numDanglingVertices));
    tailConfig.setStubParameter("compensation.failingWorker", failingWorkers);
    tailConfig.setStubParameter("compensation.failingIteration", String.valueOf(failingIteration));
    tailConfig.setStubParameter("compensation.messageLoss", String.valueOf(messageLoss));
   
    // --------------- the output ---------------------

    JobOutputVertex output = JobGraphUtils.createFileOutput(jobGraph, "FinalOutput", degreeOfParallelism,
      numSubTasksPerInstance);
    TaskConfig outputConfig = new TaskConfig(output.getConfiguration());
    outputConfig.addInputToGroup(0);
    outputConfig.setInputSerializer(vertexWithRankAndDanglingSerializer, 0);
    outputConfig.setStubWrapper(new UserCodeClassWrapper<CustomPageWithRankOutFormat>(CustomPageWithRankOutFormat.class));
    outputConfig.setStubParameter(FileOutputFormat.FILE_PARAMETER_KEY, outputPath);
   
    // --------------- the auxiliaries ---------------------
   
    JobOutputVertex fakeTailOutput = JobGraphUtils.createFakeOutput(jobGraph, "FakeTailOutput",
      degreeOfParallelism, numSubTasksPerInstance);

    JobOutputVertex sync = JobGraphUtils.createSync(jobGraph, degreeOfParallelism);
    TaskConfig syncConfig = new TaskConfig(sync.getConfiguration());
    syncConfig.setNumberOfIterations(numIterations);
    syncConfig.addIterationAggregator(CustomCompensatableDotProductCoGroup.AGGREGATOR_NAME, PageRankStatsAggregator.class);
    syncConfig.setConvergenceCriterion(CustomCompensatableDotProductCoGroup.AGGREGATOR_NAME, DiffL1NormConvergenceCriterion.class);
    syncConfig.setIterationId(ITERATION_ID);
   
    // --------------- the wiring ---------------------

    JobGraphUtils.connect(pageWithRankInput, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);

    JobGraphUtils.connect(head, intermediate, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
    intermediateConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
   
    JobGraphUtils.connect(adjacencyListInput, intermediate, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
   
    JobGraphUtils.connect(head, tail, ChannelType.NETWORK, DistributionPattern.POINTWISE);
    JobGraphUtils.connect(intermediate, tail, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
    tailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
    tailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(1, degreeOfParallelism);

    JobGraphUtils.connect(head, output, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
    JobGraphUtils.connect(tail, fakeTailOutput, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);

    JobGraphUtils.connect(head, sync, ChannelType.NETWORK, DistributionPattern.POINTWISE);
   
    fakeTailOutput.setVertexToShareInstancesWith(tail);
    tail.setVertexToShareInstancesWith(head);
    pageWithRankInput.setVertexToShareInstancesWith(head);
    adjacencyListInput.setVertexToShareInstancesWith(head);
    intermediate.setVertexToShareInstancesWith(head);
    output.setVertexToShareInstancesWith(head);
    sync.setVertexToShareInstancesWith(head);
View Full Code Here

    // input
    JobInputVertex vertices = createVerticesInput(jobGraph, verticesPath, numSubTasks, serializer, comparator);
    JobInputVertex edges = createEdgesInput(jobGraph, edgesPath, numSubTasks, serializer, comparator);

    // head
    JobTaskVertex head = createIterationHead(jobGraph, numSubTasks, serializer, comparator, pairComparator);
    TaskConfig headConfig = new TaskConfig(head.getConfiguration());
    headConfig.setWaitForSolutionSetUpdate();

    // intermediate
    JobTaskVertex intermediate = createIterationIntermediate(jobGraph, numSubTasks, serializer, comparator);
    TaskConfig intermediateConfig = new TaskConfig(intermediate.getConfiguration());

    // output and auxiliaries
    JobOutputVertex output = createOutput(jobGraph, resultPath, numSubTasks, serializer);
    JobOutputVertex ssFakeTail = createFakeTail(jobGraph, numSubTasks);
    JobOutputVertex wsFakeTail = createFakeTail(jobGraph, numSubTasks);
    JobOutputVertex sync = createSync(jobGraph, numSubTasks, maxIterations);

    // ------------------ the intermediate (ss join) ----------------------
    // Intermediate iteration task named "Solution Set Join"; numSubTasks is passed as both
    // trailing arguments of createTask.
    JobTaskVertex ssJoinIntermediate = JobGraphUtils.createTask(IterationIntermediatePactTask.class,
      "Solution Set Join", jobGraph, numSubTasks, numSubTasks);
    TaskConfig ssJoinIntermediateConfig = new TaskConfig(ssJoinIntermediate.getConfiguration());
    // the bare block below only groups the configuration statements; it declares no locals
    {
      ssJoinIntermediateConfig.setIterationId(ITERATION_ID);

      // inputs
      // only input group 0 is declared here
      ssJoinIntermediateConfig.addInputToGroup(0);
      ssJoinIntermediateConfig.setInputSerializer(serializer, 0);

      // output
      // two FORWARD outputs (indices 0 and 1), each given the same comparator
      ssJoinIntermediateConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
      ssJoinIntermediateConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
      ssJoinIntermediateConfig.setOutputComparator(comparator, 0);
      ssJoinIntermediateConfig.setOutputComparator(comparator, 1);

      ssJoinIntermediateConfig.setOutputSerializer(serializer);

      // driver
      // join-with-solution-set driver using the HYBRIDHASH_BUILD_SECOND strategy;
      // a driver comparator is set for input 0 only, plus the pair comparator
      ssJoinIntermediateConfig.setDriver(JoinWithSolutionSetSecondDriver.class);
      ssJoinIntermediateConfig.setDriverStrategy(DriverStrategy.HYBRIDHASH_BUILD_SECOND);
      ssJoinIntermediateConfig.setDriverComparator(comparator, 0);
      ssJoinIntermediateConfig.setDriverPairComparator(pairComparator);
     
      // user code executed by the driver
      ssJoinIntermediateConfig.setStubWrapper(
        new UserCodeClassWrapper<UpdateComponentIdMatch>(UpdateComponentIdMatch.class));
    }

    // -------------------------- ss tail --------------------------------
    JobTaskVertex ssTail = JobGraphUtils.createTask(IterationTailPactTask.class, "IterationSolutionSetTail",
      jobGraph, numSubTasks, numSubTasks);
    TaskConfig ssTailConfig = new TaskConfig(ssTail.getConfiguration());
    {
      ssTailConfig.setIterationId(ITERATION_ID);
      ssTailConfig.setIsSolutionSetUpdate();
      ssTailConfig.setIsWorksetIteration();

      // inputs and driver
      ssTailConfig.addInputToGroup(0);
      ssTailConfig.setInputSerializer(serializer, 0);
      ssTailConfig.setInputAsynchronouslyMaterialized(0, true);
      ssTailConfig.setInputMaterializationMemory(0, MEM_PER_CONSUMER * JobGraphUtils.MEGABYTE);

      // output
      ssTailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
      ssTailConfig.setOutputSerializer(serializer);

      // the driver
      ssTailConfig.setDriver(CollectorMapDriver.class);
      ssTailConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
      ssTailConfig.setStubWrapper(new UserCodeClassWrapper<DummyMapper>(DummyMapper.class));
    }

    // -------------------------- ws tail --------------------------------
    JobTaskVertex wsTail = JobGraphUtils.createTask(IterationTailPactTask.class, "IterationWorksetTail",
      jobGraph, numSubTasks, numSubTasks);
    TaskConfig wsTailConfig = new TaskConfig(wsTail.getConfiguration());
    {
      wsTailConfig.setIterationId(ITERATION_ID);
      wsTailConfig.setIsWorksetIteration();
      wsTailConfig.setIsWorksetUpdate();

      // inputs and driver
      wsTailConfig.addInputToGroup(0);
      wsTailConfig.setInputSerializer(serializer, 0);

      // output
      wsTailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
      wsTailConfig.setOutputSerializer(serializer);

      // the driver
      wsTailConfig.setDriver(CollectorMapDriver.class);
      wsTailConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
      wsTailConfig.setStubWrapper(new UserCodeClassWrapper<DummyMapper>(DummyMapper.class));
    }

    // --------------- the wiring ---------------------

    JobGraphUtils.connect(vertices, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
    JobGraphUtils.connect(edges, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
    JobGraphUtils.connect(vertices, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);

    JobGraphUtils.connect(head, intermediate, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
    intermediateConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, numSubTasks);

    JobGraphUtils.connect(intermediate, ssJoinIntermediate, ChannelType.NETWORK, DistributionPattern.POINTWISE);
    ssJoinIntermediateConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);

    JobGraphUtils.connect(ssJoinIntermediate, ssTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
    ssTailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);

    JobGraphUtils.connect(ssJoinIntermediate, wsTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
    wsTailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);

    JobGraphUtils.connect(head, output, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);

    JobGraphUtils.connect(ssTail, ssFakeTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
    JobGraphUtils.connect(wsTail, wsFakeTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);

    JobGraphUtils.connect(head, sync, ChannelType.NETWORK, DistributionPattern.POINTWISE);

    vertices.setVertexToShareInstancesWith(head);
    edges.setVertexToShareInstancesWith(head);

    intermediate.setVertexToShareInstancesWith(head);

    ssJoinIntermediate.setVertexToShareInstancesWith(head);
    wsTail.setVertexToShareInstancesWith(head);

    output.setVertexToShareInstancesWith(head);
    sync.setVertexToShareInstancesWith(head);

    ssTail.setVertexToShareInstancesWith(wsTail);
View Full Code Here

TOP

Related Classes of eu.stratosphere.nephele.jobgraph.JobTaskVertex

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.