Package eu.stratosphere.test.compiler.iterations

Source Code of eu.stratosphere.test.compiler.iterations.ConnectedComponentsTest$UpdateComponentIdMatchMirrored

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.test.compiler.iterations;

import java.io.Serializable;

import org.junit.Assert;
import org.junit.Test;

import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.java.record.operators.FileDataSink;
import eu.stratosphere.api.java.record.operators.FileDataSource;
import eu.stratosphere.api.java.record.operators.DeltaIteration;
import eu.stratosphere.api.common.operators.util.FieldList;
import eu.stratosphere.api.java.record.functions.JoinFunction;
import eu.stratosphere.api.java.record.functions.FunctionAnnotation.ConstantFieldsSecond;
import eu.stratosphere.api.java.record.io.CsvInputFormat;
import eu.stratosphere.api.java.record.io.CsvOutputFormat;
import eu.stratosphere.api.java.record.operators.JoinOperator;
import eu.stratosphere.api.java.record.operators.MapOperator;
import eu.stratosphere.api.java.record.operators.ReduceOperator;
import eu.stratosphere.compiler.dag.TempMode;
import eu.stratosphere.compiler.plan.DualInputPlanNode;
import eu.stratosphere.compiler.plan.OptimizedPlan;
import eu.stratosphere.compiler.plan.SingleInputPlanNode;
import eu.stratosphere.compiler.plan.SinkPlanNode;
import eu.stratosphere.compiler.plan.SourcePlanNode;
import eu.stratosphere.compiler.plan.WorksetIterationPlanNode;
import eu.stratosphere.compiler.plandump.PlanJSONDumpGenerator;
import eu.stratosphere.compiler.plantranslate.NepheleJobGraphGenerator;
import eu.stratosphere.pact.runtime.shipping.ShipStrategyType;
import eu.stratosphere.pact.runtime.task.DriverStrategy;
import eu.stratosphere.pact.runtime.task.util.LocalStrategy;
import eu.stratosphere.test.compiler.util.CompilerTestBase;
import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents;
import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents.DuplicateLongMap;
import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents.MinimumComponentIDReduce;
import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents.NeighborWithComponentIDJoin;
import eu.stratosphere.types.LongValue;
import eu.stratosphere.types.Record;
import eu.stratosphere.util.Collector;

/**
*
*/
public class ConnectedComponentsTest extends CompilerTestBase {
 
  private static final String VERTEX_SOURCE = "Vertices";
 
  private static final String ITERATION_NAME = "Connected Components Iteration";
 
  private static final String EDGES_SOURCE = "Edges";
  private static final String JOIN_NEIGHBORS_MATCH = "Join Candidate Id With Neighbor";
  private static final String MIN_ID_REDUCER = "Find Minimum Candidate Id";
  private static final String UPDATE_ID_MATCH = "Update Component Id";
 
  private static final String SINK = "Result";
 
  private static final boolean PRINT_PLAN = false;
 
  private final FieldList set0 = new FieldList(0);
 
 
  @Test
  public void testWorksetConnectedComponents() {
    WorksetConnectedComponents cc = new WorksetConnectedComponents();

    Plan plan = cc.getPlan(String.valueOf(DEFAULT_PARALLELISM),
        IN_FILE, IN_FILE, OUT_FILE, String.valueOf(100));

    OptimizedPlan optPlan = compileNoStats(plan);
    OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);
   
    if (PRINT_PLAN) {
      PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
      String json = dumper.getOptimizerPlanAsJSON(optPlan);
      System.out.println(json);
    }
   
    SourcePlanNode vertexSource = or.getNode(VERTEX_SOURCE);
    SourcePlanNode edgesSource = or.getNode(EDGES_SOURCE);
    SinkPlanNode sink = or.getNode(SINK);
    WorksetIterationPlanNode iter = or.getNode(ITERATION_NAME);
   
    DualInputPlanNode neighborsJoin = or.getNode(JOIN_NEIGHBORS_MATCH);
    SingleInputPlanNode minIdReducer = or.getNode(MIN_ID_REDUCER);
    SingleInputPlanNode minIdCombiner = (SingleInputPlanNode) minIdReducer.getPredecessor();
    DualInputPlanNode updatingMatch = or.getNode(UPDATE_ID_MATCH);
   
    // test all drivers
    Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.NONE, vertexSource.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.NONE, edgesSource.getDriverStrategy());
   
//    Assert.assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND, neighborsJoin.getDriverStrategy());
    Assert.assertEquals(set0, neighborsJoin.getKeysForInput1());
    Assert.assertEquals(set0, neighborsJoin.getKeysForInput2());
   
    Assert.assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND, updatingMatch.getDriverStrategy());
    Assert.assertEquals(set0, updatingMatch.getKeysForInput1());
    Assert.assertEquals(set0, updatingMatch.getKeysForInput2());
   
    // test all the shipping strategies
    Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialSolutionSetInput().getShipStrategy());
    Assert.assertEquals(set0, iter.getInitialSolutionSetInput().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialWorksetInput().getShipStrategy());
    Assert.assertEquals(set0, iter.getInitialWorksetInput().getShipStrategyKeys());
   
    Assert.assertEquals(ShipStrategyType.FORWARD, neighborsJoin.getInput1().getShipStrategy()); // workset
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, neighborsJoin.getInput2().getShipStrategy()); // edges
    Assert.assertEquals(set0, neighborsJoin.getInput2().getShipStrategyKeys());
    Assert.assertTrue(neighborsJoin.getInput2().getTempMode().isCached());
   
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, minIdReducer.getInput().getShipStrategy());
    Assert.assertEquals(set0, minIdReducer.getInput().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.FORWARD, minIdCombiner.getInput().getShipStrategy());
   
    Assert.assertEquals(ShipStrategyType.FORWARD, updatingMatch.getInput1().getShipStrategy()); // min id
    Assert.assertEquals(ShipStrategyType.FORWARD, updatingMatch.getInput2().getShipStrategy()); // solution set
   
    // test all the local strategies
    Assert.assertEquals(LocalStrategy.NONE, sink.getInput().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.NONE, iter.getInitialSolutionSetInput().getLocalStrategy());
//    Assert.assertEquals(LocalStrategy.NONE, iter.getInitialWorksetInput().getLocalStrategy());
   
    Assert.assertEquals(LocalStrategy.NONE, neighborsJoin.getInput1().getLocalStrategy()); // workset
    Assert.assertEquals(LocalStrategy.NONE, neighborsJoin.getInput2().getLocalStrategy()); // edges
   
    Assert.assertEquals(LocalStrategy.COMBININGSORT, minIdReducer.getInput().getLocalStrategy());
    Assert.assertEquals(set0, minIdReducer.getInput().getLocalStrategyKeys());
    Assert.assertEquals(LocalStrategy.NONE, minIdCombiner.getInput().getLocalStrategy());
   
    Assert.assertEquals(LocalStrategy.NONE, updatingMatch.getInput1().getLocalStrategy()); // min id
    Assert.assertEquals(LocalStrategy.NONE, updatingMatch.getInput2().getLocalStrategy()); // solution set
   
    // check the dams
    Assert.assertTrue(TempMode.PIPELINE_BREAKER == iter.getInitialWorksetInput().getTempMode() ||
              LocalStrategy.SORT == iter.getInitialWorksetInput().getLocalStrategy());
   
    NepheleJobGraphGenerator jgg = new NepheleJobGraphGenerator();
    jgg.compileJobGraph(optPlan);
  }
 
  @Test
  public void testWorksetConnectedComponentsWithSolutionSetAsFirstInput() {

    Plan plan = getPlanForWorksetConnectedComponentsWithSolutionSetAsFirstInput(DEFAULT_PARALLELISM,
        IN_FILE, IN_FILE, OUT_FILE, 100);

    OptimizedPlan optPlan = compileNoStats(plan);
    OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);
   
    if (PRINT_PLAN) {
      PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
      String json = dumper.getOptimizerPlanAsJSON(optPlan);
      System.out.println(json);
    }
   
    SourcePlanNode vertexSource = or.getNode(VERTEX_SOURCE);
    SourcePlanNode edgesSource = or.getNode(EDGES_SOURCE);
    SinkPlanNode sink = or.getNode(SINK);
    WorksetIterationPlanNode iter = or.getNode(ITERATION_NAME);
   
    DualInputPlanNode neighborsJoin = or.getNode(JOIN_NEIGHBORS_MATCH);
    SingleInputPlanNode minIdReducer = or.getNode(MIN_ID_REDUCER);
    SingleInputPlanNode minIdCombiner = (SingleInputPlanNode) minIdReducer.getPredecessor();
    DualInputPlanNode updatingMatch = or.getNode(UPDATE_ID_MATCH);
   
    // test all drivers
    Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.NONE, vertexSource.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.NONE, edgesSource.getDriverStrategy());
   
//    Assert.assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND, neighborsJoin.getDriverStrategy());
    Assert.assertEquals(set0, neighborsJoin.getKeysForInput1());
    Assert.assertEquals(set0, neighborsJoin.getKeysForInput2());
   
    Assert.assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, updatingMatch.getDriverStrategy());
    Assert.assertEquals(set0, updatingMatch.getKeysForInput1());
    Assert.assertEquals(set0, updatingMatch.getKeysForInput2());
   
    // test all the shipping strategies
    Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialSolutionSetInput().getShipStrategy());
    Assert.assertEquals(set0, iter.getInitialSolutionSetInput().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialWorksetInput().getShipStrategy());
    Assert.assertEquals(set0, iter.getInitialWorksetInput().getShipStrategyKeys());
   
    Assert.assertEquals(ShipStrategyType.FORWARD, neighborsJoin.getInput1().getShipStrategy()); // workset
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, neighborsJoin.getInput2().getShipStrategy()); // edges
    Assert.assertEquals(set0, neighborsJoin.getInput2().getShipStrategyKeys());
    Assert.assertTrue(neighborsJoin.getInput2().getTempMode().isCached());
   
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, minIdReducer.getInput().getShipStrategy());
    Assert.assertEquals(set0, minIdReducer.getInput().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.FORWARD, minIdCombiner.getInput().getShipStrategy());
   
    Assert.assertEquals(ShipStrategyType.FORWARD, updatingMatch.getInput1().getShipStrategy()); // solution set
    Assert.assertEquals(ShipStrategyType.FORWARD, updatingMatch.getInput2().getShipStrategy()); // min id
   
    // test all the local strategies
    Assert.assertEquals(LocalStrategy.NONE, sink.getInput().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.NONE, iter.getInitialSolutionSetInput().getLocalStrategy());
//    Assert.assertEquals(LocalStrategy.NONE, iter.getInitialWorksetInput().getLocalStrategy());
   
    Assert.assertEquals(LocalStrategy.NONE, neighborsJoin.getInput1().getLocalStrategy()); // workset
    Assert.assertEquals(LocalStrategy.NONE, neighborsJoin.getInput2().getLocalStrategy()); // edges
   
    Assert.assertEquals(LocalStrategy.COMBININGSORT, minIdReducer.getInput().getLocalStrategy());
    Assert.assertEquals(set0, minIdReducer.getInput().getLocalStrategyKeys());
    Assert.assertEquals(LocalStrategy.NONE, minIdCombiner.getInput().getLocalStrategy());
   
    Assert.assertEquals(LocalStrategy.NONE, updatingMatch.getInput1().getLocalStrategy()); // min id
    Assert.assertEquals(LocalStrategy.NONE, updatingMatch.getInput2().getLocalStrategy()); // solution set
   
    // check the dams
    Assert.assertTrue(TempMode.PIPELINE_BREAKER == iter.getInitialWorksetInput().getTempMode() ||
              LocalStrategy.SORT == iter.getInitialWorksetInput().getLocalStrategy());
   
    NepheleJobGraphGenerator jgg = new NepheleJobGraphGenerator();
    jgg.compileJobGraph(optPlan);
  }
 
 
  @ConstantFieldsSecond(0)
  public static final class UpdateComponentIdMatchMirrored extends JoinFunction implements Serializable {
   
    private static final long serialVersionUID = 1L;

    @Override
    public void join(Record currentVertexWithComponent, Record newVertexWithComponent, Collector<Record> out){
 
      long candidateComponentID = newVertexWithComponent.getField(1, LongValue.class).getValue();
      long currentComponentID = currentVertexWithComponent.getField(1, LongValue.class).getValue();
 
      if (candidateComponentID < currentComponentID) {
        out.collect(newVertexWithComponent);
      }
    }
  }
 
  @SuppressWarnings("unchecked")
  private static Plan getPlanForWorksetConnectedComponentsWithSolutionSetAsFirstInput(
      int numSubTasks, String verticesInput, String edgeInput, String output, int maxIterations)
  {
    // create DataSourceContract for the vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
   
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
   
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
   
    // create DataSourceContract for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");

    // create CrossOperator for distance computation
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)
        .name("Join Candidate Id With Neighbor")
        .build();

    // create ReduceOperator for finding the nearest cluster centers
    ReduceOperator minCandidateId = ReduceOperator.builder(new MinimumComponentIDReduce(), LongValue.class, 0)
        .input(joinWithNeighbors)
        .name("Find Minimum Candidate Id")
        .build();
   
    // create CrossOperator for distance computation
    JoinOperator updateComponentId = JoinOperator.builder(new UpdateComponentIdMatchMirrored(), LongValue.class, 0, 0)
        .input1(iteration.getSolutionSet())
        .input2(minCandidateId)
        .name("Update Component Id")
        .build();
   
    iteration.setNextWorkset(updateComponentId);
    iteration.setSolutionSetDelta(updateComponentId);

    // create DataSinkContract for writing the new cluster positions
    FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, iteration, "Result");
    CsvOutputFormat.configureRecordFormat(result)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(LongValue.class, 0)
      .field(LongValue.class, 1);

    // return the plan
    Plan plan = new Plan(result, "Workset Connected Components");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
  }
}
TOP

Related Classes of eu.stratosphere.test.compiler.iterations.ConnectedComponentsTest$UpdateComponentIdMatchMirrored

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.