Package eu.stratosphere.test.iterative

Source Code of eu.stratosphere.test.iterative.ConnectedComponentsWithDeferredUpdateITCase$IdentityMap

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.test.iterative;

import java.io.BufferedReader;
import java.io.Serializable;
import java.util.Collection;

import eu.stratosphere.test.util.RecordAPITestBase;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.java.record.operators.FileDataSink;
import eu.stratosphere.api.java.record.operators.FileDataSource;
import eu.stratosphere.api.java.record.operators.DeltaIteration;
import eu.stratosphere.api.java.record.functions.JoinFunction;
import eu.stratosphere.api.java.record.functions.MapFunction;
import eu.stratosphere.api.java.record.io.CsvInputFormat;
import eu.stratosphere.api.java.record.io.CsvOutputFormat;
import eu.stratosphere.api.java.record.operators.JoinOperator;
import eu.stratosphere.api.java.record.operators.MapOperator;
import eu.stratosphere.api.java.record.operators.ReduceOperator;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents.DuplicateLongMap;
import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents.MinimumComponentIDReduce;
import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents.NeighborWithComponentIDJoin;
import eu.stratosphere.test.testdata.ConnectedComponentsData;
import eu.stratosphere.types.LongValue;
import eu.stratosphere.types.Record;
import eu.stratosphere.util.Collector;

@RunWith(Parameterized.class)
public class ConnectedComponentsWithDeferredUpdateITCase extends RecordAPITestBase {
 
  private static final long SEED = 0xBADC0FFEEBEEFL;
 
  private static final int NUM_VERTICES = 1000;
 
  private static final int NUM_EDGES = 10000;

 
  protected String verticesPath;
  protected String edgesPath;
  protected String resultPath;
 
 
  public ConnectedComponentsWithDeferredUpdateITCase(Configuration config) {
    super(config);
  }
 
  @Override
  protected void preSubmit() throws Exception {
    verticesPath = createTempFile("vertices.txt", ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES));
    edgesPath = createTempFile("edges.txt", ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED));
    resultPath = getTempFilePath("results");
  }
 
  @Override
  protected Plan getTestJob() {
    boolean extraMapper = config.getBoolean("ExtraMapper", false);
    return getPlan(4, verticesPath, edgesPath, resultPath, 100, extraMapper);
  }

  @Override
  protected void postSubmit() throws Exception {
    for (BufferedReader reader : getResultReader(resultPath)) {
      ConnectedComponentsData.checkOddEvenResult(reader);
    }
  }

  @Parameters
  public static Collection<Object[]> getConfigurations() {
    Configuration config1 = new Configuration();
    config1.setBoolean("ExtraMapper", false);
   
    Configuration config2 = new Configuration();
    config2.setBoolean("ExtraMapper", true);
   
    return toParameterList(config1, config2);
  }
 
  @SuppressWarnings("unchecked")
  public static Plan getPlan(int numSubTasks, String verticesInput, String edgeInput, String output, int maxIterations, boolean extraMap) {

    // data source for initial vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
   
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
   
    // the loop takes the vertices as the solution set and changed vertices as the workset
    // initially, all vertices are changed
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
   
    // data source for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");

    // join workset (changed vertices) with the edges to propagate changes to neighbors
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)
        .name("Join Candidate Id With Neighbor")
        .build();

    // find for each neighbor the smallest of all candidates
    ReduceOperator minCandidateId = ReduceOperator.builder(new MinimumComponentIDReduce(), LongValue.class, 0)
        .input(joinWithNeighbors)
        .name("Find Minimum Candidate Id")
        .build();
   
    // join candidates with the solution set and update if the candidate component-id is smaller
    JoinOperator updateComponentId = JoinOperator.builder(new UpdateComponentIdMatchNonPreserving(), LongValue.class, 0, 0)
        .input1(minCandidateId)
        .input2(iteration.getSolutionSet())
        .name("Update Component Id")
        .build();
   
    if (extraMap) {
      MapOperator mapper = MapOperator.builder(IdentityMap.class).input(updateComponentId).name("idmap").build();
      iteration.setSolutionSetDelta(mapper);
    } else {
      iteration.setSolutionSetDelta(updateComponentId);
    }
   
    iteration.setNextWorkset(updateComponentId);

    // sink is the iteration result
    FileDataSink result = new FileDataSink(new CsvOutputFormat("\n", " ", LongValue.class, LongValue.class), output, iteration, "Result");

    // return the PACT plan
    Plan plan = new Plan(result, "Workset Connected Components");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
  }
 
  public static final class UpdateComponentIdMatchNonPreserving extends JoinFunction implements Serializable {
    private static final long serialVersionUID = 1L;

    @Override
    public void join(Record newVertexWithComponent, Record currentVertexWithComponent, Collector<Record> out){
 
      long candidateComponentID = newVertexWithComponent.getField(1, LongValue.class).getValue();
      long currentComponentID = currentVertexWithComponent.getField(1, LongValue.class).getValue();
 
      if (candidateComponentID < currentComponentID) {
        out.collect(newVertexWithComponent);
      }
    }
  }
 
  public static final class IdentityMap extends MapFunction {
    private static final long serialVersionUID = 1L;

    @Override
    public void map(Record record, Collector<Record> out) throws Exception {
      out.collect(record);
    }
  }
}
TOP

Related Classes of eu.stratosphere.test.iterative.ConnectedComponentsWithDeferredUpdateITCase$IdentityMap

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.