Package eu.stratosphere.test.iterative

Source Code of eu.stratosphere.test.iterative.ConnectedComponentsWithSolutionSetFirstITCase$UpdateComponentIdMatchMirrored

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.test.iterative;

import java.io.BufferedReader;
import java.io.Serializable;

import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.java.record.operators.DeltaIteration;
import eu.stratosphere.api.java.record.operators.FileDataSink;
import eu.stratosphere.api.java.record.operators.FileDataSource;
import eu.stratosphere.api.java.record.functions.FunctionAnnotation.ConstantFieldsSecondExcept;
import eu.stratosphere.api.java.record.functions.JoinFunction;
import eu.stratosphere.api.java.record.io.CsvInputFormat;
import eu.stratosphere.api.java.record.io.CsvOutputFormat;
import eu.stratosphere.api.java.record.operators.JoinOperator;
import eu.stratosphere.api.java.record.operators.MapOperator;
import eu.stratosphere.api.java.record.operators.ReduceOperator;
import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents.DuplicateLongMap;
import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents.MinimumComponentIDReduce;
import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents.NeighborWithComponentIDJoin;
import eu.stratosphere.test.testdata.ConnectedComponentsData;
import eu.stratosphere.test.util.RecordAPITestBase;
import eu.stratosphere.types.LongValue;
import eu.stratosphere.types.Record;
import eu.stratosphere.util.Collector;

/**
* Tests a bug that prevented that the solution set can be on both sides of the match/cogroup function.
*/
public class ConnectedComponentsWithSolutionSetFirstITCase extends RecordAPITestBase {
 
  private static final long SEED = 0xBADC0FFEEBEEFL;
 
  private static final int NUM_VERTICES = 1000;
 
  private static final int NUM_EDGES = 10000;

 
  protected String verticesPath;
  protected String edgesPath;
  protected String resultPath;
 
 
  @Override
  protected void preSubmit() throws Exception {
    verticesPath = createTempFile("vertices.txt", ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES));
    edgesPath = createTempFile("edges.txt", ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED));
    resultPath = getTempFilePath("results");
  }
 
  @Override
  protected Plan getTestJob() {
    return getPlanForWorksetConnectedComponentsWithSolutionSetAsFirstInput(4, verticesPath, edgesPath, resultPath, 100);
  }

  @Override
  protected void postSubmit() throws Exception {
    for (BufferedReader reader : getResultReader(resultPath)) {
      ConnectedComponentsData.checkOddEvenResult(reader);
    }
  }
 
  // --------------------------------------------------------------------------------------------
  //  Classes and methods for the test program
  // --------------------------------------------------------------------------------------------
 
  @ConstantFieldsSecondExcept({})
  public static final class UpdateComponentIdMatchMirrored extends JoinFunction implements Serializable {
   
    private static final long serialVersionUID = 1L;

    @Override
    public void join(Record currentVertexWithComponent, Record newVertexWithComponent, Collector<Record> out){
 
      long candidateComponentID = newVertexWithComponent.getField(1, LongValue.class).getValue();
      long currentComponentID = currentVertexWithComponent.getField(1, LongValue.class).getValue();
 
      if (candidateComponentID < currentComponentID) {
        out.collect(newVertexWithComponent);
      }
    }
  }
 
  @SuppressWarnings("unchecked")
  private static Plan getPlanForWorksetConnectedComponentsWithSolutionSetAsFirstInput(
      int numSubTasks, String verticesInput, String edgeInput, String output, int maxIterations)
  {
    // data source for initial vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
   
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
   
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
   
    // create DataSourceContract for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");

    // create CrossOperator for distance computation
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)
        .name("Join Candidate Id With Neighbor")
        .build();

    // create ReduceOperator for finding the nearest cluster centers
    ReduceOperator minCandidateId = ReduceOperator.builder(new MinimumComponentIDReduce(), LongValue.class, 0)
        .input(joinWithNeighbors)
        .name("Find Minimum Candidate Id")
        .build();
   
    // create CrossOperator for distance computation
    JoinOperator updateComponentId = JoinOperator.builder(new UpdateComponentIdMatchMirrored(), LongValue.class, 0, 0)
        .input1(iteration.getSolutionSet())
        .input2(minCandidateId)
        .name("Update Component Id")
        .build();
   
    iteration.setNextWorkset(updateComponentId);
    iteration.setSolutionSetDelta(updateComponentId);

    // create DataSinkContract for writing the new cluster positions
    FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, iteration, "Result");
    CsvOutputFormat.configureRecordFormat(result)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(LongValue.class, 0)
      .field(LongValue.class, 1);

    // return the PACT plan
    Plan plan = new Plan(result, "Workset Connected Components");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
  }
}
TOP

Related Classes of eu.stratosphere.test.iterative.ConnectedComponentsWithSolutionSetFirstITCase$UpdateComponentIdMatchMirrored

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.