/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.compiler.dag;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import eu.stratosphere.api.common.operators.base.DeltaIterationBase;
import eu.stratosphere.api.common.operators.util.FieldList;
import eu.stratosphere.compiler.CompilerException;
import eu.stratosphere.compiler.DataStatistics;
import eu.stratosphere.compiler.PactCompiler.InterestingPropertyVisitor;
import eu.stratosphere.compiler.costs.CostEstimator;
import eu.stratosphere.compiler.dataproperties.GlobalProperties;
import eu.stratosphere.compiler.dataproperties.InterestingProperties;
import eu.stratosphere.compiler.dataproperties.LocalProperties;
import eu.stratosphere.compiler.dataproperties.PartitioningProperty;
import eu.stratosphere.compiler.dataproperties.RequestedGlobalProperties;
import eu.stratosphere.compiler.dataproperties.RequestedLocalProperties;
import eu.stratosphere.compiler.operators.OperatorDescriptorDual;
import eu.stratosphere.compiler.operators.SolutionSetDeltaOperator;
import eu.stratosphere.compiler.plan.Channel;
import eu.stratosphere.compiler.plan.DualInputPlanNode;
import eu.stratosphere.compiler.plan.NamedChannel;
import eu.stratosphere.compiler.plan.PlanNode;
import eu.stratosphere.compiler.plan.SingleInputPlanNode;
import eu.stratosphere.compiler.plan.SolutionSetPlanNode;
import eu.stratosphere.compiler.plan.WorksetIterationPlanNode;
import eu.stratosphere.compiler.plan.WorksetPlanNode;
import eu.stratosphere.compiler.util.NoOpBinaryUdfOp;
import eu.stratosphere.pact.runtime.shipping.ShipStrategyType;
import eu.stratosphere.pact.runtime.task.DriverStrategy;
import eu.stratosphere.pact.runtime.task.util.LocalStrategy;
import eu.stratosphere.types.Nothing;
import eu.stratosphere.types.NothingTypeInfo;
import eu.stratosphere.util.Visitor;
/**
* A node in the optimizer's program representation for a workset iteration.
*/
public class WorksetIterationNode extends TwoInputNode implements IterationNode {
/** Cost weight used when the iteration operator reports no positive maximum number of iterations. */
private static final int DEFAULT_COST_WEIGHT = 20;
/** Key fields of the solution set, built from the iteration operator's solution set key fields. */
private final FieldList solutionSetKeyFields;
/** Global properties describing a hash partitioning on the solution set key fields. */
private final GlobalProperties partitionedProperties;
/** Solution set node of the step function; assigned once in {@code setPartialSolution}. */
private SolutionSetNode solutionSetNode;
/** Workset node of the step function; assigned once in {@code setPartialSolution}. */
private WorksetNode worksetNode;
/** The auxiliary "Solution-Set Delta" node that {@code setNextPartialSolution} appends to the delta it is given. */
private OptimizerNode solutionSetDelta;
/** The node producing the next workset, as given to {@code setNextPartialSolution}. */
private OptimizerNode nextWorkset;
/** Connection from the solution set delta node into {@link #singleRoot}. */
private PactConnection solutionSetDeltaRootConnection;
/** Connection from the next workset node into {@link #singleRoot}. */
private PactConnection nextWorksetRootConnection;
/** Utility node that takes the solution set delta and the next workset as its two inputs. */
private SingleRootJoiner singleRoot;
/** True if the delta handed to {@code setNextPartialSolution} is a two-input node with the solution set node as a direct predecessor. */
private boolean solutionDeltaImmediatelyAfterSolutionJoin;
/** Cost weight of this iteration; never larger than {@code OptimizerNode.MAX_DYNAMIC_PATH_COST_WEIGHT}. */
private final int costWeight;
// --------------------------------------------------------------------------------------------
/**
 * Creates the optimizer node for a workset iteration. The node is a two-input node; the
 * accessors of this class treat the first input as the initial solution set and the second
 * input as the initial workset.
 * <p>
 * The cost weight is the iteration's maximum number of iterations if that is positive, and
 * {@code DEFAULT_COST_WEIGHT} otherwise, capped at {@code OptimizerNode.MAX_DYNAMIC_PATH_COST_WEIGHT}.
 *
 * @param iteration The iteration operator that the node represents.
 * @throws CompilerException Thrown, if the iteration declares no solution set key fields.
 */
public WorksetIterationNode(DeltaIterationBase<?, ?> iteration) {
	super(iteration);

	final int[] keyPositions = iteration.getSolutionSetKeyFields();
	final boolean hasKeys = keyPositions != null && keyPositions.length > 0;
	if (!hasKeys) {
		throw new CompilerException("Invalid WorksetIteration: No key fields defined for the solution set.");
	}
	this.solutionSetKeyFields = new FieldList(keyPositions);

	// the solution set is always hash partitioned on its key
	this.partitionedProperties = new GlobalProperties();
	this.partitionedProperties.setHashPartitioned(this.solutionSetKeyFields);

	// derive the cost weight from the declared iteration bound, limited to the allowed maximum
	final int maxIterations = iteration.getMaximumNumberOfIterations();
	final int uncappedWeight = maxIterations > 0 ? maxIterations : DEFAULT_COST_WEIGHT;
	this.costWeight = Math.min(uncappedWeight, OptimizerNode.MAX_DYNAMIC_PATH_COST_WEIGHT);

	this.possibleProperties.add(new WorksetOpDescriptor(this.solutionSetKeyFields));
}
// --------------------------------------------------------------------------------------------
/**
 * Gets the contract of this node, cast to the delta iteration operator type.
 *
 * @return The iteration operator represented by this node.
 */
public DeltaIterationBase<?, ?> getIterationContract() {
	final DeltaIterationBase<?, ?> contract = (DeltaIterationBase<?, ?>) getPactContract();
	return contract;
}
/**
 * Gets the solution set node registered via {@code setPartialSolution}.
 *
 * @return The solution set node, or null, if none has been set yet.
 */
public SolutionSetNode getSolutionSetNode() {
	return solutionSetNode;
}
/**
 * Gets the workset node registered via {@code setPartialSolution}.
 *
 * @return The workset node, or null, if none has been set yet.
 */
public WorksetNode getWorksetNode() {
	return worksetNode;
}
/**
 * Gets the node that produces the next workset, as registered via {@code setNextPartialSolution}.
 *
 * @return The next workset node, or null, if none has been set yet.
 */
public OptimizerNode getNextWorkset() {
	return nextWorkset;
}
/**
 * Gets the solution set delta node. This is the auxiliary "Solution-Set Delta" node created
 * in {@code setNextPartialSolution}, not the node that was passed to that method.
 *
 * @return The solution set delta node, or null, if none has been set yet.
 */
public OptimizerNode getSolutionSetDelta() {
	return solutionSetDelta;
}
/**
 * Registers the solution set node and the workset node of the step function. May only be
 * called while neither of the two has been set.
 *
 * @param solutionSetNode The solution set node of the step function.
 * @param worksetNode The workset node of the step function.
 * @throws IllegalStateException Thrown, if a solution set node or workset node is already set.
 */
public void setPartialSolution(SolutionSetNode solutionSetNode, WorksetNode worksetNode) {
	final boolean stillUninitialized = this.solutionSetNode == null && this.worksetNode == null;
	if (!stillUninitialized) {
		throw new IllegalStateException("Error: Initializing WorksetIterationNode multiple times.");
	}

	this.worksetNode = worksetNode;
	this.solutionSetNode = solutionSetNode;
}
/**
 * Registers the two results of the step function: the solution set delta and the next workset.
 * <p>
 * An auxiliary "Solution-Set Delta" unary node is appended to the given delta, and both the
 * auxiliary node and the next workset are connected to a newly created {@link SingleRootJoiner}.
 * The method also records whether the given delta is a two-input node that has the solution
 * set node as a direct predecessor.
 *
 * @param solutionSetDelta The node producing the solution set delta.
 * @param nextWorkset The node producing the next workset.
 */
public void setNextPartialSolution(OptimizerNode solutionSetDelta, OptimizerNode nextWorkset) {
	// if the delta is produced directly by the operator that consumes the solution set,
	// direct updates of the solution set may be possible
	if (solutionSetDelta instanceof TwoInputNode) {
		final TwoInputNode deltaProducer = (TwoInputNode) solutionSetDelta;
		final boolean consumesSolutionSet =
				deltaProducer.getFirstPredecessorNode() == this.solutionSetNode ||
				deltaProducer.getSecondPredecessorNode() == this.solutionSetNode;
		if (consumesSolutionSet) {
			this.solutionDeltaImmediatelyAfterSolutionJoin = true;
		}
	}

	// extra node behind the delta, for the cases where the delta must be repartitioned
	final FieldList keys = getSolutionSetKeyFields();
	final UnaryOperatorNode deltaAux = new UnaryOperatorNode("Solution-Set Delta", keys,
			new SolutionSetDeltaOperator(keys));
	deltaAux.setDegreeOfParallelism(getDegreeOfParallelism());
	deltaAux.setSubtasksPerInstance(getSubtasksPerInstance());

	final PactConnection deltaToAux = new PactConnection(solutionSetDelta, deltaAux);
	deltaAux.setIncomingConnection(deltaToAux);
	solutionSetDelta.addOutgoingConnection(deltaToAux);

	this.solutionSetDelta = deltaAux;
	this.nextWorkset = nextWorkset;

	// join both step function results under a single root
	final SingleRootJoiner root = new SingleRootJoiner();
	final PactConnection deltaToRoot = new PactConnection(deltaAux, root);
	final PactConnection worksetToRoot = new PactConnection(nextWorkset, root);

	this.singleRoot = root;
	this.solutionSetDeltaRootConnection = deltaToRoot;
	this.nextWorksetRootConnection = worksetToRoot;

	root.setInputs(deltaToRoot, worksetToRoot);
	deltaAux.addOutgoingConnection(deltaToRoot);
	nextWorkset.addOutgoingConnection(worksetToRoot);
}
/**
 * Gets the cost weight that was computed for this iteration in the constructor.
 *
 * @return The cost weight of this iteration.
 */
public int getCostWeight() {
	return costWeight;
}
/**
 * Gets the utility node that joins the solution set delta and the next workset.
 *
 * @return The single root of the step function, or null, if {@code setNextPartialSolution}
 *         has not been called yet.
 */
public TwoInputNode getSingleRootOfStepFunction() {
	return singleRoot;
}
/**
 * Gets the key fields of the solution set.
 *
 * @return The solution set key fields.
 */
public FieldList getSolutionSetKeyFields() {
	return solutionSetKeyFields;
}
/**
 * Gets the node providing the initial solution set, which is this node's first predecessor.
 *
 * @return The first predecessor of this node.
 */
public OptimizerNode getInitialSolutionSetPredecessorNode() {
	final OptimizerNode initialSolutionSet = getFirstPredecessorNode();
	return initialSolutionSet;
}
/**
 * Gets the node providing the initial workset, which is this node's second predecessor.
 *
 * @return The second predecessor of this node.
 */
public OptimizerNode getInitialWorksetPredecessorNode() {
	final OptimizerNode initialWorkset = getSecondPredecessorNode();
	return initialWorkset;
}
// --------------------------------------------------------------------------------------------
/**
 * Gets the fixed display name of this node type.
 *
 * @return The string "Workset Iteration".
 */
@Override
public String getName() {
return "Workset Iteration";
}
/**
 * Reports no field as constant: the result is false for every input and every field number.
 *
 * @param input The index of the input (ignored).
 * @param fieldNumber The position of the field (ignored).
 * @return Always false.
 */
@Override
public boolean isFieldConstant(int input, int fieldNumber) {
return false;
}
/**
 * Intentionally empty: this node does not read any stub annotations.
 * NOTE(review): this looks like an override of a superclass hook without an
 * {@code @Override} annotation; confirm against the superclass.
 */
protected void readStubAnnotations() {}
/**
 * Sets the estimates of this node to those of its first predecessor (the initial solution
 * set): both the estimated output size and the estimated number of records are copied.
 *
 * @param statistics The data statistics (not used by this implementation).
 */
@Override
protected void computeOperatorSpecificDefaultEstimates(DataStatistics statistics) {
	final OptimizerNode initialSolutionSet = getFirstPredecessorNode();
	this.estimatedOutputSize = initialSolutionSet.getEstimatedOutputSize();
	this.estimatedNumRecords = initialSolutionSet.getEstimatedNumRecords();
}
// --------------------------------------------------------------------------------------------
// Properties and Optimization
// --------------------------------------------------------------------------------------------
/**
 * Marks the workset iteration as a memory consumer.
 *
 * @return Always true.
 */
@Override
public boolean isMemoryConsumer() {
return true;
}
/**
 * Creates a new, empty and mutable list of operator descriptors with an initial capacity of one.
 * Every call returns a fresh list.
 * NOTE(review): presumably the superclass stores this list as {@code possibleProperties}, to
 * which the constructor of this class adds its descriptor; confirm against TwoInputNode.
 *
 * @return A new empty list.
 */
@Override
protected List<OperatorDescriptorDual> getPossibleProperties() {
	final List<OperatorDescriptorDual> descriptors = new ArrayList<OperatorDescriptorDual>(1);
	return descriptors;
}
/**
 * Computes the interesting properties for the step function and for the two inputs of the
 * iteration.
 * <p>
 * The solution set is always partitioned on its key, independent of what the successors of the
 * iteration request, so incoming interesting properties are ignored. The step function is
 * traversed twice: the root of the path that computes the next workset needs the interesting
 * properties that the workset source of the step function produced in the first traversal.
 * The second traversal concerns only the workset path.
 *
 * @param estimator The cost estimator handed to the interesting property visitor.
 */
@Override
public void computeInterestingPropertiesForInputs(CostEstimator estimator) {
	// interesting properties for the solution set: hash partitioned on the key, no local requirement
	final RequestedGlobalProperties hashOnKeys = new RequestedGlobalProperties();
	hashOnKeys.setHashPartitioned(this.solutionSetKeyFields);

	final InterestingProperties solutionSetProps = new InterestingProperties();
	solutionSetProps.addGlobalProperties(hashOnKeys);
	solutionSetProps.addLocalProperties(new RequestedLocalProperties());

	// 1st pass: trivial properties for the next workset, partitioning for the solution set delta
	this.nextWorksetRootConnection.setInterestingProperties(new InterestingProperties());
	this.solutionSetDeltaRootConnection.setInterestingProperties(solutionSetProps.clone());

	final InterestingPropertyVisitor propertyVisitor = new InterestingPropertyVisitor(estimator);
	this.nextWorkset.accept(propertyVisitor);
	this.solutionSetDelta.accept(propertyVisitor);

	// copy what the workset source collected, to use it as the root properties of the 2nd pass
	final InterestingProperties collectedAtWorkset = this.worksetNode.getInterestingProperties();
	final InterestingProperties rootProps = new InterestingProperties();
	rootProps.getGlobalProperties().addAll(collectedAtWorkset.getGlobalProperties());
	rootProps.getLocalProperties().addAll(collectedAtWorkset.getLocalProperties());

	// reset the workset path before traversing it again
	this.nextWorksetRootConnection.clearInterestingProperties();
	this.nextWorkset.accept(InterestingPropertiesClearer.INSTANCE);

	// 2nd pass over the workset path only
	this.nextWorksetRootConnection.setInterestingProperties(rootProps);
	this.nextWorkset.accept(propertyVisitor);

	// the workset input gets the workset's interesting properties plus the trivial ones
	final InterestingProperties worksetInputProps = this.worksetNode.getInterestingProperties().clone();
	worksetInputProps.addGlobalProperties(new RequestedGlobalProperties());
	worksetInputProps.addLocalProperties(new RequestedLocalProperties());
	this.input2.setInterestingProperties(worksetInputProps);

	// the solution set input must be hash partitioned, so that is its only interesting property
	this.input1.setInterestingProperties(solutionSetProps);
}
/**
 * Creates the plan candidates for this iteration for one combination of input channels and
 * adds them to {@code target}.
 * <p>
 * The step function plans are enumerated anew on every call, so that there is a fitting step
 * function candidate for each candidate of the initial inputs. Of the next-workset candidates,
 * only those that meet the properties requested for the workset input are kept. If none
 * remains, the method returns without adding a candidate. For every branch-compatible
 * combination of solution set delta plan and next-workset plan, one
 * {@link WorksetIterationPlanNode} is created.
 *
 * @param operator The operator descriptor (not used by this implementation).
 * @param solutionSetIn The channel of the initial solution set.
 * @param worksetIn The channel of the initial workset.
 * @param broadcastPlanChannels The broadcast channels (not used by this implementation).
 * @param target The list that the created candidates are added to.
 * @param estimator The cost estimator used to enumerate the step function plans.
 * @param globPropsReqSolutionSet The global properties requested for the solution set input
 *                                (not used by this implementation).
 * @param globPropsReqWorkset The global properties that next-workset candidates must meet.
 * @param locPropsReqSolutionSet The local properties requested for the solution set input
 *                               (not used by this implementation).
 * @param locPropsReqWorkset The local properties that next-workset candidates must meet.
 * @throws CompilerException Thrown, if a solution set delta candidate is not hash partitioned
 *                           on the solution set key, or if a delta node that is to be bypassed
 *                           is not a plain no-op node.
 */
@Override
protected void instantiate(OperatorDescriptorDual operator, Channel solutionSetIn, Channel worksetIn,
		List<Set<? extends NamedChannel>> broadcastPlanChannels, List<PlanNode> target, CostEstimator estimator,
		RequestedGlobalProperties globPropsReqSolutionSet,RequestedGlobalProperties globPropsReqWorkset,
		RequestedLocalProperties locPropsReqSolutionSet, RequestedLocalProperties locPropsReqWorkset)
{
	// check for pipeline breaking using hash join with build on the solution set side
	placePipelineBreakersIfNecessary(DriverStrategy.HYBRIDHASH_BUILD_FIRST, solutionSetIn, worksetIn);

	// NOTES ON THE ENUMERATION OF THE STEP FUNCTION PLANS:
	// Whenever we instantiate the iteration, we enumerate new candidates for the step function.
	// That way, we make sure that for each candidate for the initial partial solution,
	// we have a fitting candidate for the step function (often, work is pushed out of the step function).
	// Among the candidates of the step function, we keep only those that meet the requested properties of the
	// current candidate initial partial solution. That makes sure these properties exist at the beginning of
	// every iteration.

	// 1) Because we enumerate multiple times, we may need to clean the cached plans
	//    before starting another enumeration
	this.nextWorkset.accept(PlanCacheCleaner.INSTANCE);
	this.solutionSetDelta.accept(PlanCacheCleaner.INSTANCE);

	// 2) Give the partial solution the properties of the current candidate for the initial partial solution.
	//    The workset takes the properties of its input channel, the solution set is always hash partitioned.
	this.worksetNode.setCandidateProperties(worksetIn.getGlobalProperties(), worksetIn.getLocalProperties(), worksetIn);
	this.solutionSetNode.setCandidateProperties(this.partitionedProperties, new LocalProperties(), solutionSetIn);

	final SolutionSetPlanNode sspn = this.solutionSetNode.getCurrentSolutionSetPlanNode();
	final WorksetPlanNode wspn = this.worksetNode.getCurrentWorksetPlanNode();

	// 3) Get the alternative plans
	List<PlanNode> solutionSetDeltaCandidates = this.solutionSetDelta.getAlternativePlans(estimator);
	List<PlanNode> worksetCandidates = this.nextWorkset.getAlternativePlans(estimator);

	// 4) Throw away all that are not compatible with the properties currently requested to the
	//    initial partial solution

	// Make sure that the workset candidates fulfill the input requirements
	for (Iterator<PlanNode> planDeleter = worksetCandidates.iterator(); planDeleter.hasNext(); ) {
		PlanNode candidate = planDeleter.next();
		if (!(globPropsReqWorkset.isMetBy(candidate.getGlobalProperties()) && locPropsReqWorkset.isMetBy(candidate.getLocalProperties()))) {
			planDeleter.remove();
		}
	}
	if (worksetCandidates.isEmpty()) {
		return;
	}

	// sanity check the solution set delta: every candidate must be hash partitioned on the solution set key
	for (PlanNode deltaPlan : solutionSetDeltaCandidates) {
		final GlobalProperties deltaProps = ((SingleInputPlanNode) deltaPlan).getGlobalProperties();
		if (deltaProps.getPartitioning() != PartitioningProperty.HASH_PARTITIONED || deltaProps.getPartitioningFields() == null ||
				!deltaProps.getPartitioningFields().equals(this.solutionSetKeyFields))
		{
			throw new CompilerException("Bug: The solution set delta is not partitioned.");
		}
	}

	// 5) Create a candidate for the Iteration Node for every remaining plan of the step function.
	final GlobalProperties gp = new GlobalProperties();
	gp.setHashPartitioned(this.solutionSetKeyFields);
	gp.addUniqueFieldCombination(this.solutionSetKeyFields);

	final LocalProperties lp = new LocalProperties();
	lp.addUniqueFields(this.solutionSetKeyFields);

	// take all combinations of solution set delta and workset plans
	for (PlanNode solutionSetCandidate : solutionSetDeltaCandidates) {
		for (PlanNode worksetCandidate : worksetCandidates) {
			// check whether they have the same operator at their latest branching point
			if (!this.singleRoot.areBranchCompatible(solutionSetCandidate, worksetCandidate)) {
				continue;
			}

			final SingleInputPlanNode siSolutionDeltaCandidate = (SingleInputPlanNode) solutionSetCandidate;

			// The delta node that goes into the plan is tracked in its own variable. The loop
			// variable must stay untouched, because it is used again for the compatibility check
			// and the cast when this delta candidate is paired with the next workset candidate.
			final PlanNode effectiveDelta;
			final boolean immediateDeltaUpdate;

			// check whether we need a dedicated solution set delta operator, or whether we can update on the fly
			if (siSolutionDeltaCandidate.getInput().getShipStrategy() == ShipStrategyType.FORWARD && this.solutionDeltaImmediatelyAfterSolutionJoin) {
				// we do not need this extra node. we can make the predecessor the delta
				// sanity check the node and connection
				if (siSolutionDeltaCandidate.getDriverStrategy() != DriverStrategy.UNARY_NO_OP || siSolutionDeltaCandidate.getInput().getLocalStrategy() != LocalStrategy.NONE) {
					throw new CompilerException("Invalid Solution set delta node.");
				}

				effectiveDelta = siSolutionDeltaCandidate.getInput().getSource();
				immediateDeltaUpdate = true;
			} else {
				// the extra node cannot be bypassed, we need to keep it.
				// mark that we materialize the input
				siSolutionDeltaCandidate.getInput().setTempMode(TempMode.PIPELINE_BREAKER);
				effectiveDelta = solutionSetCandidate;
				immediateDeltaUpdate = false;
			}

			WorksetIterationPlanNode wsNode = new WorksetIterationPlanNode(
				this, "WorksetIteration ("+this.getPactContract().getName()+")", solutionSetIn, worksetIn, sspn, wspn, worksetCandidate, effectiveDelta);
			wsNode.setImmediateSolutionSetUpdate(immediateDeltaUpdate);
			wsNode.initProperties(gp, lp);
			target.add(wsNode);
		}
	}
}
/**
 * Computes the stack of unclosed branches for this node, unless it has been computed before.
 * <p>
 * The closed branches of both inputs are added first. After that, the open branches of the
 * two inputs are merged, the result is merged with the open branches of the step function's
 * single root, and finally the broadcast inputs are taken into account. The outcome is stored
 * in {@code openBranches}; a null or empty result is stored as an empty list.
 */
@Override
public void computeUnclosedBranchStack() {
	if (this.openBranches != null) {
		// already computed
		return;
	}

	// IMPORTANT: First compute closed branches from the two inputs
	// we need to do this because the runtime iteration head effectively joins
	addClosedBranches(getFirstPredecessorNode().closedBranchingNodes);
	addClosedBranches(getSecondPredecessorNode().closedBranchingNodes);

	// merge the open branches of the two inputs
	List<UnclosedBranchDescriptor> openFromSolutionSet = getFirstPredecessorNode().getBranchesForParent(getFirstIncomingConnection());
	List<UnclosedBranchDescriptor> openFromWorkset = getSecondPredecessorNode().getBranchesForParent(getSecondIncomingConnection());

	ArrayList<UnclosedBranchDescriptor> mergedInputs = new ArrayList<UnclosedBranchDescriptor>();
	mergeLists(openFromSolutionSet, openFromWorkset, mergedInputs);

	// merge in the branches of the step function
	addClosedBranches(getSingleRootOfStepFunction().closedBranchingNodes);

	ArrayList<UnclosedBranchDescriptor> mergedWithStepFunction = new ArrayList<UnclosedBranchDescriptor>();
	List<UnclosedBranchDescriptor> openFromStepFunction = getSingleRootOfStepFunction().openBranches;
	mergeLists(mergedInputs, openFromStepFunction, mergedWithStepFunction);

	// handle the data flow branching for the broadcast inputs
	List<UnclosedBranchDescriptor> allOpen = computeUnclosedBranchStackForBroadcastInputs(mergedWithStepFunction);

	if (allOpen == null || allOpen.isEmpty()) {
		this.openBranches = Collections.<UnclosedBranchDescriptor>emptyList();
	} else {
		this.openBranches = allOpen;
	}
}
// --------------------------------------------------------------------------------------------
// Iteration Specific Traversals
// --------------------------------------------------------------------------------------------
/**
 * Sends the given visitor into the step function, starting at the single root that joins the
 * solution set delta and the next workset.
 *
 * @param visitor The visitor to apply to the step function.
 */
public void acceptForStepFunction(Visitor<OptimizerNode> visitor) {
	singleRoot.accept(visitor);
}
// --------------------------------------------------------------------------------------------
// Utility Classes
// --------------------------------------------------------------------------------------------
/**
 * Operator descriptor of the workset iteration. It requests a hash partitioning on the
 * solution set keys for the first input and nothing for the second input. It only describes
 * requested properties: instantiation and property computation are not supported.
 */
private static final class WorksetOpDescriptor extends OperatorDescriptorDual {

	/**
	 * Creates the descriptor with the solution set keys as the keys of the first input and
	 * no keys for the second input.
	 *
	 * @param solutionSetKeys The key fields of the solution set.
	 */
	private WorksetOpDescriptor(FieldList solutionSetKeys) {
		super(solutionSetKeys, null);
	}

	/**
	 * @return Always {@code DriverStrategy.NONE}.
	 */
	@Override
	public DriverStrategy getStrategy() {
		return DriverStrategy.NONE;
	}

	/**
	 * @return A single pair: hash partitioned on the keys of the first input, and unrestricted
	 *         properties for the second input.
	 */
	@Override
	protected List<GlobalPropertiesPair> createPossibleGlobalProperties() {
		final RequestedGlobalProperties solutionSetRequirement = new RequestedGlobalProperties();
		solutionSetRequirement.setHashPartitioned(this.keys1);

		final RequestedGlobalProperties worksetRequirement = new RequestedGlobalProperties();
		final GlobalPropertiesPair requirement = new GlobalPropertiesPair(solutionSetRequirement, worksetRequirement);
		return Collections.singletonList(requirement);
	}

	/**
	 * @return A single pair of default (unrestricted) local property requests.
	 */
	@Override
	protected List<LocalPropertiesPair> createPossibleLocalProperties() {
		// all properties are possible
		final RequestedLocalProperties solutionSetRequirement = new RequestedLocalProperties();
		final RequestedLocalProperties worksetRequirement = new RequestedLocalProperties();
		final LocalPropertiesPair requirement = new LocalPropertiesPair(solutionSetRequirement, worksetRequirement);
		return Collections.singletonList(requirement);
	}

	/**
	 * @return Always true: any combination of local properties is accepted.
	 */
	@Override
	public boolean areCoFulfilled(RequestedLocalProperties requested1, RequestedLocalProperties requested2,
			LocalProperties produced1, LocalProperties produced2)
	{
		return true;
	}

	/**
	 * Not supported by this descriptor.
	 *
	 * @throws UnsupportedOperationException Always.
	 */
	@Override
	public DualInputPlanNode instantiate(Channel in1, Channel in2, TwoInputNode node) {
		throw new UnsupportedOperationException();
	}

	/**
	 * Not supported by this descriptor.
	 *
	 * @throws UnsupportedOperationException Always.
	 */
	@Override
	public GlobalProperties computeGlobalProperties(GlobalProperties in1, GlobalProperties in2) {
		throw new UnsupportedOperationException();
	}

	/**
	 * Not supported by this descriptor.
	 *
	 * @throws UnsupportedOperationException Always.
	 */
	@Override
	public LocalProperties computeLocalProperties(LocalProperties in1, LocalProperties in2) {
		throw new UnsupportedOperationException();
	}
}
/**
 * Utility two-input node that gives the step function a single root by taking the solution
 * set delta and the next workset as its two inputs. It offers no operator descriptors and
 * computes no estimates.
 */
public static class SingleRootJoiner extends TwoInputNode {

	/**
	 * Creates the joiner around a no-op binary operator, with a degree of parallelism of one
	 * and one subtask per instance.
	 */
	SingleRootJoiner() {
		super(new NoOpBinaryUdfOp<Nothing>(new NothingTypeInfo()));

		setDegreeOfParallelism(1);
		setSubtasksPerInstance(1);
	}

	/**
	 * Sets the two incoming connections of this node.
	 *
	 * @param input1 The connection used as the first input.
	 * @param input2 The connection used as the second input.
	 */
	public void setInputs(PactConnection input1, PactConnection input2) {
		this.input2 = input2;
		this.input1 = input1;
	}

	/**
	 * @return The string "Internal Utility Node".
	 */
	@Override
	public String getName() {
		return "Internal Utility Node";
	}

	/**
	 * @return Always an empty list.
	 */
	@Override
	protected List<OperatorDescriptorDual> getPossibleProperties() {
		return Collections.<OperatorDescriptorDual>emptyList();
	}

	/**
	 * Does nothing, no estimates are needed for this node.
	 *
	 * @param statistics The data statistics (ignored).
	 */
	@Override
	protected void computeOperatorSpecificDefaultEstimates(DataStatistics statistics) {
		// intentionally left empty
	}
}
}