/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.compiler.plan;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.util.FieldSet;
import eu.stratosphere.compiler.CompilerException;
import eu.stratosphere.compiler.costs.Costs;
import eu.stratosphere.compiler.dag.OptimizerNode;
import eu.stratosphere.compiler.dag.OptimizerNode.UnclosedBranchDescriptor;
import eu.stratosphere.compiler.dataproperties.GlobalProperties;
import eu.stratosphere.compiler.dataproperties.LocalProperties;
import eu.stratosphere.compiler.plandump.DumpableConnection;
import eu.stratosphere.compiler.plandump.DumpableNode;
import eu.stratosphere.pact.runtime.task.DriverStrategy;
import eu.stratosphere.util.Visitable;
/**
* The representation of an operator in a candidate plan. Each plan node is created for a template
* {@code OptimizerNode} and records the driver strategy selected for the operator, together with the global
* and local properties of the data that the node produces.
* <p>
* Because we currently deal only with plans where the operator order is fixed, many properties are equal
* among candidates and are determined prior to the enumeration (such as for example constant/dynamic path membership).
* Hence, many methods will delegate to the {@code OptimizerNode} that represents the node this candidate was
* created for.
*/
public abstract class PlanNode implements Visitable<PlanNode>, DumpableNode<PlanNode> {
protected final OptimizerNode template;
protected final List<Channel> outChannels;
private List<NamedChannel> broadcastInputs;
private final String nodeName;
private DriverStrategy driverStrategy; // The local strategy (sorting / hashing, ...)
protected LocalProperties localProps; // local properties of the data produced by this node
protected GlobalProperties globalProps; // global properties of the data produced by this node
protected Map<OptimizerNode, PlanNode> branchPlan; // the actual plan alternative chosen at a branch point
protected Costs nodeCosts; // the costs incurred by this node
protected Costs cumulativeCosts; // the cumulative costs of all operators in the sub-tree
private long memoryPerSubTask; // the amount of memory dedicated to each task, in bytes
private int degreeOfParallelism;
private int subtasksPerInstance;
private boolean pFlag; // flag for the internal pruning algorithm
// --------------------------------------------------------------------------------------------
public PlanNode(OptimizerNode template, String nodeName, DriverStrategy strategy) {
this.outChannels = new ArrayList<Channel>(2);
this.broadcastInputs = new ArrayList<NamedChannel>();
this.template = template;
this.nodeName = nodeName;
this.driverStrategy = strategy;
this.degreeOfParallelism = template.getDegreeOfParallelism();
this.subtasksPerInstance = template.getSubtasksPerInstance();
// check, if there is branch at this node. if yes, this candidate must be associated with
// the branching template node.
if (template.isBranching()) {
this.branchPlan = new HashMap<OptimizerNode, PlanNode>(6);
this.branchPlan.put(template, this);
}
}
protected void mergeBranchPlanMaps(PlanNode pred1, PlanNode pred2) {
mergeBranchPlanMaps(pred1.branchPlan, pred2.branchPlan);
}
protected void mergeBranchPlanMaps(Map<OptimizerNode, PlanNode> branchPlan1, Map<OptimizerNode, PlanNode> branchPlan2) {
// merge the branchPlan maps according the the template's uncloseBranchesStack
if (this.template.hasUnclosedBranches()) {
if (this.branchPlan == null) {
this.branchPlan = new HashMap<OptimizerNode, PlanNode>(8);
}
for (UnclosedBranchDescriptor uc : this.template.getOpenBranches()) {
OptimizerNode brancher = uc.getBranchingNode();
PlanNode selectedCandidate = null;
if (branchPlan1 != null) {
// predecessor 1 has branching children, see if it got the branch we are looking for
selectedCandidate = branchPlan1.get(brancher);
}
if (selectedCandidate == null && branchPlan2 != null) {
// predecessor 2 has branching children, see if it got the branch we are looking for
selectedCandidate = branchPlan2.get(brancher);
}
// it may be that the branch candidate is only found once the broadcast variables are set
if (selectedCandidate != null) {
this.branchPlan.put(brancher, selectedCandidate);
}
}
}
}
// --------------------------------------------------------------------------------------------
// Accessors
// --------------------------------------------------------------------------------------------
/**
* Gets the optimizer's pact node for which this plan candidate node was created.
*
* @return The template optimizer's node.
*/
public OptimizerNode getOriginalOptimizerNode() {
return this.template;
}
/**
* Gets the pact contract this node represents in the plan.
*
* @return The pact contract this node represents in the plan.
*/
public Operator<?> getPactContract() {
return this.template.getPactContract();
}
/**
* Gets the name of the plan node.
*
* @return The name of the plan node.
*/
public String getNodeName() {
return this.nodeName;
}
public int getMemoryConsumerWeight() {
return this.driverStrategy.isMaterializing() ? 1 : 0;
}
/**
* Gets the memory dedicated to each sub-task for this node.
*
* @return The memory per task, in bytes.
*/
public long getMemoryPerSubTask() {
return this.memoryPerSubTask;
}
/**
* Sets the memory dedicated to each task for this node.
*
* @param memoryPerTask The memory per sub-task, in bytes.
*/
public void setMemoryPerSubTask(long memoryPerTask) {
this.memoryPerSubTask = memoryPerTask;
}
/**
* Gets the driver strategy from this node. This determines for example for a <i>match</i> Pact whether
* to use a merge or a hybrid hash strategy.
*
* @return The driver strategy.
*/
public DriverStrategy getDriverStrategy() {
return this.driverStrategy;
}
/**
* Sets the driver strategy for this node. Usually should not be changed.
*
* @return The driver strategy.
*/
public void setDriverStrategy(DriverStrategy newDriverStrategy) {
this.driverStrategy = newDriverStrategy;
}
public void initProperties(GlobalProperties globals, LocalProperties locals) {
if (this.globalProps != null || this.localProps != null) {
throw new IllegalStateException();
}
this.globalProps = globals;
this.localProps = locals;
}
/**
* Gets the local properties from this PlanNode.
*
* @return The local properties.
*/
public LocalProperties getLocalProperties() {
return this.localProps;
}
/**
* Gets the global properties from this PlanNode.
*
* @return The global properties.
*/
public GlobalProperties getGlobalProperties() {
return this.globalProps;
}
/**
* Gets the costs incurred by this node. The costs reflect also the costs incurred by the shipping strategies
* of the incoming connections.
*
* @return The node-costs, or null, if not yet set.
*/
public Costs getNodeCosts() {
return this.nodeCosts;
}
/**
* Gets the cumulative costs of this nose. The cumulative costs are the the sum of the costs
* of this node and of all nodes in the subtree below this node.
*
* @return The cumulative costs, or null, if not yet set.
*/
public Costs getCumulativeCosts() {
return this.cumulativeCosts;
}
public Costs getCumulativeCostsShare() {
if (this.cumulativeCosts == null){
return null;
} else {
Costs result = cumulativeCosts.clone();
if (this.template != null && this.template.getOutgoingConnections() != null) {
int outDegree = this.template.getOutgoingConnections().size();
if (outDegree > 0) {
result.divideBy(outDegree);
}
}
return result;
}
}
/**
* Sets the basic cost for this node to the given value, and sets the cumulative costs
* to those costs plus the cost shares of all inputs (regular and broadcast).
*
* @param nodeCosts The already knows costs for this node
* (this cost a produces by a concrete {@code OptimizerNode} subclass.
*/
public void setCosts(Costs nodeCosts) {
// set the node costs
this.nodeCosts = nodeCosts;
// the cumulative costs are the node costs plus the costs of all inputs
this.cumulativeCosts = nodeCosts.clone();
// add all the normal inputs
for (Iterator<PlanNode> preds = getPredecessors(); preds.hasNext();) {
Costs parentCosts = preds.next().getCumulativeCostsShare();
if (parentCosts != null) {
this.cumulativeCosts.addCosts(parentCosts);
} else {
throw new CompilerException("Trying to set the costs of an operator before the predecessor costs are computed.");
}
}
// add all broadcast variable inputs
if (this.broadcastInputs != null) {
for (NamedChannel nc : this.broadcastInputs) {
Costs bcInputCost = nc.getSource().getCumulativeCostsShare();
if (bcInputCost != null) {
this.cumulativeCosts.addCosts(bcInputCost);
} else {
throw new CompilerException("Trying to set the costs of an operator before the broadcast input costs are computed.");
}
}
}
}
public void setDegreeOfParallelism(int parallelism) {
this.degreeOfParallelism = parallelism;
}
public void setSubtasksPerInstance(int subTasksPerInstance) {
this.subtasksPerInstance = subTasksPerInstance;
}
public int getDegreeOfParallelism() {
return this.degreeOfParallelism;
}
public int getSubtasksPerInstance() {
return this.subtasksPerInstance;
}
public long getGuaranteedAvailableMemory() {
return this.template.getMinimalMemoryAcrossAllSubTasks();
}
public Map<OptimizerNode, PlanNode> getBranchPlan() {
return branchPlan;
}
// --------------------------------------------------------------------------------------------
// Input, Predecessors, Successors
// --------------------------------------------------------------------------------------------
public abstract Iterator<Channel> getInputs();
@Override
public abstract Iterator<PlanNode> getPredecessors();
/**
* Sets a list of all broadcast inputs attached to this node.
*/
public void setBroadcastInputs(List<NamedChannel> broadcastInputs) {
if (broadcastInputs != null) {
this.broadcastInputs = broadcastInputs;
// update the branch map
for (NamedChannel nc : broadcastInputs) {
PlanNode source = nc.getSource();
mergeBranchPlanMaps(branchPlan, source.branchPlan);
}
}
// do a sanity check that if we are branching, we have now candidates for each branch point
if (this.template.hasUnclosedBranches()) {
if (this.branchPlan == null) {
throw new CompilerException("Branching and rejoining logic did not find a candidate for the branching point.");
}
for (UnclosedBranchDescriptor uc : this.template.getOpenBranches()) {
OptimizerNode brancher = uc.getBranchingNode();
if (this.branchPlan.get(brancher) == null) {
throw new CompilerException("Branching and rejoining logic did not find a candidate for the branching point.");
}
}
}
}
/**
* Gets a list of all broadcast inputs attached to this node.
*/
public List<NamedChannel> getBroadcastInputs() {
return this.broadcastInputs;
}
/**
* Adds a channel to a successor node to this node.
*
* @param channel The channel to the successor.
*/
public void addOutgoingChannel(Channel channel) {
this.outChannels.add(channel);
}
/**
* Gets a list of all outgoing channels leading to successors.
*
* @return A list of all channels leading to successors.
*/
public List<Channel> getOutgoingChannels() {
return this.outChannels;
}
// --------------------------------------------------------------------------------------------
// Miscellaneous
// --------------------------------------------------------------------------------------------
public void updatePropertiesWithUniqueSets(Set<FieldSet> uniqueFieldCombinations) {
if (uniqueFieldCombinations == null || uniqueFieldCombinations.isEmpty()) {
return;
}
for (FieldSet fields : uniqueFieldCombinations) {
this.globalProps.addUniqueFieldCombination(fields);
this.localProps.addUniqueFields(fields);
}
}
public PlanNode getCandidateAtBranchPoint(OptimizerNode branchPoint) {
if(branchPlan == null){
return null;
}else{
return this.branchPlan.get(branchPoint);
}
}
/**
* Sets the pruning marker to true.
*/
public void setPruningMarker() {
this.pFlag = true;
}
/**
* Checks whether the pruning marker was set.
*
* @return True, if the pruning marker was set, false otherwise.
*/
public boolean isPruneMarkerSet() {
return this.pFlag;
}
public boolean isOnDynamicPath() {
return this.template.isOnDynamicPath();
}
public int getCostWeight() {
return this.template.getCostWeight();
}
// --------------------------------------------------------------------------------------------
public abstract SourceAndDamReport hasDamOnPathDownTo(PlanNode source);
// --------------------------------------------------------------------------------------------
@Override
public String toString() {
return this.template.getName() + " \"" + getPactContract().getName() + "\" : " + this.driverStrategy +
" [[ " + this.globalProps + " ]] [[ " + this.localProps + " ]]";
}
// --------------------------------------------------------------------------------------------
@Override
public OptimizerNode getOptimizerNode() {
return this.template;
}
@Override
public PlanNode getPlanNode() {
return this;
}
@Override
public Iterator<DumpableConnection<PlanNode>> getDumpableInputs() {
List<DumpableConnection<PlanNode>> allInputs = new ArrayList<DumpableConnection<PlanNode>>();
for (Iterator<Channel> inputs = getInputs(); inputs.hasNext();) {
allInputs.add(inputs.next());
}
for (NamedChannel c : getBroadcastInputs()) {
allInputs.add(c);
}
return allInputs.iterator();
}
public static enum SourceAndDamReport {
NOT_FOUND, FOUND_SOURCE, FOUND_SOURCE_AND_DAM;
}
}