Package eu.stratosphere.compiler.dag

Source Code of eu.stratosphere.compiler.dag.WorksetIterationNode

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.compiler.dag;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import eu.stratosphere.api.common.operators.base.DeltaIterationBase;
import eu.stratosphere.api.common.operators.util.FieldList;
import eu.stratosphere.compiler.CompilerException;
import eu.stratosphere.compiler.DataStatistics;
import eu.stratosphere.compiler.PactCompiler.InterestingPropertyVisitor;
import eu.stratosphere.compiler.costs.CostEstimator;
import eu.stratosphere.compiler.dataproperties.GlobalProperties;
import eu.stratosphere.compiler.dataproperties.InterestingProperties;
import eu.stratosphere.compiler.dataproperties.LocalProperties;
import eu.stratosphere.compiler.dataproperties.PartitioningProperty;
import eu.stratosphere.compiler.dataproperties.RequestedGlobalProperties;
import eu.stratosphere.compiler.dataproperties.RequestedLocalProperties;
import eu.stratosphere.compiler.operators.OperatorDescriptorDual;
import eu.stratosphere.compiler.operators.SolutionSetDeltaOperator;
import eu.stratosphere.compiler.plan.Channel;
import eu.stratosphere.compiler.plan.DualInputPlanNode;
import eu.stratosphere.compiler.plan.NamedChannel;
import eu.stratosphere.compiler.plan.PlanNode;
import eu.stratosphere.compiler.plan.SingleInputPlanNode;
import eu.stratosphere.compiler.plan.SolutionSetPlanNode;
import eu.stratosphere.compiler.plan.WorksetIterationPlanNode;
import eu.stratosphere.compiler.plan.WorksetPlanNode;
import eu.stratosphere.compiler.util.NoOpBinaryUdfOp;
import eu.stratosphere.pact.runtime.shipping.ShipStrategyType;
import eu.stratosphere.pact.runtime.task.DriverStrategy;
import eu.stratosphere.pact.runtime.task.util.LocalStrategy;
import eu.stratosphere.types.Nothing;
import eu.stratosphere.types.NothingTypeInfo;
import eu.stratosphere.util.Visitor;

/**
* A node in the optimizer's program representation for a workset iteration.
*/
public class WorksetIterationNode extends TwoInputNode implements IterationNode {
 
  private static final int DEFAULT_COST_WEIGHT = 20;
 
 
  private final FieldList solutionSetKeyFields;
 
  private final GlobalProperties partitionedProperties;
 
  private SolutionSetNode solutionSetNode;
 
  private WorksetNode worksetNode;
 
  private OptimizerNode solutionSetDelta;
 
  private OptimizerNode nextWorkset;
 
  private PactConnection solutionSetDeltaRootConnection;
 
  private PactConnection nextWorksetRootConnection;
 
  private SingleRootJoiner singleRoot;
 
  private boolean solutionDeltaImmediatelyAfterSolutionJoin;
 
  private final int costWeight;

  // --------------------------------------------------------------------------------------------
 
  /**
   * Creates a new node with a single input for the optimizer plan.
   *
   * @param iteration The iteration operator that the node represents.
   */
  public WorksetIterationNode(DeltaIterationBase<?, ?> iteration) {
    super(iteration);
   
    final int[] ssKeys = iteration.getSolutionSetKeyFields();
    if (ssKeys == null || ssKeys.length == 0) {
      throw new CompilerException("Invalid WorksetIteration: No key fields defined for the solution set.");
    }
    this.solutionSetKeyFields = new FieldList(ssKeys);
    this.partitionedProperties = new GlobalProperties();
    this.partitionedProperties.setHashPartitioned(this.solutionSetKeyFields);
   
    int weight = iteration.getMaximumNumberOfIterations() > 0 ?
      iteration.getMaximumNumberOfIterations() : DEFAULT_COST_WEIGHT;
     
    if (weight > OptimizerNode.MAX_DYNAMIC_PATH_COST_WEIGHT) {
      weight = OptimizerNode.MAX_DYNAMIC_PATH_COST_WEIGHT;
    }
    this.costWeight = weight;
   
    this.possibleProperties.add(new WorksetOpDescriptor(this.solutionSetKeyFields));
  }

  // --------------------------------------------------------------------------------------------
 
  public DeltaIterationBase<?, ?> getIterationContract() {
    return (DeltaIterationBase<?, ?>) getPactContract();
  }
 
  public SolutionSetNode getSolutionSetNode() {
    return this.solutionSetNode;
  }
 
  public WorksetNode getWorksetNode() {
    return this.worksetNode;
  }
 
  public OptimizerNode getNextWorkset() {
    return this.nextWorkset;
  }
 
  public OptimizerNode getSolutionSetDelta() {
    return this.solutionSetDelta;
  }

  public void setPartialSolution(SolutionSetNode solutionSetNode, WorksetNode worksetNode) {
    if (this.solutionSetNode != null || this.worksetNode != null) {
      throw new IllegalStateException("Error: Initializing WorksetIterationNode multiple times.");
    }
    this.solutionSetNode = solutionSetNode;
    this.worksetNode = worksetNode;
  }
 
  public void setNextPartialSolution(OptimizerNode solutionSetDelta, OptimizerNode nextWorkset) {
    // check whether the next partial solution is itself the join with
    // the partial solution (so we can potentially do direct updates)
    if (solutionSetDelta instanceof TwoInputNode) {
      TwoInputNode solutionDeltaTwoInput = (TwoInputNode) solutionSetDelta;
      if (solutionDeltaTwoInput.getFirstPredecessorNode() == this.solutionSetNode ||
        solutionDeltaTwoInput.getSecondPredecessorNode() == this.solutionSetNode)
      {
        this.solutionDeltaImmediatelyAfterSolutionJoin = true;
      }
    }
   
    // attach an extra node to the solution set delta for the cases where we need to repartition
    UnaryOperatorNode solutionSetDeltaUpdateAux = new UnaryOperatorNode("Solution-Set Delta", getSolutionSetKeyFields(),
        new SolutionSetDeltaOperator(getSolutionSetKeyFields()));
    solutionSetDeltaUpdateAux.setDegreeOfParallelism(getDegreeOfParallelism());
    solutionSetDeltaUpdateAux.setSubtasksPerInstance(getSubtasksPerInstance());
   
    PactConnection conn = new PactConnection(solutionSetDelta, solutionSetDeltaUpdateAux);
    solutionSetDeltaUpdateAux.setIncomingConnection(conn);
    solutionSetDelta.addOutgoingConnection(conn);
   
    this.solutionSetDelta = solutionSetDeltaUpdateAux;
    this.nextWorkset = nextWorkset;
   
    this.singleRoot = new SingleRootJoiner();
    this.solutionSetDeltaRootConnection = new PactConnection(solutionSetDeltaUpdateAux, this.singleRoot);
    this.nextWorksetRootConnection = new PactConnection(nextWorkset, this.singleRoot);
    this.singleRoot.setInputs(this.solutionSetDeltaRootConnection, this.nextWorksetRootConnection);
   
    solutionSetDeltaUpdateAux.addOutgoingConnection(this.solutionSetDeltaRootConnection);
    nextWorkset.addOutgoingConnection(this.nextWorksetRootConnection);
  }
 
  public int getCostWeight() {
    return this.costWeight;
  }
 
  public TwoInputNode getSingleRootOfStepFunction() {
    return this.singleRoot;
  }
 
  public FieldList getSolutionSetKeyFields() {
    return this.solutionSetKeyFields;
  }
 
  public OptimizerNode getInitialSolutionSetPredecessorNode() {
    return getFirstPredecessorNode();
  }
 
  public OptimizerNode getInitialWorksetPredecessorNode() {
    return getSecondPredecessorNode();
  }

  // --------------------------------------------------------------------------------------------
 
  @Override
  public String getName() {
    return "Workset Iteration";
  }
 
  @Override
  public boolean isFieldConstant(int input, int fieldNumber) {
    return false;
  }
 
  protected void readStubAnnotations() {}
 
  @Override
  protected void computeOperatorSpecificDefaultEstimates(DataStatistics statistics) {
    this.estimatedOutputSize = getFirstPredecessorNode().getEstimatedOutputSize();
    this.estimatedNumRecords = getFirstPredecessorNode().getEstimatedNumRecords();
  }
 
  // --------------------------------------------------------------------------------------------
  //                             Properties and Optimization
  // --------------------------------------------------------------------------------------------
 
  @Override
  public boolean isMemoryConsumer() {
    return true;
  }
 
  @Override
  protected List<OperatorDescriptorDual> getPossibleProperties() {
    return new ArrayList<OperatorDescriptorDual>(1);
  }
 
  @Override
  public void computeInterestingPropertiesForInputs(CostEstimator estimator) {
    // our own solution (the solution set) is always partitioned and this cannot be adjusted
    // depending on what the successor to the workset iteration requests. for that reason,
    // we ignore incoming interesting properties.
   
    // in addition, we need to make 2 interesting property passes, because the root of the step function
    // that computes the next workset needs the interesting properties as generated by the
    // workset source of the step function. the second pass concerns only the workset path.
    // as initial interesting properties, we have the trivial ones for the step function,
    // and partitioned on the solution set key for the solution set delta
   
    RequestedGlobalProperties partitionedProperties = new RequestedGlobalProperties();
    partitionedProperties.setHashPartitioned(this.solutionSetKeyFields);
    InterestingProperties partitionedIP = new InterestingProperties();
    partitionedIP.addGlobalProperties(partitionedProperties);
    partitionedIP.addLocalProperties(new RequestedLocalProperties());
   
    this.nextWorksetRootConnection.setInterestingProperties(new InterestingProperties());
    this.solutionSetDeltaRootConnection.setInterestingProperties(partitionedIP.clone());
   
    InterestingPropertyVisitor ipv = new InterestingPropertyVisitor(estimator);
    this.nextWorkset.accept(ipv);
    this.solutionSetDelta.accept(ipv);
   
    // take the interesting properties of the partial solution and add them to the root interesting properties
    InterestingProperties worksetIntProps = this.worksetNode.getInterestingProperties();
    InterestingProperties intProps = new InterestingProperties();
    intProps.getGlobalProperties().addAll(worksetIntProps.getGlobalProperties());
    intProps.getLocalProperties().addAll(worksetIntProps.getLocalProperties());
   
    // clear all interesting properties to prepare the second traversal
    this.nextWorksetRootConnection.clearInterestingProperties();
    this.nextWorkset.accept(InterestingPropertiesClearer.INSTANCE);
   
    // 2nd pass
    this.nextWorksetRootConnection.setInterestingProperties(intProps);
    this.nextWorkset.accept(ipv);
   
    // now add the interesting properties of the workset to the workset input
    final InterestingProperties inProps = this.worksetNode.getInterestingProperties().clone();
    inProps.addGlobalProperties(new RequestedGlobalProperties());
    inProps.addLocalProperties(new RequestedLocalProperties());
    this.input2.setInterestingProperties(inProps);
   
    // the partial solution must be hash partitioned, so it has only that as interesting properties
    this.input1.setInterestingProperties(partitionedIP);
  }
 
 
  @Override
  protected void instantiate(OperatorDescriptorDual operator, Channel solutionSetIn, Channel worksetIn,
      List<Set<? extends NamedChannel>> broadcastPlanChannels, List<PlanNode> target, CostEstimator estimator,
      RequestedGlobalProperties globPropsReqSolutionSet,RequestedGlobalProperties globPropsReqWorkset,
      RequestedLocalProperties locPropsReqSolutionSet, RequestedLocalProperties locPropsReqWorkset)
  {
    // check for pipeline breaking using hash join with build on the solution set side
    placePipelineBreakersIfNecessary(DriverStrategy.HYBRIDHASH_BUILD_FIRST, solutionSetIn, worksetIn);
   
    // NOTES ON THE ENUMERATION OF THE STEP FUNCTION PLANS:
    // Whenever we instantiate the iteration, we enumerate new candidates for the step function.
    // That way, we make sure we have an appropriate plan for each candidate for the initial partial solution,
    // we have a fitting candidate for the step function (often, work is pushed out of the step function).
    // Among the candidates of the step function, we keep only those that meet the requested properties of the
    // current candidate initial partial solution. That makes sure these properties exist at the beginning of
    // every iteration.
   
    // 1) Because we enumerate multiple times, we may need to clean the cached plans
    //    before starting another enumeration
    this.nextWorkset.accept(PlanCacheCleaner.INSTANCE);
    this.solutionSetDelta.accept(PlanCacheCleaner.INSTANCE);
   
    // 2) Give the partial solution the properties of the current candidate for the initial partial solution
    //    This concerns currently only the workset.
    this.worksetNode.setCandidateProperties(worksetIn.getGlobalProperties(), worksetIn.getLocalProperties(), worksetIn);
    this.solutionSetNode.setCandidateProperties(this.partitionedProperties, new LocalProperties(), solutionSetIn);
   
    final SolutionSetPlanNode sspn = this.solutionSetNode.getCurrentSolutionSetPlanNode();
    final WorksetPlanNode wspn = this.worksetNode.getCurrentWorksetPlanNode();
   
    // 3) Get the alternative plans
    List<PlanNode> solutionSetDeltaCandidates = this.solutionSetDelta.getAlternativePlans(estimator);
    List<PlanNode> worksetCandidates = this.nextWorkset.getAlternativePlans(estimator);
   
    // 4) Throw away all that are not compatible with the properties currently requested to the
    //    initial partial solution
   
    // Make sure that the workset candidates fulfill the input requirements
    for (Iterator<PlanNode> planDeleter = worksetCandidates.iterator(); planDeleter.hasNext(); ) {
      PlanNode candidate = planDeleter.next();
      if (!(globPropsReqWorkset.isMetBy(candidate.getGlobalProperties()) && locPropsReqWorkset.isMetBy(candidate.getLocalProperties()))) {
        planDeleter.remove();
      }
    }
    if (worksetCandidates.isEmpty()) {
      return;
    }
   
    // sanity check the solution set delta and cancel out the delta node, if it is not needed
    for (Iterator<PlanNode> deltaPlans = solutionSetDeltaCandidates.iterator(); deltaPlans.hasNext(); ) {
      SingleInputPlanNode candidate = (SingleInputPlanNode) deltaPlans.next();
      GlobalProperties gp = candidate.getGlobalProperties();
     
      if (gp.getPartitioning() != PartitioningProperty.HASH_PARTITIONED || gp.getPartitioningFields() == null ||
          !gp.getPartitioningFields().equals(this.solutionSetKeyFields))
      {
        throw new CompilerException("Bug: The solution set delta is not partitioned.");
      }
    }
   
    // 5) Create a candidate for the Iteration Node for every remaining plan of the step function.
   
    final GlobalProperties gp = new GlobalProperties();
    gp.setHashPartitioned(this.solutionSetKeyFields);
    gp.addUniqueFieldCombination(this.solutionSetKeyFields);
   
    final LocalProperties lp = new LocalProperties();
    lp.addUniqueFields(this.solutionSetKeyFields);
   
    // take all combinations of solution set delta and workset plans
    for (PlanNode solutionSetCandidate : solutionSetDeltaCandidates) {
      for (PlanNode worksetCandidate : worksetCandidates) {
        // check whether they have the same operator at their latest branching point
        if (this.singleRoot.areBranchCompatible(solutionSetCandidate, worksetCandidate)) {
         
          SingleInputPlanNode siSolutionDeltaCandidate = (SingleInputPlanNode) solutionSetCandidate;
          boolean immediateDeltaUpdate;
         
          // check whether we need a dedicated solution set delta operator, or whether we can update on the fly
          if (siSolutionDeltaCandidate.getInput().getShipStrategy() == ShipStrategyType.FORWARD && this.solutionDeltaImmediatelyAfterSolutionJoin) {
            // we do not need this extra node. we can make the predecessor the delta
            // sanity check the node and connection
            if (siSolutionDeltaCandidate.getDriverStrategy() != DriverStrategy.UNARY_NO_OP || siSolutionDeltaCandidate.getInput().getLocalStrategy() != LocalStrategy.NONE) {
              throw new CompilerException("Invalid Solution set delta node.");
            }
           
            solutionSetCandidate = siSolutionDeltaCandidate.getInput().getSource();
            immediateDeltaUpdate = true;
          } else {
            // was not partitioned, we need to keep this node.
            // mark that we materialize the input
            siSolutionDeltaCandidate.getInput().setTempMode(TempMode.PIPELINE_BREAKER);
            immediateDeltaUpdate = false;
          }
         
          WorksetIterationPlanNode wsNode = new WorksetIterationPlanNode(
            this, "WorksetIteration ("+this.getPactContract().getName()+")", solutionSetIn, worksetIn, sspn, wspn, worksetCandidate, solutionSetCandidate);
          wsNode.setImmediateSolutionSetUpdate(immediateDeltaUpdate);
          wsNode.initProperties(gp, lp);
          target.add(wsNode);
        }
      }
    }
  }

  @Override
  public void computeUnclosedBranchStack() {
    if (this.openBranches != null) {
      return;
    }

   
    // IMPORTANT: First compute closed branches from the two inputs
    // we need to do this because the runtime iteration head effectively joins
    addClosedBranches(getFirstPredecessorNode().closedBranchingNodes);
    addClosedBranches(getSecondPredecessorNode().closedBranchingNodes);

    List<UnclosedBranchDescriptor> result1 = getFirstPredecessorNode().getBranchesForParent(getFirstIncomingConnection());
    List<UnclosedBranchDescriptor> result2 = getSecondPredecessorNode().getBranchesForParent(getSecondIncomingConnection());

    ArrayList<UnclosedBranchDescriptor> inputsMerged1 = new ArrayList<UnclosedBranchDescriptor>();
    mergeLists(result1, result2, inputsMerged1);
   
    addClosedBranches(getSingleRootOfStepFunction().closedBranchingNodes);

    ArrayList<UnclosedBranchDescriptor> inputsMerged2 = new ArrayList<UnclosedBranchDescriptor>();
    List<UnclosedBranchDescriptor> result3 = getSingleRootOfStepFunction().openBranches;
    mergeLists(inputsMerged1, result3, inputsMerged2);

    // handle the data flow branching for the broadcast inputs
    List<UnclosedBranchDescriptor> result = computeUnclosedBranchStackForBroadcastInputs(inputsMerged2);

    this.openBranches = (result == null || result.isEmpty()) ? Collections.<UnclosedBranchDescriptor>emptyList() : result;
  }
 
  // --------------------------------------------------------------------------------------------
  //                      Iteration Specific Traversals
  // --------------------------------------------------------------------------------------------

  public void acceptForStepFunction(Visitor<OptimizerNode> visitor) {
    this.singleRoot.accept(visitor);
  }
 
  // --------------------------------------------------------------------------------------------
  //                             Utility Classes
  // --------------------------------------------------------------------------------------------
 
  private static final class WorksetOpDescriptor extends OperatorDescriptorDual {
   
    private WorksetOpDescriptor(FieldList solutionSetKeys) {
      super(solutionSetKeys, null);
    }

    @Override
    public DriverStrategy getStrategy() {
      return DriverStrategy.NONE;
    }

    @Override
    protected List<GlobalPropertiesPair> createPossibleGlobalProperties() {
      RequestedGlobalProperties partitionedGp = new RequestedGlobalProperties();
      partitionedGp.setHashPartitioned(this.keys1);
      return Collections.singletonList(new GlobalPropertiesPair(partitionedGp, new RequestedGlobalProperties()));
    }

    @Override
    protected List<LocalPropertiesPair> createPossibleLocalProperties() {
      // all properties are possible
      return Collections.singletonList(new LocalPropertiesPair(
        new RequestedLocalProperties(), new RequestedLocalProperties()));
    }
   
    @Override
    public boolean areCoFulfilled(RequestedLocalProperties requested1, RequestedLocalProperties requested2,
        LocalProperties produced1, LocalProperties produced2) {
      return true;
    }

    @Override
    public DualInputPlanNode instantiate(Channel in1, Channel in2, TwoInputNode node) {
      throw new UnsupportedOperationException();
    }

    @Override
    public GlobalProperties computeGlobalProperties(GlobalProperties in1, GlobalProperties in2) {
      throw new UnsupportedOperationException();
    }

    @Override
    public LocalProperties computeLocalProperties(LocalProperties in1, LocalProperties in2) {
      throw new UnsupportedOperationException();
    }
  }
 
  public static class SingleRootJoiner extends TwoInputNode {
   
    SingleRootJoiner() {
      super(new NoOpBinaryUdfOp<Nothing>(new NothingTypeInfo()));
     
      setDegreeOfParallelism(1);
      setSubtasksPerInstance(1);
    }
   
    public void setInputs(PactConnection input1, PactConnection input2) {
      this.input1 = input1;
      this.input2 = input2;
    }
   
    @Override
    public String getName() {
      return "Internal Utility Node";
    }

    @Override
    protected List<OperatorDescriptorDual> getPossibleProperties() {
      return Collections.emptyList();
    }

    @Override
    protected void computeOperatorSpecificDefaultEstimates(DataStatistics statistics) {
      // no estimates are needed here
    }

  }
}
TOP

Related Classes of eu.stratosphere.compiler.dag.WorksetIterationNode

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.