Package eu.stratosphere.compiler.dag

Source Code of eu.stratosphere.compiler.dag.BinaryUnionNode

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.compiler.dag;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import eu.stratosphere.api.common.operators.Union;
import eu.stratosphere.compiler.CompilerException;
import eu.stratosphere.compiler.DataStatistics;
import eu.stratosphere.compiler.costs.CostEstimator;
import eu.stratosphere.compiler.dataproperties.GlobalProperties;
import eu.stratosphere.compiler.dataproperties.InterestingProperties;
import eu.stratosphere.compiler.dataproperties.RequestedGlobalProperties;
import eu.stratosphere.compiler.dataproperties.RequestedLocalProperties;
import eu.stratosphere.compiler.operators.BinaryUnionOpDescriptor;
import eu.stratosphere.compiler.operators.OperatorDescriptorDual;
import eu.stratosphere.compiler.plan.Channel;
import eu.stratosphere.compiler.plan.NamedChannel;
import eu.stratosphere.compiler.plan.PlanNode;
import eu.stratosphere.pact.runtime.shipping.ShipStrategyType;

/**
* The Optimizer representation of a binary <i>Union</i>.
*/
public class BinaryUnionNode extends TwoInputNode {
 
  private Set<RequestedGlobalProperties> channelProps;

  public BinaryUnionNode(Union<?> union){
    super(union);
  }

  @Override
  public String getName() {
    return "Union";
  }

  @Override
  protected List<OperatorDescriptorDual> getPossibleProperties() {
    return new ArrayList<OperatorDescriptorDual>();
  }
 
  @Override
  protected void computeOperatorSpecificDefaultEstimates(DataStatistics statistics) {
    long card1 = getFirstPredecessorNode().getEstimatedNumRecords();
    long card2 = getSecondPredecessorNode().getEstimatedNumRecords();
    this.estimatedNumRecords = (card1 < 0 || card2 < 0) ? -1 : card1 + card2;
   
    long size1 = getFirstPredecessorNode().getEstimatedOutputSize();
    long size2 = getSecondPredecessorNode().getEstimatedOutputSize();
    this.estimatedOutputSize = (size1 < 0 || size2 < 0) ? -1 : size1 + size2;
  }
 
  @Override
  public void computeUnionOfInterestingPropertiesFromSuccessors() {
    super.computeUnionOfInterestingPropertiesFromSuccessors();
    // clear all local properties, as they are destroyed anyways
    getInterestingProperties().getLocalProperties().clear();
  }
 
  @Override
  public void computeInterestingPropertiesForInputs(CostEstimator estimator) {
    final InterestingProperties props = getInterestingProperties();
   
    // if no other properties exist, add the pruned trivials back
    if (props.getGlobalProperties().isEmpty()) {
      props.addGlobalProperties(new RequestedGlobalProperties());
    }
    props.addLocalProperties(new RequestedLocalProperties());
    this.input1.setInterestingProperties(props.clone());
    this.input2.setInterestingProperties(props.clone());
   
    this.channelProps = props.getGlobalProperties();
  }
 
  @Override
  public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
    // check if we have a cached version
    if (this.cachedPlans != null) {
      return this.cachedPlans;
    }

    // step down to all producer nodes and calculate alternative plans
    final List<? extends PlanNode> subPlans1 = getFirstPredecessorNode().getAlternativePlans(estimator);
    final List<? extends PlanNode> subPlans2 = getSecondPredecessorNode().getAlternativePlans(estimator);
   
    // calculate alternative sub-plans for broadcast inputs
    final List<Set<? extends NamedChannel>> broadcastPlanChannels = new ArrayList<Set<? extends NamedChannel>>();
    List<PactConnection> broadcastConnections = getBroadcastConnections();
    List<String> broadcastConnectionNames = getBroadcastConnectionNames();
    for (int i = 0; i < broadcastConnections.size(); i++ ) {
      PactConnection broadcastConnection = broadcastConnections.get(i);
      String broadcastConnectionName = broadcastConnectionNames.get(i);
      List<PlanNode> broadcastPlanCandidates = broadcastConnection.getSource().getAlternativePlans(estimator);
      // wrap the plan candidates in named channels
      HashSet<NamedChannel> broadcastChannels = new HashSet<NamedChannel>(broadcastPlanCandidates.size());
      for (PlanNode plan: broadcastPlanCandidates) {
        final NamedChannel c = new NamedChannel(broadcastConnectionName, plan);
        c.setShipStrategy(ShipStrategyType.BROADCAST);
        broadcastChannels.add(c);
      }
      broadcastPlanChannels.add(broadcastChannels);
    }
   
    final ArrayList<PlanNode> outputPlans = new ArrayList<PlanNode>();
   
    final BinaryUnionOpDescriptor operator = new BinaryUnionOpDescriptor();
    final RequestedLocalProperties noLocalProps = new RequestedLocalProperties();
   
    final int dop = getDegreeOfParallelism();
    final int subPerInstance = getSubtasksPerInstance();
    final int numInstances = dop / subPerInstance + (dop % subPerInstance == 0 ? 0 : 1);
    final int inDop1 = getFirstPredecessorNode().getDegreeOfParallelism();
    final int inSubPerInstance1 = getFirstPredecessorNode().getSubtasksPerInstance();
    final int inNumInstances1 = inDop1 / inSubPerInstance1 + (inDop1 % inSubPerInstance1 == 0 ? 0 : 1);
    final int inDop2 = getSecondPredecessorNode().getDegreeOfParallelism();
    final int inSubPerInstance2 = getSecondPredecessorNode().getSubtasksPerInstance();
    final int inNumInstances2 = inDop2 / inSubPerInstance2 + (inDop2 % inSubPerInstance2 == 0 ? 0 : 1);
   
    final boolean globalDopChange1 = numInstances != inNumInstances1;
    final boolean globalDopChange2 = numInstances != inNumInstances2;
    final boolean localDopChange1 = numInstances == inNumInstances1 & subPerInstance != inSubPerInstance1;
    final boolean localDopChange2 = numInstances == inNumInstances2 & subPerInstance != inSubPerInstance2;
   
    // enumerate all pairwise combination of the children's plans together with
    // all possible operator strategy combination
   
    // create all candidates
    for (PlanNode child1 : subPlans1) {
      for (PlanNode child2 : subPlans2) {
       
        // check that the children go together. that is the case if they build upon the same
        // candidate at the joined branch plan.
        if (!areBranchCompatible(child1, child2)) {
          continue;
        }
       
        for (RequestedGlobalProperties igps: this.channelProps) {
          // create a candidate channel for the first input. mark it cached, if the connection says so
          Channel c1 = new Channel(child1, this.input1.getMaterializationMode());
          if (this.input1.getShipStrategy() == null) {
            // free to choose the ship strategy
            igps.parameterizeChannel(c1, globalDopChange1, localDopChange1);
           
            // if the DOP changed, make sure that we cancel out properties, unless the
            // ship strategy preserves/establishes them even under changing DOPs
            if (globalDopChange1 && !c1.getShipStrategy().isNetworkStrategy()) {
              c1.getGlobalProperties().reset();
            }
            if (localDopChange1 && !(c1.getShipStrategy().isNetworkStrategy() ||
                  c1.getShipStrategy().compensatesForLocalDOPChanges())) {
              c1.getGlobalProperties().reset();
            }
          } else {
            // ship strategy fixed by compiler hint
            if (this.keys1 != null) {
              c1.setShipStrategy(this.input1.getShipStrategy(), this.keys1.toFieldList());
            } else {
              c1.setShipStrategy(this.input1.getShipStrategy());
            }
           
            if (globalDopChange1) {
              c1.adjustGlobalPropertiesForFullParallelismChange();
            } else if (localDopChange1) {
              c1.adjustGlobalPropertiesForLocalParallelismChange();
            }
          }
         
          // create a candidate channel for the first input. mark it cached, if the connection says so
          Channel c2 = new Channel(child2, this.input2.getMaterializationMode());
          if (this.input2.getShipStrategy() == null) {
            // free to choose the ship strategy
            igps.parameterizeChannel(c2, globalDopChange2, localDopChange2);
           
            // if the DOP changed, make sure that we cancel out properties, unless the
            // ship strategy preserves/establishes them even under changing DOPs
            if (globalDopChange2 && !c2.getShipStrategy().isNetworkStrategy()) {
              c2.getGlobalProperties().reset();
            }
            if (localDopChange2 && !(c2.getShipStrategy().isNetworkStrategy() ||
                  c2.getShipStrategy().compensatesForLocalDOPChanges())) {
              c2.getGlobalProperties().reset();
            }
          } else {
            // ship strategy fixed by compiler hint
            if (this.keys2 != null) {
              c2.setShipStrategy(this.input2.getShipStrategy(), this.keys2.toFieldList());
            } else {
              c2.setShipStrategy(this.input2.getShipStrategy());
            }
           
            if (globalDopChange2) {
              c2.adjustGlobalPropertiesForFullParallelismChange();
            } else if (localDopChange2) {
              c2.adjustGlobalPropertiesForLocalParallelismChange();
            }
          }
         
          // get the global properties and clear unique fields (not preserved anyways during the union)
          GlobalProperties p1 = c1.getGlobalProperties();
          GlobalProperties p2 = c2.getGlobalProperties();
          p1.clearUniqueFieldCombinations();
          p2.clearUniqueFieldCombinations();
         
          // adjust the partitionings, if they exist but are not equal. this may happen when both channels have a
          // partitioning that fulfills the requirements, but both are incompatible. For example may a property requirement
          // be ANY_PARTITIONING on fields (0) and one channel is range partitioned on that field, the other is hash
          // partitioned on that field.
          if (!igps.isTrivial() && !(p1.equals(p2))) {
            if (c1.getShipStrategy() == ShipStrategyType.FORWARD && c2.getShipStrategy() != ShipStrategyType.FORWARD) {
              // adjust c2 to c1
              c2 = c2.clone();
              p1.parameterizeChannel(c2,globalDopChange2);
            } else if (c2.getShipStrategy() == ShipStrategyType.FORWARD && c1.getShipStrategy() != ShipStrategyType.FORWARD) {
              // adjust c1 to c2
              c1 = c1.clone();
              p2.parameterizeChannel(c1,globalDopChange1);
            } else if (c1.getShipStrategy() == ShipStrategyType.FORWARD && c2.getShipStrategy() == ShipStrategyType.FORWARD) {
              boolean adjustC1 = c1.getEstimatedOutputSize() <= 0 || c2.getEstimatedOutputSize() <= 0 ||
                  c1.getEstimatedOutputSize() <= c2.getEstimatedOutputSize();
              if (adjustC1) {
                c2 = c2.clone();
                p1.parameterizeChannel(c2, globalDopChange2);
              } else {
                c1 = c1.clone();
                p2.parameterizeChannel(c1, globalDopChange1);
              }
            } else {
              // this should never happen, as it implies both realize a different strategy, which is
              // excluded by the check that the required strategies must match
              throw new CompilerException("Bug in Plan Enumeration for Union Node.");
            }
          }
         
          instantiate(operator, c1, c2, broadcastPlanChannels, outputPlans, estimator, igps, igps, noLocalProps, noLocalProps);
        }
      }
    }

    // cost and prune the plans
    for (PlanNode node : outputPlans) {
      estimator.costOperator(node);
    }
    prunePlanAlternatives(outputPlans);
    outputPlans.trimToSize();

    this.cachedPlans = outputPlans;
    return outputPlans;
  }
 
  @Override
  protected void readStubAnnotations() {}

  @Override
  public boolean isFieldConstant(int input, int fieldNumber) {
    return true;
  }
 
  @Override
  public void computeOutputEstimates(DataStatistics statistics) {
    OptimizerNode in1 = getFirstPredecessorNode();
    OptimizerNode in2 = getSecondPredecessorNode();
   
    this.estimatedNumRecords = in1.estimatedNumRecords > 0 && in2.estimatedNumRecords > 0 ?
        in1.estimatedNumRecords + in2.estimatedNumRecords : -1;
    this.estimatedOutputSize = in1.estimatedOutputSize > 0 && in2.estimatedOutputSize > 0 ?
      in1.estimatedOutputSize + in2.estimatedOutputSize : -1;
  }
}
TOP

Related Classes of eu.stratosphere.compiler.dag.BinaryUnionNode

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.