Package eu.stratosphere.api.java.operators

Source Code of eu.stratosphere.api.java.operators.TwoInputUdfOperator

/***********************************************************************************************************************
*
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.api.java.operators;

import java.lang.annotation.Annotation;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import eu.stratosphere.api.common.operators.DualInputSemanticProperties;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.functions.FunctionAnnotation;
import eu.stratosphere.api.java.functions.SemanticPropUtil;
import eu.stratosphere.types.TypeInformation;
import eu.stratosphere.configuration.Configuration;

/**
* The <tt>TwoInputUdfOperator</tt> is the base class of all binary operators that execute
* user-defined functions (UDFs). The UDFs encapsulated by this operator are naturally UDFs that
* have two inputs (such as {@link JoinFunction} or {@link CoGroupFunction}).
* <p>
* This class encapsulates utilities for the UDFs, such as broadcast variables, parameterization
* through configuration objects, and semantic properties.
*
* @param <IN1> The data type of the first input data set.
* @param <IN2> The data type of the second input data set.
* @param <OUT> The data type of the returned data set.
*/
public abstract class TwoInputUdfOperator<IN1, IN2, OUT, O extends TwoInputUdfOperator<IN1, IN2, OUT, O>>
  extends TwoInputOperator<IN1, IN2, OUT, O> implements UdfOperator<O>
{
  private Configuration parameters;

  private Map<String, DataSet<?>> broadcastVariables;

  private DualInputSemanticProperties udfSemantics;

  // --------------------------------------------------------------------------------------------

  /**
   * Creates a new operators with the two given data sets as inputs. The given result type
   * describes the data type of the elements in the data set produced by the operator.
   *
   * @param input1 The data set for the first input.
   * @param input2 The data set for the second input.
   * @param resultType The type of the elements in the resulting data set.
   */
  protected TwoInputUdfOperator(DataSet<IN1> input1, DataSet<IN2> input2, TypeInformation<OUT> resultType) {
    super(input1, input2, resultType);
  }
 
  protected void extractSemanticAnnotationsFromUdf(Class<?> udfClass) {
    Set<Annotation> annotations = FunctionAnnotation.readDualConstantAnnotations(udfClass);
   
    DualInputSemanticProperties dsp = SemanticPropUtil.getSemanticPropsDual(annotations,
          getInput1Type(), getInput2Type(), getResultType());

    setSemanticProperties(dsp);
  }

  // --------------------------------------------------------------------------------------------
  // Fluent API methods
  // --------------------------------------------------------------------------------------------

  @Override
  public O withParameters(Configuration parameters) {
    this.parameters = parameters;

    @SuppressWarnings("unchecked")
    O returnType = (O) this;
    return returnType;
  }

  @Override
  public O withBroadcastSet(DataSet<?> data, String name) {
    if (this.broadcastVariables == null) {
      this.broadcastVariables = new HashMap<String, DataSet<?>>();
    }

    this.broadcastVariables.put(name, data);

    @SuppressWarnings("unchecked")
    O returnType = (O) this;
    return returnType;
  }

  /**
   * Adds a constant-set annotation for the first input of the UDF.
   *
   * <p>
   * Constant set annotations are used by the optimizer to infer the existence of data properties (sorted, partitioned, grouped).
   * In certain cases, these annotations allow the optimizer to generate a more efficient execution plan which can lead to improved performance.
   * Constant set annotations can only be specified if the first input and the output type of the UDF are of {@link Tuple} data types.
   *
   * <p>
   * A constant-set annotation is a set of constant field specifications. The constant field specification String "4->3" specifies, that this UDF copies the fourth field of
   * an input tuple to the third field of the output tuple. Field references are zero-indexed.
   *
   * <p>
   * <b>NOTICE: Constant set annotations are optional, but if given need to be correct. Otherwise, the program might produce wrong results!</b>
   *
   * @param constantSetFirst A list of constant field specification Strings for the first input.
   * @return This operator with an annotated constant field set for the first input.
   */
  @SuppressWarnings("unchecked")
  public O withConstantSetFirst(String... constantSetFirst) {
    if (this.udfSemantics == null) {
      this.udfSemantics = new DualInputSemanticProperties();
    }
   
    SemanticPropUtil.getSemanticPropsDualFromString(this.udfSemantics, constantSetFirst, null,
        null, null, null, null, this.getInput1Type(), this.getInput2Type(), this.getResultType());

    O returnType = (O) this;
    return returnType;
  }
 
  /**
   * Adds a constant-set annotation for the second input of the UDF.
   *
   * <p>
   * Constant set annotations are used by the optimizer to infer the existence of data properties (sorted, partitioned, grouped).
   * In certain cases, these annotations allow the optimizer to generate a more efficient execution plan which can lead to improved performance.
   * Constant set annotations can only be specified if the second input and the output type of the UDF are of {@link Tuple} data types.
   *
   * <p>
   * A constant-set annotation is a set of constant field specifications. The constant field specification String "4->3" specifies, that this UDF copies the fourth field of
   * an input tuple to the third field of the output tuple. Field references are zero-indexed.
   *
   * <p>
   * <b>NOTICE: Constant set annotations are optional, but if given need to be correct. Otherwise, the program might produce wrong results!</b>
   *
   * @param constantSetSecond A list of constant field specification Strings for the second input.
   * @return This operator with an annotated constant field set for the second input.
   */
  @SuppressWarnings("unchecked")
  public O withConstantSetSecond(String... constantSetSecond) {
    if (this.udfSemantics == null) {
      this.udfSemantics = new DualInputSemanticProperties();
    }
   
    SemanticPropUtil.getSemanticPropsDualFromString(this.udfSemantics, null, constantSetSecond,
        null, null, null, null, this.getInput1Type(), this.getInput2Type(), this.getResultType());

    O returnType = (O) this;
    return returnType;
  }

  // --------------------------------------------------------------------------------------------
  // Accessors
  // --------------------------------------------------------------------------------------------

  @Override
  public Map<String, DataSet<?>> getBroadcastSets() {
    return this.broadcastVariables == null ?
        Collections.<String, DataSet<?>>emptyMap() :
        Collections.unmodifiableMap(this.broadcastVariables);
  }

  @Override
  public Configuration getParameters() {
    return this.parameters;
  }

  @Override
  public DualInputSemanticProperties getSematicProperties() {
    return this.udfSemantics;
  }

  /**
   * Sets the semantic properties for the user-defined function (UDF). The semantic properties
   * define how fields of tuples and other objects are modified or preserved through this UDF.
   * The configured properties can be retrieved via {@link UdfOperator#getSematicProperties()}.
   *
   * @param properties The semantic properties for the UDF.
   * @see UdfOperator#getSematicProperties()
   */
  public void setSemanticProperties(DualInputSemanticProperties properties) {
    this.udfSemantics = properties;
  }
}
TOP

Related Classes of eu.stratosphere.api.java.operators.TwoInputUdfOperator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.