Package com.odiago.flumebase.plan

Source Code of com.odiago.flumebase.plan.PropagateSchemas

/**
* Licensed to Odiago, Inc. under one or more contributor license
* agreements.  See the NOTICE.txt file distributed with this work for
* additional information regarding copyright ownership.  Odiago, Inc.
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
* License for the specific language governing permissions and limitations
* under the License.
*/

package com.odiago.flumebase.plan;

import java.util.List;

import org.apache.avro.Schema;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.odiago.flumebase.util.DAG;
import com.odiago.flumebase.util.DAGOperatorException;

/**
* With certain components of the DAG marked with input and output
* schemas, flow this information forward to all nodes in the DAG.
*
* <p>At each node:
* <ul>
*   <li>The output schemas from each predecessor node should be identical.
*   Fail if they are not.</li>
*   <li>The input schema of this node should match the output schemas from
*   the predecessor nodes. Fail if it does not. If it is unset, copy it from
*   a predecessor node.</li>
*   <li>If a node has multiple input schemas, each of the predecessors should
*   match one of the node's inputs.</li>
*   <li>The output schema of this node, if unset, should be set to match the
*   input schema. Fail if a node has multiple input schemas.</li>
* </ul>
* </p>
*
* This is a DAG operator to be used with bfs after logical plan formation
* from the AST, but before physical plan resolution.
*/
public class PropagateSchemas extends DAG.Operator<PlanNode> {
  private static final Logger LOG = LoggerFactory.getLogger(
      PropagateSchemas.class.getName());

  @Override
  public void process(PlanNode node) throws DAGOperatorException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Operating on node: [" + node + "]");
    }

    List<PlanNode> parents = node.getParents();

    // A node is either defined as having one well-specified input schema, or a list
    // of options. If the list is set, each predecessor output must match one of the
    // options.
    List<Schema> myInputSchemas = (List<Schema>)
        node.getAttr(PlanNode.MULTI_INPUT_SCHEMA_ATTR);
    Schema inputSchema = null;
    for (PlanNode parent : parents) {
      Schema parentOutputSchema = (Schema) parent.getAttr(PlanNode.OUTPUT_SCHEMA_ATTR);
      if (null == parentOutputSchema) {
        // This should not happen if we use this operator correctly with BFS.
        throw new DAGOperatorException("Node " + parent + " does not have output schema set");
      }

      if (myInputSchemas != null) {
        // This node accepts multiple input schemas. Check that the parent's output
        // schema matches one of them.
        boolean match = false;
        for (Schema candidate : myInputSchemas) {
          if (candidate.equals(parentOutputSchema)) {
            match = true;
            break;
          }
        }

        if (!match) {
          throw new DAGOperatorException("Schema resolution execption; node [" + node
              + "] has a parent output schema that does not match any candidate input schema.");
        }
      } else if (null == inputSchema) {
        // This node will have a single input schema. Cache the first parent's output schema.
        inputSchema = parentOutputSchema;
      } else {
        // Now check that each other parent has the same schema as the first parent.
        if (!parentOutputSchema.equals(inputSchema)) {
          throw new DAGOperatorException("Schema resolution exception; node [" + node
              + "] has parents with mismatched schemas:\nSchema 1:\n"
              + inputSchema + "\nSchema 2:\n" + parentOutputSchema);
        }
      }
    }

    if (myInputSchemas == null) {
      // For nodes with exactly one input schema, check that the defined input
      // schema (if any) matches the output of the predecessors. If unset,
      // set it to the predecessor output.
      Schema myInputSchema = (Schema) node.getAttr(PlanNode.INPUT_SCHEMA_ATTR);
      if (null == myInputSchema) {
        node.setAttr(PlanNode.INPUT_SCHEMA_ATTR, inputSchema);
        myInputSchema = inputSchema;
      }

      if (null != myInputSchema && null != inputSchema) {
        // Check that these are equal.
        if (!inputSchema.equals(myInputSchema)) {
          throw new DAGOperatorException("Node [" + node + "] has set input schema:\n"
              + myInputSchema + "\nbut parents have output schema:\n" + inputSchema);
        }
      }

      // And if there's no output schema defined, set it equal to our input schema.
      Schema myOutputSchema = (Schema) node.getAttr(PlanNode.OUTPUT_SCHEMA_ATTR);
      if (null == myOutputSchema) {
        node.setAttr(PlanNode.OUTPUT_SCHEMA_ATTR, myInputSchema);
      }
    } else {
      // For nodes that accept multiple input schemas, just check that the
      // output schema is set, since we can't infer the output schema from the set
      // of input schemas.
      if (null == node.getAttr(PlanNode.OUTPUT_SCHEMA_ATTR)) {
        throw new DAGOperatorException("Node [" + node
            + "] has multiple input schemas but the output schema is unset.");
      }

      // Sanity check: If this also has a singleton input schema set, complain.
      if (null != node.getAttr(PlanNode.INPUT_SCHEMA_ATTR)) {
        throw new DAGOperatorException("Node [" + node
            + "] has multiple input schemas and singleton input schema set.");
      }
    }
  }

 
}
TOP

Related Classes of com.odiago.flumebase.plan.PropagateSchemas

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.