Package com.odiago.flumebase.exec

Source Code of com.odiago.flumebase.exec.HashJoinElement

/**
* Licensed to Odiago, Inc. under one or more contributor license
* agreements.  See the NOTICE.txt file distributed with this work for
* additional information regarding copyright ownership.  Odiago, Inc.
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
* License for the specific language governing permissions and limitations
* under the License.
*/

package com.odiago.flumebase.exec;

import java.io.IOException;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.flume.core.Event;

import com.odiago.flumebase.lang.TimeSpan;

import com.odiago.flumebase.parser.TypedField;
import com.odiago.flumebase.parser.WindowSpec;

import com.odiago.flumebase.plan.HashJoinNode;

import com.odiago.flumebase.util.WindowedHashMap;

/**
* FlowElement that performs a hash join between two input streams
* based on equality of a specific input key.
*/
public class HashJoinElement extends FlowElementImpl {
  private static final Logger LOG = LoggerFactory.getLogger(
      HashJoinElement.class.getName());

  /**
   * HashMap containing enqueued elements of the left stream within the
   * current window.
   */
  private WindowedHashMap<Object, EventWrapper, Long> mLeftMap;

  /**
   * HashMap containing enqueued elements of the right stream within the
   * current window.
   */
  private WindowedHashMap<Object, EventWrapper, Long> mRightMap;

  /** Name of the left-side stream. */
  private String mLeftName;

  /** Name of the right-side stream. */
  private String mRightName;

  /** Name of the key field from the left stream. */
  private TypedField mLeftKey;

  /** Name of the key field from the right stream. */
  private TypedField mRightKey;

  /** Window specification in which we are joining. */
  private WindowSpec mWindowWidth;

  /**
   * The actual time interval over which we're doing the join; derived
   * from mWindowWidth.
   */
  private TimeSpan mTimeSpan;

  /** Name of the output stream. */
  private String mOutName;

  /**
   * Mapping from field names to indices in CompositeEventWrapper arrays
   * describing the output events from this join operation.
   */
  private Map<String, Integer> mFieldMap;

  /**
   * The amount of slack time we provide before we evict old elements.
   */
  private int mSlackTime;

  public HashJoinElement(FlowElementContext ctxt, String leftName, String rightName,
      TypedField leftKey, TypedField rightKey, WindowSpec windowWidth, String outName,
      List<TypedField> leftFieldNames, List<TypedField> rightFieldNames, Configuration conf) {
    super(ctxt);

    mSlackTime = conf.getInt(BucketedAggregationElement.SLACK_INTERVAL_KEY,
        BucketedAggregationElement.DEFAULT_SLACK_INTERVAL);
    if (mSlackTime < 0) {
      mSlackTime = BucketedAggregationElement.DEFAULT_SLACK_INTERVAL;
    }

    mLeftMap = new WindowedHashMap<Object, EventWrapper, Long>();
    mRightMap = new WindowedHashMap<Object, EventWrapper, Long>();

    mLeftName = leftName;
    mRightName = rightName;
    mLeftKey = leftKey;
    mRightKey = rightKey;
    mWindowWidth = windowWidth;
    try {
      assert mWindowWidth.getRangeSpec().isConstant();
      mTimeSpan = (TimeSpan) mWindowWidth.getRangeSpec().eval(new EmptyEventWrapper());
    } catch (IOException ioe) {
      // This should be a constant expression, so this would be quite surprising.
      LOG.error("Unexpected IOE during timespan eval() in HashJoin: " + ioe);
    }
    mOutName = outName;

    initFieldMap(leftFieldNames, rightFieldNames);
  }

  public HashJoinElement(FlowElementContext ctxt, HashJoinNode joinNode) {
    this(ctxt, joinNode.getLeftName(), joinNode.getRightName(), joinNode.getLeftKey(),
        joinNode.getRightKey(), joinNode.getWindowWidth(), joinNode.getOutputName(),
        joinNode.getLeftFields(), joinNode.getRightFields(), joinNode.getConf());
  }


  /**
   * Initialize the map we install in every output CompositeEventWrapper.
   * This describes which of the nested EventWrappers contains each field of
   * the joined record. We compute this once and then reuse it in each output
   * event; we always use the ordered list [leftStream, rightStream] in the wrapped
   * list.
   */
  private void initFieldMap(List<TypedField> leftFields, List<TypedField> rightFields) {
    mFieldMap = new HashMap<String, Integer>();

    // Left EventWrapper has index 0...
    for (TypedField f : leftFields) {
      mFieldMap.put(f.getAvroName(), 0);
    }

    // Right EventWrapper has index 1.
    for (TypedField f : rightFields) {
      mFieldMap.put(f.getAvroName(), 1);
    }

    mFieldMap = Collections.unmodifiableMap(mFieldMap);
  }

  @Override
  public void takeEvent(EventWrapper e) throws IOException, InterruptedException {
    Event event = e.getEvent();

    // Determine which stream the event is from; this determines which map we
    // place the event in, and which map we check for candidate join matches.
    String streamName = e.getAttr(STREAM_NAME_ATTR);
    if (null == streamName) {
      // We don't know which stream this came from. Don't process it.
      LOG.warn("Got event with no " + STREAM_NAME_ATTR + " attribute!");
      return;
    }

    WindowedHashMap<Object, EventWrapper, Long> insertMap; // Map where we insert this event.
    WindowedHashMap<Object, EventWrapper, Long> joinMap; // Map we pull join candidates from.
    TypedField keyField; // The field to grab from the event wrapper.
    boolean isLeft;

    if (streamName.equals(mLeftName)) {
      insertMap = mLeftMap;
      joinMap = mRightMap;
      keyField = mLeftKey;
      isLeft = true;
    } else if (streamName.equals(mRightName)) {
      insertMap = mRightMap;
      joinMap = mLeftMap;
      keyField = mRightKey;
      isLeft = false;
    } else {
      // Not from either stream?
      LOG.warn("Got event with unexpected " + STREAM_NAME_ATTR + "=" + streamName);
      return; // Don't know what to do with this.
    }

    // Look up elements from the opposite map to determine what joins we can perform.
    Object key = e.getField(keyField);
    if (null == key) {
      // The key field is null; this will not match to anything in an inner join.
      return;
    }

    assert mTimeSpan.isRelative;
    long curTime = event.getTimestamp();
    Long lo;
    Long hi;

    if (isLeft) {
      // If this event is from the left stream, calculate the relative time interval normally.
      lo = curTime + mTimeSpan.lo;
      hi = curTime + mTimeSpan.hi;
    } else {
      // If this event is from the right stream, use the "mirror image" of the timespan.
      // "RANGE INTERVAL 10 MINUTES PRECEDING" actually means, join with the /next/ 10
      // minutes of data from this perspective.
      lo = curTime - mTimeSpan.hi;
      hi = curTime - mTimeSpan.lo;
    }

    LOG.debug("Working on key: " + key + ", isLeft=" + isLeft);
    LOG.debug("Timestamp=" + curTime + ", interval=" + lo + ", " + hi);

    // Join with all the events in the window.
    List<EventWrapper> joinEvents = joinMap.getRange(key, lo, hi, isLeft, !isLeft);
    for (EventWrapper joinWrapper : joinEvents) {
      CompositeEvent outEvent = new CompositeEvent(mFieldMap,
          event.getPriority(), event.getTimestamp(), event.getNanos(), event.getHost());
      CompositeEventWrapper outWrapper = new CompositeEventWrapper();
      if (isLeft) {
        outEvent.add(e);
        outEvent.add(joinWrapper);
      } else {
        // Add the left event to the composite first.
        // Order matters due to the fixed mFieldMap.
        outEvent.add(joinWrapper);
        outEvent.add(e);
      }
      outEvent.setAttr(STREAM_NAME_ATTR, mOutName); // set the output stream name.
      outWrapper.reset(outEvent);
      emit(outWrapper);
    }

    // Save the event for joining with other events that arrive in the future.
    insertMap.put(key, e, curTime);

    // Remove entries from the join target map that are behind the current
    // window, to keep the window maps from overfilling.
    // Anything behind the 'lo' value can be removed.
    joinMap.removeOlderThan(lo - mSlackTime);

    // If we get lots of records on one side of the join but no records
    // on the other side for an extended period of time, we won't be culling the
    // correct map. Given 'lo' calculated from the perspective of oldest entry in
    // the other map, remove obsolete values from insertMap. Calculating based
    // on the oldest entry in the other map ensures that we are not discarding
    // values that we cannot process yet because one stream is delayed.
    Long oldestInOtherMap = joinMap.oldestTimestamp();
    if (null != oldestInOtherMap) {
      Long otherMapLo;
      if (isLeft) {
        otherMapLo = oldestInOtherMap - mTimeSpan.hi;
      } else {
        otherMapLo = oldestInOtherMap + mTimeSpan.lo;
      }
      LOG.debug("otherMapLo=" + otherMapLo);
      insertMap.removeOlderThan(otherMapLo - mSlackTime);
    }
  }
}
TOP

Related Classes of com.odiago.flumebase.exec.HashJoinElement

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.