Package plan_runner.storm_components

Source Code of plan_runner.storm_components.StormDataSource

package plan_runner.storm_components;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Semaphore;

import org.apache.log4j.Logger;

import plan_runner.components.ComponentProperties;
import plan_runner.operators.AggregateOperator;
import plan_runner.operators.ChainOperator;
import plan_runner.operators.Operator;
import plan_runner.storm_components.synchronization.TopologyKiller;
import plan_runner.utilities.CustomReader;
import plan_runner.utilities.MyUtilities;
import plan_runner.utilities.PeriodicAggBatchSend;
import plan_runner.utilities.SerializableFileInputStream;
import plan_runner.utilities.SystemParameters;
import backtype.storm.Config;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

public class StormDataSource extends StormSpoutComponent {
  private static final long serialVersionUID = 1L;
  private static final Logger LOG = Logger.getLogger(StormDataSource.class);

  private final String _inputPath;
  private int _fileSection;
  private final int _fileParts;
  private CustomReader _reader = null;

  private boolean _hasReachedEOF = false;
  private boolean _hasSentEOF = false; // EOF already sent to the TopologyKiller
  private boolean _hasSentLastAck = false; // LAST_ACK already sent (AckLastTuple mode)

  private long _pendingTuples = 0; // emitted but not yet acked (AckEveryTuple mode)
  private int _numSentTuples = 0;

  private final ChainOperator _operatorChain;

  // for aggregate batch sending
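  // _semAgg serializes access to the aggregation storage between this spout's
  // executor thread and the periodic batch-send thread (PeriodicAggBatchSend)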
  private final Semaphore _semAgg = new Semaphore(1, true);
  private boolean _firstTime = true;
  private PeriodicAggBatchSend _periodicAggBatch;
  private final long _aggBatchOutputMillis;

  public StormDataSource(ComponentProperties cp, List<String> allCompNames, String inputPath,
      int hierarchyPosition, int parallelism, TopologyBuilder builder, TopologyKiller killer,
      Config conf) {

    super(cp, allCompNames, hierarchyPosition, conf);
    _operatorChain = cp.getChainOperator();

    _aggBatchOutputMillis = cp.getBatchOutputMillis();
    _inputPath = inputPath;
    _fileParts = parallelism;

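    // register with the TopologyKiller; where we register depends on the ack mode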
    if (getHierarchyPosition() == FINAL_COMPONENT && (!MyUtilities.isAckEveryTuple(conf)))
      killer.registerComponent(this, parallelism);

    builder.setSpout(getID(), this, parallelism);
    if (MyUtilities.isAckEveryTuple(conf))
      killer.registerComponent(this, parallelism);
  }

  // ack method on spout is called only if in AckEveryTuple mode (ACKERS > 0)
  @Override
  public void ack(Object msgId) {
    _pendingTuples--;
  }

  @Override
  public void aggBatchSend() {
    if (MyUtilities.isAggBatchOutputMode(_aggBatchOutputMillis))
      if (_operatorChain != null) {
        final Operator lastOperator = _operatorChain.getLastOperator();
        if (lastOperator instanceof AggregateOperator) {
          try {
            _semAgg.acquire();
          } catch (final InterruptedException ex) {
            // ignored: interrupted while waiting for the batch-send semaphore
          }

          // sending
          final AggregateOperator agg = (AggregateOperator) lastOperator;
          final List<String> tuples = agg.getContent();
          for (final String tuple : tuples)
            tupleSend(MyUtilities.stringToTuple(tuple, getConf()), null, 0);
          // clearing
          agg.clearStorage();
          _semAgg.release();
        }
      }
  }

  protected void applyOperatorsAndSend(List<String> tuple) {
    // do selection and projection
    if (MyUtilities.isAggBatchOutputMode(_aggBatchOutputMillis))
      try {
        _semAgg.acquire();
      } catch (final InterruptedException ex) {
        // ignored: interrupted while waiting for the batch-send semaphore
      }
    tuple = _operatorChain.process(tuple);
    if (MyUtilities.isAggBatchOutputMode(_aggBatchOutputMillis))
      _semAgg.release();

    if (tuple == null)
      return;

    _numSentTuples++;
    _pendingTuples++;
    printTuple(tuple);

    if (MyUtilities.isSending(getHierarchyPosition(), _aggBatchOutputMillis)) {
      long timestamp = 0;
      if (MyUtilities.isCustomTimestampMode(getConf()))
        if (getHierarchyPosition() == StormComponent.NEXT_TO_LAST_COMPONENT)
          // a tuple carries a non-zero timestamp only if the component is next
          // to last, because we measure the latency of the last operator
          timestamp = System.currentTimeMillis();
      tupleSend(tuple, null, timestamp);
    }
    if (MyUtilities.isPrintLatency(getHierarchyPosition(), getConf())) {
      final long timestamp = System.currentTimeMillis();
      // long timestamp = System.nanoTime();
      printTupleLatency(_numSentTuples - 1, timestamp);
    }
  }

  @Override
  public void close() {
    try {
      _reader.close();
    } catch (final Exception e) {
      final String error = MyUtilities.getStackTrace(e);
      LOG.info(error);
    }
  }

  /*
   * Invoked exactly once, the first time EOF is reached.
   */
  private void eofFinalization() {
    printContent();

    if (!MyUtilities.isAckEveryTuple(getConf()))
      if (getHierarchyPosition() == FINAL_COMPONENT) {
        if (!_hasSentEOF) {
          _hasSentEOF = true; // ensures we do not send multiple EOFs per spout
          getCollector().emit(SystemParameters.EOF_STREAM,
              new Values(SystemParameters.EOF));
        }
      } else if (!_hasSentLastAck) {
        LOG.info(getID() + ": has sent LAST_ACK, tuples sent: " + _numSentTuples);
        _hasSentLastAck = true;
        final List<String> lastTuple = new ArrayList<String>(
            Arrays.asList(SystemParameters.LAST_ACK));
        tupleSend(lastTuple, null, 0);
      }
  }

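  // tuple replay is not supported: a failed tuple aborts the spout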
  @Override
  public void fail(Object msgId) {
    throw new RuntimeException("Failing tuple in " + getID());
  }

  @Override
  public ChainOperator getChainOperator() {
    return _operatorChain;
  }

  // StormComponent
  @Override
  public String getInfoID() {
    final StringBuilder sb = new StringBuilder();
    sb.append("Table ").append(getID()).append(" has ID: ").append(getID());
    return sb.toString();
  }

  @Override
  public long getNumSentTuples() {
    return _numSentTuples;
  }

  public long getPendingTuples() {
    return _pendingTuples;
  }

  // from IRichSpout interface
  @Override
  public void nextTuple() {
    if (_firstTime && MyUtilities.isAggBatchOutputMode(_aggBatchOutputMillis)) {
      _periodicAggBatch = new PeriodicAggBatchSend(_aggBatchOutputMillis, this);
      _firstTime = false;
    }

    if (SystemParameters.isExisting(getConf(), "TIMEOUT_1MS_EVERY_XTH")) {
      // Obsolete - this is for compatibility with old configurations
      final long timeout = 1;
      final int freqWait = SystemParameters.getInt(getConf(), "TIMEOUT_1MS_EVERY_XTH");
      if (_numSentTuples > 0 && _numSentTuples % freqWait == 0)
        Utils.sleep(timeout);
    }
   
    if (SystemParameters.isExisting(getConf(), "TIMEOUT_EVERY_X_TUPLE") && SystemParameters.isExisting(getConf(), "TIMEOUT_X_MS")) {
      final int freqWait = SystemParameters.getInt(getConf(), "TIMEOUT_EVERY_X_TUPLE");     
      final long timeout = SystemParameters.getInt(getConf(), "TIMEOUT_X_MS");
      if (_numSentTuples > 0 && _numSentTuples % freqWait == 0)
        Utils.sleep(timeout);
    }
   

    final String line = readLine();
    if (line == null) {
      if (!_hasReachedEOF) {
        _hasReachedEOF = true;
        // we reached EOF, first time this happens we invoke the method:
        eofFinalization();
      }
      sendEOF();
      // sleep since we are not going to do useful work,
      // but still are looping in nextTuple method
      Utils.sleep(SystemParameters.EOF_TIMEOUT_MILLIS);
      return;
    }

    final List<String> tuple = MyUtilities.fileLineToTuple(line, getConf());
    applyOperatorsAndSend(tuple);
  }

  // BaseRichSpout
  @Override
  public void open(Map map, TopologyContext tc, SpoutOutputCollector collector) {
    super.open(map, tc, collector);
    try {
      // each task reads its own section of the input file (1 MB read buffer)
      _fileSection = tc.getThisTaskIndex();
      _reader = new SerializableFileInputStream(new File(_inputPath), 1 * 1024 * 1024,
          _fileSection, _fileParts);
    } catch (final Exception e) {
      final String error = MyUtilities.getStackTrace(e);
      LOG.info(error);
      throw new RuntimeException("Filename not found: " + error);
    }
  }

  // HELPER methods
  protected String readLine() {
    String text = null;
    try {
      text = _reader.readLine();
    } catch (final IOException e) {
      final String errMessage = MyUtilities.getStackTrace(e);
      LOG.info(errMessage);
    }
    return text;
  }

  /*
   * In AckEveryTuple mode, EOF is sent to the TopologyKiller once every
   * emitted tuple has been acked (_pendingTuples == 0).
   */
  private void sendEOF() {
    if (MyUtilities.isAckEveryTuple(getConf()))
      if (_pendingTuples == 0)
        if (!_hasSentEOF) {
          _hasSentEOF = true;
          getCollector().emit(SystemParameters.EOF_STREAM,
              new Values(SystemParameters.EOF));
        }
  }
}
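
How the input file is split across parallel spout tasks: in open(), each task takes its task index as _fileSection and passes _fileParts (the spout parallelism set in the constructor) to SerializableFileInputStream, so every task reads a disjoint slice of the same file. Below is a minimal, self-contained sketch of such byte-range partitioning; it is an illustration only, not the actual SerializableFileInputStream logic:

import java.io.File;

// Hypothetical demo of dividing a file among `parts` parallel readers,
// in the spirit of SerializableFileInputStream(file, bufferSize, section, parts).
public class FileSectionDemo {
  public static void main(String[] args) {
    final File file = new File(args[0]);
    final int parts = 4; // e.g. the spout parallelism
    final long size = file.length();
    for (int section = 0; section < parts; section++) {
      // the task with index `section` owns a contiguous byte range of the file
      final long start = section * size / parts;
      final long end = (section + 1) * size / parts;
      System.out.println("task " + section + " reads bytes [" + start + ", " + end + ")");
    }
  }
}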