Package plan_runner.utilities

Source Code of plan_runner.utilities.LocalMergeResults

package plan_runner.utilities;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Semaphore;

import org.apache.log4j.Logger;

import plan_runner.conversion.TypeConversion;
import plan_runner.expressions.ColumnReference;
import plan_runner.operators.AggregateAvgOperator;
import plan_runner.operators.AggregateOperator;
import plan_runner.operators.AggregateSumOperator;
import plan_runner.storage.AggregationStorage;
import plan_runner.storm_components.StormComponent;

public class LocalMergeResults {
  private static Logger LOG = Logger.getLogger(LocalMergeResults.class);

  // for writing the full final result in Local Mode
  private static int _collectedLastComponents = 0;
  private static int _numTuplesProcessed = 0;
  // the number of tuples the componentTask is reponsible for (!! not how many
  // tuples are in storage!!)

  private static AggregateOperator _computedAgg;
  private static AggregateOperator _fileAgg;
  private static Semaphore _semFullResult = new Semaphore(1, true);

  private static void addMoreResults(AggregateOperator lastAgg, Map map) {
    if (_computedAgg == null) {
      // first task of the last component asked to be added
      // we create empty aggregations, which we later fill, one from
      // tasks, other from a file
      _computedAgg = createOverallAgg(lastAgg, map);
      _fileAgg = (AggregateOperator) DeepCopy.copy(_computedAgg);
      fillAggFromResultFile(map);
    }
    ((AggregationStorage) _computedAgg.getStorage()).addContent((AggregationStorage) (lastAgg
        .getStorage()));
  }

  private static AggregateOperator createOverallAgg(AggregateOperator lastAgg, Map map) {
    final TypeConversion wrapper = lastAgg.getType();
    AggregateOperator overallAgg;

    ColumnReference cr;
    if (lastAgg.hasGroupBy())
      cr = new ColumnReference(wrapper, 1);
    else
      cr = new ColumnReference(wrapper, 0);

    if (lastAgg instanceof AggregateAvgOperator)
      overallAgg = new AggregateAvgOperator(cr, map);
    else
      overallAgg = new AggregateSumOperator(cr, map);

    if (lastAgg.hasGroupBy())
      overallAgg.setGroupByColumns(Arrays.asList(0));

    return overallAgg;
  }

  private static void fillAggFromResultFile(Map map) {
    try {
      final String path = getResultFilePath(map);
      final List<String> lines = MyUtilities.readFileLinesSkipEmpty(path);

      for (final String line : lines) {
        // List<String> tuple = Arrays.asList(line.split("\\s+=\\s+"));
        // we want to catch exactly one space between and after =.
        // tuple might consist of spaces as well
        final List<String> tuple = Arrays.asList(line.split(" = "));
        _fileAgg.process(tuple);
      }
    } catch (final IOException ex) {
      // problem with finding the result file
      _fileAgg = null;
    }
  }

  // getting size information - from path "../test/data/tpch/0.01G",
  // it extracts dataSize = 0.01G
  // For Squall (not in Squall Plan Runner) there is DIP_DB_SIZE,
  // but this method has to be used for PlanRunner as well.
  private static String getDataSizeInfo(Map map) {
    final String path = SystemParameters.getString(map, "DIP_DATA_PATH");
    return MyUtilities.getPartFromEnd(path, 0);
  }

  // this has to be a separate method, because we don't want Exception if
  // DIP_RESULT_ROOT is not set
  private static String getResultDir(Map map) {
    String resultRoot = "";
    if (SystemParameters.isExisting(map, "DIP_RESULT_ROOT"))
      resultRoot = SystemParameters.getString(map, "DIP_RESULT_ROOT");
    return resultRoot;
  }

  private static String getResultFilePath(Map map) {
    final String rootDir = getResultDir(map);
    final String schemaName = getSchemaName(map);
    final String dataSize = getDataSizeInfo(map);
    final String queryName = SystemParameters.getString(map, "DIP_QUERY_NAME");
    return rootDir + "/" + schemaName + "/" + dataSize + "/" + queryName + ".result";
  }

  /*
   * from "../test/data/tpch/0.01G" as dataPath, return tpch
   */
  private static String getSchemaName(Map map) {
    final String path = SystemParameters.getString(map, "DIP_DATA_PATH");
    return MyUtilities.getPartFromEnd(path, 1);
  }

  // The following 2 methods are crucial for collecting, printing and
  // comparing the results in Local Mode
  // called on the component task level, when all Spouts fully propagated
  // their tuples
  public static void localCollectFinalResult(AggregateOperator lastAgg, int hierarchyPosition,
      Map map, Logger log) {
    if ((!SystemParameters.getBoolean(map, "DIP_DISTRIBUTED"))
        && hierarchyPosition == StormComponent.FINAL_COMPONENT)
      try {
        // prepare it for printing at the end of the execution
        _semFullResult.acquire();

        _collectedLastComponents++;
        _numTuplesProcessed += lastAgg.getNumTuplesProcessed();
        addMoreResults(lastAgg, map);

        _semFullResult.release();
      } catch (final InterruptedException ex) {
        throw new RuntimeException("InterruptedException unexpectedly occured!");
      }
  }

  private static void localCompare(Map map) {
    if (_fileAgg == null) {
      LOG.info("\nCannot validate the result, result file " + getResultFilePath(map)
          + " does not exist."
          + "\n  Make sure you specified correct DIP_RESULT_ROOT and"
          + "\n  created result file with correct name.");
      return;
    }
    if (_computedAgg.getStorage().equals(_fileAgg.getStorage()))
      LOG.info("\nOK: Expected result achieved for "
          + SystemParameters.getString(map, "DIP_TOPOLOGY_NAME"));
    else {
      final StringBuilder sb = new StringBuilder();
      sb.append("\nPROBLEM: Not expected result achieved for ").append(
          SystemParameters.getString(map, "DIP_TOPOLOGY_NAME"));
      sb.append("\nCOMPUTED: \n").append(_computedAgg.printContent());
      sb.append("\nFROM THE RESULT FILE: \n").append(_fileAgg.printContent());
      LOG.info(sb.toString());
    }
  }

  private static void localPrint(String finalResult, Map map) {
    final StringBuilder sb = new StringBuilder();
    sb.append("\nThe full result for topology ");
    sb.append(SystemParameters.getString(map, "DIP_TOPOLOGY_NAME")).append(".");
    sb.append("\nCollected from ").append(_collectedLastComponents)
        .append(" component tasks of the last component.");
    sb.append("\nAll the tasks of the last component in total received ")
        .append(_numTuplesProcessed).append(" tuples.");
    sb.append("\n").append(finalResult);
    LOG.info(sb.toString());
  }

  // called just before killExecution
  // only for local mode, since they are executed in a single process, sharing
  // all the classes
  // we need it due to collectedLastComponents, and lines of result
  // in cluster mode, they can communicate only through conf file
  public static void localPrintAndCompare(Map map) {
    localPrint(_computedAgg.printContent(), map);
    localCompare(map);
  }
}
TOP

Related Classes of plan_runner.utilities.LocalMergeResults

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.