Package eu.stratosphere.test.recordJobs.relational

Source Code of eu.stratosphere.test.recordJobs.relational.TPCHQuery9

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.test.recordJobs.relational;

import org.apache.log4j.Logger;

import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.common.Program;
import eu.stratosphere.api.common.ProgramDescription;
import eu.stratosphere.api.java.record.operators.FileDataSink;
import eu.stratosphere.api.java.record.operators.FileDataSource;
import eu.stratosphere.api.java.record.operators.JoinOperator;
import eu.stratosphere.api.java.record.operators.MapOperator;
import eu.stratosphere.api.java.record.operators.ReduceOperator;
import eu.stratosphere.test.recordJobs.relational.query9Util.AmountAggregate;
import eu.stratosphere.test.recordJobs.relational.query9Util.FilteredPartsJoin;
import eu.stratosphere.test.recordJobs.relational.query9Util.IntPair;
import eu.stratosphere.test.recordJobs.relational.query9Util.LineItemMap;
import eu.stratosphere.test.recordJobs.relational.query9Util.OrderMap;
import eu.stratosphere.test.recordJobs.relational.query9Util.OrderedPartsJoin;
import eu.stratosphere.test.recordJobs.relational.query9Util.PartFilter;
import eu.stratosphere.test.recordJobs.relational.query9Util.PartJoin;
import eu.stratosphere.test.recordJobs.relational.query9Util.PartListJoin;
import eu.stratosphere.test.recordJobs.relational.query9Util.PartsuppMap;
import eu.stratosphere.test.recordJobs.relational.query9Util.StringIntPair;
import eu.stratosphere.test.recordJobs.relational.query9Util.StringIntPairStringDataOutFormat;
import eu.stratosphere.test.recordJobs.relational.query9Util.SupplierMap;
import eu.stratosphere.test.recordJobs.relational.query9Util.SuppliersJoin;
import eu.stratosphere.test.recordJobs.util.IntTupleDataInFormat;
import eu.stratosphere.types.IntValue;

/**
* Quote from the TPC-H homepage:
* "The TPC-H is a decision support benchmark on relational data.
* Its documentation and the data generator (DBGEN) can be found
* on http://www.tpc.org/tpch/ . This implementation is tested with
* the DB2 data format."
* This PACT program implements the query 9 of the TPC-H benchmark:
*
* <pre>
* select nation, o_year, sum(amount) as sum_profit
* from (
*   select n_name as nation, extract(year from o_orderdate) as o_year,
*          l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
*   from part, supplier, lineitem, partsupp, orders, nation
*   where
*     s_suppkey = l_suppkey and ps_suppkey = l_suppkey and ps_partkey = l_partkey
*     and p_partkey = l_partkey and o_orderkey = l_orderkey and s_nationkey = n_nationkey
*     and p_name like '%[COLOR]%'
* ) as profit
* group by nation, o_year
* order by nation, o_year desc;
* </pre>
*
* Plan:<br>
* Match "part" and "partsupp" on "partkey" -> "parts" with (partkey, suppkey) as key
* Match "orders" and "lineitem" on "orderkey" -> "ordered_parts" with (partkey, suppkey) as key
* Match "parts" and "ordered_parts" on (partkey, suppkey) -> "filtered_parts" with "suppkey" as key
* Match "supplier" and "nation" on "nationkey" -> "suppliers" with "suppkey" as key
* Match "filtered_parts" and "suppliers" on "suppkey" -> "partlist" with (nation, o_year) as key
* Group "partlist" by (nation, o_year), calculate sum(amount)
*
* <b>Attention:</b> The "order by" part is not implemented!
*
*/
@SuppressWarnings("serial")
public class TPCHQuery9 implements Program, ProgramDescription {
  public final String ARGUMENTS = "dop partInputPath partSuppInputPath ordersInputPath lineItemInputPath supplierInputPath nationInputPath outputPath";

  private static Logger LOGGER = Logger.getLogger(TPCHQuery9.class);

  private int degreeOfParallelism = 1;

  private String partInputPath, partSuppInputPath, ordersInputPath, lineItemInputPath, supplierInputPath,
      nationInputPath;

  private String outputPath;


  @Override
  public Plan getPlan(String... args) throws IllegalArgumentException {

    if (args.length != 8)
    {
      LOGGER.warn("number of arguments do not match!");
     
      this.degreeOfParallelism = 1;
      this.partInputPath = "";
      this.partSuppInputPath = "";
      this.ordersInputPath = "";
      this.lineItemInputPath = "";
      this.supplierInputPath = "";
      this.nationInputPath = "";
      this.outputPath = "";
    }else
    {
      this.degreeOfParallelism = Integer.parseInt(args[0]);
      this.partInputPath = args[1];
      this.partSuppInputPath = args[2];
      this.ordersInputPath = args[3];
      this.lineItemInputPath = args[4];
      this.supplierInputPath = args[5];
      this.nationInputPath = args[6];
      this.outputPath = args[7];
    }
   
    /* Create the 6 data sources: */
    /* part: (partkey | name, mfgr, brand, type, size, container, retailprice, comment) */
    FileDataSource partInput = new FileDataSource(
      new IntTupleDataInFormat(), this.partInputPath, "\"part\" source");
    //partInput.setOutputContract(UniqueKey.class);
//    partInput.getCompilerHints().setAvgNumValuesPerKey(1);

    /* partsupp: (partkey | suppkey, availqty, supplycost, comment) */
    FileDataSource partSuppInput = new FileDataSource(
      new IntTupleDataInFormat(), this.partSuppInputPath, "\"partsupp\" source");

    /* orders: (orderkey | custkey, orderstatus, totalprice, orderdate, orderpriority, clerk, shippriority, comment) */
    FileDataSource ordersInput = new FileDataSource(
      new IntTupleDataInFormat(), this.ordersInputPath, "\"orders\" source");
    //ordersInput.setOutputContract(UniqueKey.class);
//    ordersInput.getCompilerHints().setAvgNumValuesPerKey(1);

    /* lineitem: (orderkey | partkey, suppkey, linenumber, quantity, extendedprice, discount, tax, ...) */
    FileDataSource lineItemInput = new FileDataSource(
      new IntTupleDataInFormat(), this.lineItemInputPath, "\"lineitem\" source");

    /* supplier: (suppkey | name, address, nationkey, phone, acctbal, comment) */
    FileDataSource supplierInput = new FileDataSource(
      new IntTupleDataInFormat(), this.supplierInputPath, "\"supplier\" source");
    //supplierInput.setOutputContract(UniqueKey.class);
//    supplierInput.getCompilerHints().setAvgNumValuesPerKey(1);

    /* nation: (nationkey | name, regionkey, comment) */
    FileDataSource nationInput = new FileDataSource(
      new IntTupleDataInFormat(), this.nationInputPath, "\"nation\" source");
    //nationInput.setOutputContract(UniqueKey.class);
//    nationInput.getCompilerHints().setAvgNumValuesPerKey(1);

    /* Filter on part's name, project values to NULL: */
    MapOperator filterPart = MapOperator.builder(PartFilter.class)
      .name("filterParts")
      .build();

    /* Map to change the key element of partsupp, project value to (supplycost, suppkey): */
    MapOperator mapPartsupp = MapOperator.builder(PartsuppMap.class)
      .name("mapPartsupp")
      .build();

    /* Map to extract the year from order: */
    MapOperator mapOrder = MapOperator.builder(OrderMap.class)
      .name("mapOrder")
      .build();

    /* Project value to (partkey, suppkey, quantity, price = extendedprice*(1-discount)): */
    MapOperator mapLineItem = MapOperator.builder(LineItemMap.class)
      .name("proj.Partsupp")
      .build();

    /* - change the key of supplier to nationkey, project value to suppkey */
    MapOperator mapSupplier = MapOperator.builder(SupplierMap.class)
      .name("proj.Partsupp")
      .build();

    /* Equijoin on partkey of part and partsupp: */
    JoinOperator partsJoin = JoinOperator.builder(PartJoin.class, IntValue.class, 0, 0)
      .name("partsJoin")
      .build();

    /* Equijoin on orderkey of orders and lineitem: */
    JoinOperator orderedPartsJoin =
      JoinOperator.builder(OrderedPartsJoin.class, IntValue.class, 0, 0)
      .name("orderedPartsJoin")
      .build();

    /* Equijoin on nationkey of supplier and nation: */
    JoinOperator suppliersJoin =
      JoinOperator.builder(SuppliersJoin.class, IntValue.class, 0, 0)
      .name("suppliersJoin")
      .build();

    /* Equijoin on (partkey,suppkey) of parts and orderedParts: */
    JoinOperator filteredPartsJoin =
      JoinOperator.builder(FilteredPartsJoin.class, IntPair.class, 0, 0)
      .name("filteredPartsJoin")
      .build();

    /* Equijoin on suppkey of filteredParts and suppliers: */
    JoinOperator partListJoin =
      JoinOperator.builder(PartListJoin.class, IntValue.class , 0, 0)
      .name("partlistJoin")
      .build();

    /* Aggregate sum(amount) by (nation,year): */
    ReduceOperator sumAmountAggregate =
      ReduceOperator.builder(AmountAggregate.class, StringIntPair.class, 0)
      .name("groupyBy")
      .build();

    /* Connect input filters: */
    filterPart.setInput(partInput);
    mapPartsupp.setInput(partSuppInput);
    mapOrder.setInput(ordersInput);
    mapLineItem.setInput(lineItemInput);
    mapSupplier.setInput(supplierInput);

    /* Connect equijoins: */
    partsJoin.setFirstInput(filterPart);
    partsJoin.setSecondInput(mapPartsupp);
    orderedPartsJoin.setFirstInput(mapOrder);
    orderedPartsJoin.setSecondInput(mapLineItem);
    suppliersJoin.setFirstInput(mapSupplier);
    suppliersJoin.setSecondInput(nationInput);
    filteredPartsJoin.setFirstInput(partsJoin);
    filteredPartsJoin.setSecondInput(orderedPartsJoin);
    partListJoin.setFirstInput(filteredPartsJoin);
    partListJoin.setSecondInput(suppliersJoin);

    /* Connect aggregate: */
    sumAmountAggregate.setInput(partListJoin);

    /* Connect sink: */
    FileDataSink result = new FileDataSink(new StringIntPairStringDataOutFormat(), this.outputPath, "Results sink");
    result.setInput(sumAmountAggregate);

    Plan p = new Plan(result, "TPC-H query 9");
    p.setDefaultParallelism(this.degreeOfParallelism);
    return p;
  }

  @Override
  public String getDescription() {
    return "TPC-H query 9, parameters: " + this.ARGUMENTS;
  }
}
TOP

Related Classes of eu.stratosphere.test.recordJobs.relational.TPCHQuery9

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.