Package com.ebay.erl.mobius.core.builder

Examples of com.ebay.erl.mobius.core.builder.Dataset


    // the property name, in the form $datasetID.key.columns
    String joinKeyPropertyName = null;
   
    for( byte assignedDatasetID=0;assignedDatasetID<this.datasets.length;assignedDatasetID++ )
    {
      Dataset aDataset = this.datasets[assignedDatasetID];
      if( aColumn.getDataset().equals(aDataset) )
      {
        JobSetup.validateColumns(aDataset, aColumn);       
        Configuration aJobConf  = aDataset.createJobConf(assignedDatasetID);
        this.jobConf      = Util.merge(this.jobConf, aJobConf);       
        joinKeyPropertyName    = assignedDatasetID+".key.columns";
        break;
      }
    }
View Full Code Here


    for( Projectable aFunc:projections )
    {
      Column[] requiredInputColumns = aFunc.getInputColumns();
      for( Column aColumn:requiredInputColumns )
      {
        Dataset aDataset = aColumn.getDataset();
        // make sure <code>aDataset</code> is one of the participating datasets
        boolean withinSelectedDataset = false;
        for( Dataset aSelectedDataset:this.datasets )
        {
          if( aSelectedDataset.equals(aDataset) )
          {
            withinSelectedDataset = true;
            break;
          }
        }
       
        if( !withinSelectedDataset )
        {
          // the user selected a column from a dataset that is not
          // among the selected datasets in this join/group by job.
          throw new IllegalArgumentException(aColumn.toString()+" does not within the selected datasets " +
              "in this join/group task, please select columns only from the selected datasets.");
        }
       
        List<Column> projectablesInADataset = null;
        if ( (projectablesInADataset=datasetToColumns.get(aDataset))==null )
        {
          projectablesInADataset = new LinkedList<Column>();
          datasetToColumns.put(aDataset, projectablesInADataset);
        }
       
        if( !projectablesInADataset.contains(aColumn) )
          projectablesInADataset.add(aColumn);
      }
    }
   
    if( datasetToColumns.keySet().size()!=this.datasets.length )
    {
      throw new IllegalArgumentException("Please select at least one column from each dataset in the join/group-by job.");
    }
   
    // SETUP JOB
    if( this.userDefinedConf!=null )
    {
      this.jobConf = new JobConf(Util.merge(this.jobConf, this.userDefinedConf));
    }
    this.jobConf.setJarByClass(job.getClass());
    this.jobConf.setMapOutputKeyClass(DataJoinKey.class);
    this.jobConf.setMapOutputValueClass(DataJoinValue.class);
    this.jobConf.setPartitionerClass (DataJoinKeyPartitioner.class);
    this.jobConf.setOutputValueGroupingComparator (DataJoinKey.Comparator.class);
    this.jobConf.setOutputKeyComparatorClass (DataJoinKey.class);
    this.jobConf.setReducerClass(DefaultMobiusReducer.class);
    this.jobConf.set(ConfigureConstants.PROJECTION_COLUMNS, SerializableUtil.serializeToBase64(projections));
   
   
   
    JobSetup.setupOutputs(this.jobConf, output, outputFormat);
   
    // set up input paths and projection columns for each dataset.
    for( byte assignedDatasetID=0;assignedDatasetID<this.datasets.length;assignedDatasetID++)
    {
      Dataset aDataset = this.datasets[assignedDatasetID];
     
      // setup input for each dataset
      JobSetup.setupInputs(jobConf, aDataset, assignedDatasetID);
     
      // setup projection for each dataset
View Full Code Here

  @Test
  public void testOverflow()
    throws IOException
  {
    Dataset ds = this.createDummyDataset("ds", new String[]{"COLUMN"});
   
    BigDecimal expected = BigDecimal.valueOf(Double.MAX_VALUE).add(BigDecimal.valueOf(Double.MAX_VALUE));
   
    Tuple t = new Tuple();
    t.put("COLUMN", Double.MAX_VALUE);
View Full Code Here

 
  @Test
  public void test1()
    throws IOException
  {
    Dataset ds = this.createDummyDataset("ds", new String[]{"COLUMN"});
    BigDecimal expected = BigDecimal.valueOf(-5D);
   
    Tuple t1 = new Tuple();
    t1.put("column", 10D);
   
View Full Code Here

    String output  = args[1];
   
    LOGGER.info("Input Path:"+input);
    LOGGER.info("Output Path:"+output);
   
    Dataset items = TSVDatasetBuilder.newInstance(this, "items_table", new String[]{"ITEM_ID", "SELLER_ID", "BUYER_ID", "ITEM_PRICE"})
      .addInputPath(new Path(input))
      .build();
   
    this.group(items)
      .by("SELLER_ID")     
View Full Code Here

  @Test
  public void test()
    throws IOException
  {
    Dataset ds = this.createDummyDataset("test", new String[]{"ID"});
   
    Concate func = new Concate(new Column(ds, "ID"), ",");
   
    List<Tuple> tuples    = new ArrayList<Tuple>();
    StringBuffer trueAnswer = new StringBuffer();
View Full Code Here

    Tuple t3 = new Tuple();
    t3.put("C1", a2);
    tuples.add(t3);
   
   
    Dataset ds = this.createDummyDataset("test", new String[]{"C1"});
    Concate func = new Concate(new Column(ds, "C1"));
    func.reset();
    for( Tuple t:tuples )
    {
      func.consume(t);
View Full Code Here

      throw new RuntimeException(e);
    }
   
    // creating a dummy dataset, the schema doesn't match the actual file schema,
    // it's just for unit testing.
    Dataset dummy = TSVDatasetBuilder.newInstance(this, name, schema)
      .addInputPath(new Path(input.getAbsolutePath()))
      .build();
   
    return dummy;
  }
View Full Code Here

   
    File input  = new File(this.getClass().getResource("/com/ebay/erl/mobius/core/items.tsv").toURI());
   
    // creating a dummy dataset, the schema doesn't match the actual file schema,
    // it's just for unit testing.
    Dataset dummy = TSVDatasetBuilder.newInstance(this, "dummy", new String[]{"COUNTS", "ID"})
      .addInputPath(new Path(input.getAbsolutePath()))
      .build();
   
    final int topK = 5;
   
View Full Code Here

  @Override
  public int run(String[] args)
    throws Exception
 
    Dataset items = TSVDatasetBuilder.newInstance(this, "items_table", new String[]{"ITEM_ID", "SELLER_ID", "BUYER_ID", "ITEM_PRICE"})
      .addInputPath(new Path(args[0]))
    .build();
   
    this.sort(items)
      .select("ITEM_ID", "ITEM_PRICE", "SELLER_ID", "BUYER_ID")
View Full Code Here

TOP

Related Classes of com.ebay.erl.mobius.core.builder.Dataset

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.