// Walk every projection and bucket the input columns it needs under the
// dataset each column comes from, rejecting columns of foreign datasets.
for( Projectable projectable:projections )
{
    for( Column requiredColumn:projectable.getInputColumns() )
    {
        Dataset owningDataset = requiredColumn.getDataset();

        // the owning dataset has to be one of the datasets taking part in this job
        boolean isParticipating = false;
        for( int datasetIdx=0; !isParticipating && datasetIdx<this.datasets.length; datasetIdx++ )
        {
            isParticipating = this.datasets[datasetIdx].equals(owningDataset);
        }
        if( !isParticipating )
        {
            // the column was picked from a dataset that is not part of
            // this join/group-by job.
            throw new IllegalArgumentException(requiredColumn.toString()+" does not within the selected datasets " +
                    "in this join/group task, please select columns only from the selected datasets.");
        }

        // lazily create the column list of a dataset the first time it is seen
        List<Column> columnsOfDataset = datasetToColumns.get(owningDataset);
        if( columnsOfDataset==null )
        {
            columnsOfDataset = new LinkedList<Column>();
            datasetToColumns.put(owningDataset, columnsOfDataset);
        }

        // keep each column only once per dataset
        if( !columnsOfDataset.contains(requiredColumn) )
        {
            columnsOfDataset.add(requiredColumn);
        }
    }
}

// every participating dataset must contribute at least one column
if( this.datasets.length!=datasetToColumns.keySet().size() )
{
    throw new IllegalArgumentException("Please select at least one column from each dataset in the join/group-by job.");
}
// SETUP JOB
// When a user-defined configuration is present, combine it with the current
// job configuration via Util.merge and replace this.jobConf with a new JobConf
// built from the result. (Which side wins on conflicting keys is decided
// inside Util.merge and is not visible here.)
if( this.userDefinedConf!=null )
{
this.jobConf = new JobConf(Util.merge(this.jobConf, this.userDefinedConf));
}
// use the runtime class of <code>job</code> to determine the job jar
this.jobConf.setJarByClass(job.getClass());
// map output records are DataJoinKey/DataJoinValue pairs
this.jobConf.setMapOutputKeyClass(DataJoinKey.class);
this.jobConf.setMapOutputValueClass(DataJoinValue.class);
// partitioning, reduce-side grouping and key ordering of the map output
this.jobConf.setPartitionerClass (DataJoinKeyPartitioner.class);
this.jobConf.setOutputValueGroupingComparator (DataJoinKey.Comparator.class);
// NOTE(review): the key comparator is DataJoinKey itself, whereas the grouping
// comparator above is DataJoinKey.Comparator -- confirm the difference is intended.
this.jobConf.setOutputKeyComparatorClass (DataJoinKey.class);
this.jobConf.setReducerClass(DefaultMobiusReducer.class);
// store the serialized projections in the configuration under PROJECTION_COLUMNS
// (presumably read back on the reducer side -- TODO confirm)
this.jobConf.set(ConfigureConstants.PROJECTION_COLUMNS, SerializableUtil.serializeToBase64(projections));
// output location/format handling is delegated to JobSetup
JobSetup.setupOutputs(this.jobConf, output, outputFormat);
// set up the input paths and the projection columns for each dataset.
for( byte assignedDatasetID=0;assignedDatasetID<this.datasets.length;assignedDatasetID++)
{
Dataset aDataset = this.datasets[assignedDatasetID];
// setup input for each dataset
JobSetup.setupInputs(jobConf, aDataset, assignedDatasetID);
// setup projection for each dataset