            }
          }
          // For each input Tuple from this File execute the RecordProcessor
          // The default IdentityRecordProcessor just passes the Tuple through unchanged
          ITuple processedTuple = null;
          try {
            processedTuple = recordProcessor.process(fileTuple, counterInterface);
          } catch (Throwable e1) {
            throw new RuntimeException(e1);
          }
          if (processedTuple == null) {
            // The tuple has been filtered out by the user
            return;
          }
          // Compute the partitioning key for this record
          String strKey = "";
          try {
            strKey = getPartitionByKey(processedTuple, tableSpec, jsEngine);
          } catch (Throwable e) {
            throw new RuntimeException(e);
          }
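          // Resolve the shard this key falls into using the PartitionMap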
          int shardId = partitionMap.findPartition(strKey);
          if (shardId == -1) {
            throw new RuntimeException(
                "shard id = -1 must be some sort of software bug. This shouldn't happen if PartitionMap is complete.");
          }
          // Finally write it to the Hadoop output
          for (Field field : processedTuple.getSchema().getFields()) {
            tableTuple.set(field.getName(), processedTuple.get(field.getName()));
          }
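          // Tag the Tuple with the shard it belongs to before emitting it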
          tableTuple.set(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, shardId);
          collector.write(tableTuple);
        }
      }, inputFile.getSpecificHadoopInputFormatContext());
    }
  }
  tableSpecs.add(table.getTableSpec());
}
// We do the same for the replicated tables, but the Mapper logic is different:
// the data will be sent to every partition
for (final Table table : tablespace.getReplicateAllTables()) {
  List<Field> fields = new ArrayList<Field>();
  fields.addAll(table.getTableSpec().getSchema().getFields());
  fields.add(SploutSQLOutputFormat.getPartitionField());
  final Schema tableSchema = new Schema(table.getTableSpec().getSchema().getName(), fields);
  schemaCounter++;
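  // Register the table schema (made nullable) as one of the job's intermediate Pangool schemas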
  builder.addIntermediateSchema(NullableSchema.nullableSchema(tableSchema));
  // For each input file of the Table we add an input and a TupleMapper
  for (TableInput inputFile : table.getFiles()) {
    final RecordProcessor recordProcessor = inputFile.getRecordProcessor();
    for (Path path : inputFile.getPaths()) {
      builder.addInput(path, inputFile.getFormat(), new TupleMapper<ITuple, NullWritable>() {
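        // Reuse a single Tuple instance across map() calls to avoid per-record allocations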
        Tuple tableTuple = new Tuple(tableSchema);
        CounterInterface counterInterface = null;
        @Override
        public void map(ITuple key, NullWritable value, TupleMRContext context, Collector collector)
            throws IOException, InterruptedException {
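          // Lazily create the CounterInterface the first time map() sees the Hadoop context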
          if (counterInterface == null) {
            counterInterface = new CounterInterface(context.getHadoopContext());
          }
          // For each input Tuple from this File execute the RecordProcessor
          // The default IdentityRecordProcessor just passes the Tuple through unchanged
          ITuple processedTuple = null;
          try {
            processedTuple = recordProcessor.process(key, counterInterface);
          } catch (Throwable e1) {
            throw new RuntimeException(e1);
          }
          if (processedTuple == null) {
            // The tuple has been filtered out by the user
            return;
          }
          // Finally write it to the Hadoop output
          for (Field field : processedTuple.getSchema().getFields()) {
            tableTuple.set(field.getName(), processedTuple.get(field.getName()));
          }
          // Send the data of the replicated table to all partitions!
          for (int i = 0; i < nPartitions; i++) {
            tableTuple.set(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, i);