tableBuilder.initialSQL("pragma page_size=" + pageSize);
// insertion order is very important for query speed because it keeps related rows co-located on disk
tableBuilder.insertionSortOrder(OrderBy.parse("pagename:asc, date:asc"));
// instantiate a TablespaceBuilder
TablespaceBuilder tablespaceBuilder = new TablespaceBuilder();
// we will split this dataset into this many partitions:
tablespaceBuilder.setNPartitions(nPartitions);
tablespaceBuilder.add(tableBuilder.build());
// we turn a specific SQLite pragma on for making autocomplete queries fast
tablespaceBuilder.initStatements("pragma case_sensitive_like=true;");
HadoopUtils.deleteIfExists(outFs, outPath);
// finally, instantiate a TablespaceGenerator and execute it
TablespaceGenerator tablespaceViewBuilder;
if(generateTupleFiles) {
// we subclass TablespaceGenerator to be able to run the generation without outputting the SQLite stores, for
// benchmark comparisons.
// In the future this feature may be useful in general for debugging store creation.
tablespaceViewBuilder = new TablespaceGenerator(tablespaceBuilder.build(), outPath, this.getClass()) {
/**
 * Runs the view generation, but sends the job output to tuple files (via
 * {@code TupleOutputFormat}) under {@code OUT_STORE} instead of SQLite stores.
 *
 * @param conf            Hadoop configuration passed through to every step
 * @param samplingType    sampling strategy, used only when there is more than one partition
 * @param samplingOptions options forwarded to the sampler
 * @throws Exception if any of the delegated steps fails
 */
@Override
public void generateView(Configuration conf, SamplingType samplingType,
    SamplingOptions samplingOptions) throws Exception {
  prepareOutput(conf);

  // With a single partition there is nothing to sample: use the one-shard map directly.
  final int partitionCount = tablespace.getnPartitions();
  partitionMap = partitionCount > 1
      ? sample(partitionCount, conf, samplingType, samplingOptions)
      : PartitionMap.oneShardOpenedMap();

  writeOutputMetadata(conf);

  TupleMRBuilder mrBuilder = createMRBuilder(partitionCount, conf);
  // Set a tuple output here instead of the SQLite output.
  Path tupleStorePath = new Path(outputPath, OUT_STORE);
  mrBuilder.setOutput(tupleStorePath, new TupleOutputFormat(tableSchema), ITuple.class,
      NullWritable.class);
  executeViewGeneration(mrBuilder);
}
};
} else {
// ... otherwise a standard TablespaceGenerator is used.
tablespaceViewBuilder = new TablespaceGenerator(tablespaceBuilder.build(), outPath, this.getClass());
}
tablespaceViewBuilder.generateView(getConf(), SamplingType.FULL_SCAN,
new TupleSampler.FullScanSamplingOptions());
}