TupleTextInputFormat.NO_ESCAPE_CHARACTER, false, false, TupleTextInputFormat.NO_NULL_STRING,
fileSchema, recordProcessor);
}
// Partition the dataset by pagename, which should give a fairly even distribution.
tableBuilder.partitionBy("pagename");
// Create a compound index on (pagename, date) so that typical queries for the dataset will be fast.
tableBuilder.createIndex("pagename", "date");
long nonExactPageSize = memoryForIndexing / 32000; // number of pages
int pageSize = (int) Math.pow(2, (int) Math.round(Math.log(nonExactPageSize) / Math.log(2)));