// This method is called once per partition
public void initPartition(int partition, Path local) throws IOException, InterruptedException {
try {
LOG.info("Initializing SQL connection [" + partition + "]");
SQLiteConnection conn = new SQLiteConnection(new File(local.toString()));
// Change the default temp_store_directory, otherwise we may run out of disk space as it will go to /var/tmp
// In EMR the big disks are at /mnt
// It suffices to set it to . as it is the tasks' work directory
// Warning: this pragma is deprecated and may be removed in future versions; however, there is no alternative
// other than recompiling SQLite or modifying the environment.
conn.open(true);
conn.exec("PRAGMA temp_store_directory = '" + new File(".").getAbsolutePath() + "'");
SQLiteStatement st = conn.prepare("PRAGMA temp_store_directory");
st.step();
LOG.info("Changed temp_store_directory to: " + st.columnString(0));
// journal_mode=OFF speeds up insertions
conn.exec("PRAGMA journal_mode=OFF");
/*
* page_size is one of the most important parameters for speeding up indexing. SQLite performs a merge sort to
* sort data before inserting it into an index. The buffer SQLite uses for sorting has a size equal to
* page_size * SQLITE_DEFAULT_TEMP_CACHE_SIZE. Unfortunately, SQLITE_DEFAULT_TEMP_CACHE_SIZE is a compile-time
* parameter, so it is fixed in the sqlite4java library being used. We have recompiled that library to increase
* SQLITE_DEFAULT_TEMP_CACHE_SIZE (up to 32000 at the time of writing), so at runtime the only way
* to change the buffer size used for sorting is to change the page_size. page_size must be changed BEFORE any
* CREATE statements, otherwise it has no effect. page_size should be a multiple of the sector size (1024 on Linux)
* in order to be efficient.
*/
conn.exec("PRAGMA page_size=8192;");
connCache.put(partition, conn);
// Init transaction
for(String sql : getPreSQL()) {
LOG.info("Executing: " + sql);
conn.exec(sql);
}
conn.exec("BEGIN");
Map<String, SQLiteStatement> stMap = new HashMap<String, SQLiteStatement>();
stCache.put(partition, stMap);
} catch(SQLiteException e) {
throw new IOException(e);
} catch(SploutSQLOutputFormatException e) {