* REVISIONS
*/
private void loadRevision() throws SQLException {
// TEXT
Table textTable = this.getTableCatalog(WikipediaConstants.TABLENAME_TEXT);
String textSQL = SQLUtil.getInsertSQL(textTable);
PreparedStatement textInsert = this.conn.prepareStatement(textSQL);
// REVISION
Table revTable = this.getTableCatalog(WikipediaConstants.TABLENAME_REVISION);
String revSQL = SQLUtil.getInsertSQL(revTable);
PreparedStatement revisionInsert = this.conn.prepareStatement(revSQL);
WikipediaBenchmark b = (WikipediaBenchmark)this.benchmark;
int batchSize = 1;
Zipf h_users = new Zipf(this.rng(), 1, this.num_users, WikipediaConstants.REVISION_USER_SIGMA);
FlatHistogram<Integer> h_textLength = new FlatHistogram<Integer>(this.rng(), TextHistograms.TEXT_LENGTH);
FlatHistogram<Integer> h_commentLength = b.commentLength;
FlatHistogram<Integer> h_minorEdit = b.minorEdit;
FlatHistogram<Integer> h_nameLength = new FlatHistogram<Integer>(this.rng(), UserHistograms.NAME_LENGTH);
FlatHistogram<Integer> h_numRevisions = new FlatHistogram<Integer>(this.rng(), PageHistograms.REVISIONS_PER_PAGE);
int rev_id = 1;
int lastPercent = -1;
for (int page_id = 1; page_id <= this.num_pages; page_id++) {
// There must be at least one revision per page
int num_revised = h_numRevisions.nextValue().intValue();
// Generate the initial text of the page; the first revision uses it unchanged
int old_text_length = h_textLength.nextValue().intValue();
assert(old_text_length > 0);
char old_text[] = TextGenerator.randomChars(rng(), old_text_length);
for (int i = 0; i < num_revised; i++) {
// Pick the user who is making this revision (the page is fixed by the outer loop)
// and make sure that we always update that user's revision counter
int user_id = h_users.nextInt();
assert(user_id > 0 && user_id <= this.num_users) : "Invalid UserId '" + user_id + "'";
this.user_revision_ctr[user_id-1]++;
// Generate what the new revision is going to be
if (i > 0) {
old_text = b.generateRevisionText(old_text);
old_text_length = old_text.length;
}
char rev_comment[] = TextGenerator.randomChars(rng(), h_commentLength.nextValue().intValue());
// The REV_USER_TEXT field is usually the username, but we'll just
// put in gibberish for now
char user_text[] = TextGenerator.randomChars(rng(), h_nameLength.nextValue().intValue());
// Insert the text
int col = 1;
textInsert.setInt(col++, rev_id); // old_id
textInsert.setString(col++, new String(old_text)); // old_text
textInsert.setString(col++, "utf-8"); // old_flags
textInsert.setInt(col++, page_id); // old_page
textInsert.addBatch();
// Insert the revision
col = 1;
revisionInsert.setInt(col++, rev_id); // rev_id
revisionInsert.setInt(col++, page_id); // rev_page
revisionInsert.setInt(col++, rev_id); // rev_text_id
revisionInsert.setString(col++, new String(rev_comment)); // rev_comment
revisionInsert.setInt(col++, user_id); // rev_user
revisionInsert.setString(col++, new String(user_text)); // rev_user_text
revisionInsert.setString(col++, TimeUtil.getCurrentTimeString14()); // rev_timestamp
revisionInsert.setInt(col++, h_minorEdit.nextValue().intValue()); // rev_minor_edit
revisionInsert.setInt(col++, 0); // rev_deleted
revisionInsert.setInt(col++, 0); // rev_len
revisionInsert.setInt(col++, 0); // rev_parent_id
revisionInsert.addBatch();
// Update Last Revision Stuff
this.page_last_rev_id[page_id-1] = rev_id;
this.page_last_rev_length[page_id-1] = old_text_length;
rev_id++;
batchSize++;
} // FOR (revision)
if (batchSize > WikipediaConstants.BATCH_SIZE) {
textInsert.executeBatch();
revisionInsert.executeBatch();
this.conn.commit();
this.addToTableCount(textTable.getName(), batchSize);
this.addToTableCount(revTable.getName(), batchSize);
batchSize = 0;
if (LOG.isDebugEnabled()) {
int percent = (int) (((double) page_id / (double) this.num_pages) * 100);
if (percent != lastPercent) LOG.debug("REVISIONS (" + percent + "%)");
lastPercent = percent;
}
}
} // FOR (page)
revisionInsert.close();
textInsert.close();
if (this.getDatabaseType() == DatabaseType.POSTGRES) {
this.updateAutoIncrement(textTable.getColumn(0), rev_id);
this.updateAutoIncrement(revTable.getColumn(0), rev_id);
}
// UPDATE USER
revTable = this.getTableCatalog(WikipediaConstants.TABLENAME_USER);
String updateUserSql = "UPDATE " + revTable.getEscapedName() +
" SET user_editcount = ?, " +
" user_touched = ? " +
" WHERE user_id = ?";
PreparedStatement userUpdate = this.conn.prepareStatement(updateUserSql);
batchSize = 0;
for (int i = 0; i < this.num_users; i++) {
int col = 1;
userUpdate.setInt(col++, this.user_revision_ctr[i]);
userUpdate.setString(col++, TimeUtil.getCurrentTimeString14());
userUpdate.setInt(col++, i+1); // ids start at 1
userUpdate.addBatch();
if ((++batchSize % WikipediaConstants.BATCH_SIZE) == 0) {
userUpdate.executeBatch();
this.conn.commit();
userUpdate.clearBatch();
batchSize = 0;
}
} // FOR
if (batchSize > 0) {
userUpdate.executeBatch();
this.conn.commit();
userUpdate.clearBatch();
}
userUpdate.close();
// UPDATE PAGES
revTable = this.getTableCatalog(WikipediaConstants.TABLENAME_PAGE);
String updatePageSql = "UPDATE " + revTable.getEscapedName() +
" SET page_latest = ?, " +
" page_touched = ?, " +
" page_is_new = 0, " +
" page_is_redirect = 0, " +
" page_len = ? " +