public void InitializeDatabase() {
// Drop legacy indexes, then add the current ones:
try
{
PropertiesManager pm = new PropertiesManager();
////////////////////////
//
// Remove old indexes, mostly just old code that is no longer needed
//
dropIndexIfItExists(DbManager.getDocument().getContent(), CompressedFullTextPojo.url_, 1);
dropIndexIfItExists(DbManager.getDocument().getContent(), CompressedFullTextPojo.sourceKey_, 2);
dropIndexIfItExists(DbManager.getDocument().getMetadata(), DocumentPojo.sourceUrl_, 1);
dropIndexIfItExists(DbManager.getDocument().getMetadata(), DocumentPojo.sourceKey_, 1);
dropIndexIfItExists(DbManager.getDocument().getMetadata(), DocumentPojo.title_, 1);
// (Title simply not needed, that was a mistake from an early iteration)
dropIndexIfItExists(DbManager.getDocument().getMetadata(), DocumentPojo.updateId_, 1);
dropIndexIfItExists(DbManager.getSocial().getShare(), "type", 1);
dropIndexIfItExists(DbManager.getSocial().getCookies(), "apiKey", 1);
dropIndexIfItExists(DbManager.getCustom().getLookup(),CustomMapReduceJobPojo.jobidS_, 2);
dropIndexIfItExists(DbManager.getCustom().getLookup(),CustomMapReduceJobPojo.waitingOn_, 2);
// (See the shard keys below; these legacy indexes can appear if the DB is restored from a different machine's backup)
dropIndexIfNotNeeded(DbManager.getDocument().getContent(), "sourceKey_1_url_1", 0, "sourceKey_2_url_2", 0);
dropIndexIfNotNeeded(DbManager.getDocument().getMetadata(), "sourceKey_1__id_1", 0, "sourceKey_1__id_-1", 0);
////////////////////////
//
// Indexes needed for sharding:
//
// ** Content (has changed a bit)
BasicDBObject compIndex = new BasicDBObject(CompressedFullTextPojo.sourceKey_, 1);
compIndex.put(CompressedFullTextPojo.url_, 1);
addIndexIfNeeded(DbManager.getDocument().getContent(), "sourceKey_2_url_2", 0, compIndex); // (remove legacy 2_2 and replace with 1_1, which supports shards)
// ** Metadata
// Add {_id:1} to "standalone" sourceKey, sort docs matching source key by "time" (sort of!)
compIndex = new BasicDBObject(DocumentPojo.sourceKey_, 1);
compIndex.put(DocumentPojo._id_, 1);
addIndexIfNeeded(DbManager.getDocument().getMetadata(), "sourceKey_1__id_-1", 0, compIndex); // (remove legacy 1_-1 and replace with 1_1, which supports shards)
// ** Entities and associations
DbManager.getFeature().getEntity().ensureIndex(new BasicDBObject(EntityFeaturePojo.index_, 1));
DbManager.getFeature().getAssociation().ensureIndex(new BasicDBObject(AssociationFeaturePojo.index_, 1));
////////////////////////
//
// Other indexes
//
// Needed to handle updates of large files containing many URLs:
DbManager.getDocument().getMetadata().ensureIndex(new BasicDBObject(DocumentPojo.sourceUrl_, 2), new BasicDBObject(MongoDbManager.sparse_, true));
// Needed for duplicate checking
// (Compound index lets me access {url, sourceKey}, {url} efficiently ... but need sourceKey separately to do {sourceKey})
compIndex = new BasicDBObject(DocumentPojo.url_, 1);
compIndex.put(DocumentPojo.sourceKey_, 1);
DbManager.getDocument().getMetadata().ensureIndex(compIndex);
// Needed to handle document updates
DbManager.getDocument().getMetadata().ensureIndex(new BasicDBObject(DocumentPojo.updateId_, 2), new BasicDBObject(MongoDbManager.sparse_, true));
// Needed to update documents' entities' doc counts
if (!pm.getAggregationDisabled()) {
compIndex = new BasicDBObject(EntityPojo.docQuery_index_, 1);
compIndex.put(DocumentPojo.communityId_, 1);
DbManager.getDocument().getMetadata().ensureIndex(compIndex);
}
// Needed for keeping source/community doc counts