// Phase 1: run MapReduceIndexerTool against the first input, addressing the target
// shards through explicit --shard-url arguments, then check the document count over HTTP.
// Set IGNORE_TIKA_EXCEPTION to false in the job configuration
// (presumably so that Tika failures are not ignored -- confirm).
jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);
// Declared once here and reassigned by each of the tool runs below.
MapReduceIndexerTool tool;
int res;
QueryResponse results;
// HTTP client bound to the URL of the first cloud jetty; used below to count documents.
HttpSolrServer server = new HttpSolrServer(cloudJettys.get(0).url);
String[] args = new String[] {
"--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
"--output-dir=" + outDir.toString(),
"--mappers=3",
// Each evaluation increments numRuns: an even count passes INPATH via --input-list,
// an odd count passes the data directory as a bare argument.
++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
// URLs of the first three cloud jettys, one --shard-url each.
"--shard-url", cloudJettys.get(0).url,
"--shard-url", cloudJettys.get(1).url,
"--shard-url", cloudJettys.get(2).url,
// Thread count is drawn from random(): nextInt(15) + 1
// (1..15 if random() is a java.util.Random -- confirm).
"--go-live-threads", Integer.toString(random().nextInt(15) + 1),
"--verbose",
"--go-live"
};
// prependInitialArgs is defined outside this view; its result replaces args.
args = prependInitialArgs(args);
// NOTE(review): the constant-true condition makes this `if` a plain grouping block.
if (true) {
tool = new MapReduceIndexerTool();
res = ToolRunner.run(jobConf, tool, args);
// Exit code 0 plus a completed, successful job are required.
assertEquals(0, res);
assertTrue(tool.job.isComplete());
assertTrue(tool.job.isSuccessful());
// A match-all query through the HTTP client must find exactly 20 documents.
results = server.query(new SolrQuery("*:*"));
assertEquals(20, results.getResults().getNumFound());
}
// Phase 2: stage a second Avro input and rerun the tool with explicit shard URLs.
// Delete the input, output and data directories left by the previous run
// (second argument `true` is presumably the recursive flag -- confirm against fs's type).
fs.delete(inDir, true);
fs.delete(outDir, true);
fs.delete(dataDir, true);
// Recreate the input directory; a failed mkdirs fails the test immediately.
assertTrue(fs.mkdirs(inDir));
// Stage inputAvroFile2 through upAvroFile (defined outside this view); its result becomes INPATH.
INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile2);
args = new String[] {
"--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
"--output-dir=" + outDir.toString(),
"--mappers=3",
"--verbose",
"--go-live",
// Each evaluation increments numRuns: an even count passes INPATH via --input-list,
// an odd count passes the data directory as a bare argument.
++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
// URLs of the first three cloud jettys, one --shard-url each.
"--shard-url", cloudJettys.get(0).url,
"--shard-url", cloudJettys.get(1).url,
"--shard-url", cloudJettys.get(2).url,
// Thread count is drawn from random(): nextInt(15) + 1
// (1..15 if random() is a java.util.Random -- confirm).
"--go-live-threads", Integer.toString(random().nextInt(15) + 1)
};
args = prependInitialArgs(args);
// NOTE(review): the constant-true condition makes this `if` a plain grouping block.
if (true) {
tool = new MapReduceIndexerTool();
res = ToolRunner.run(jobConf, tool, args);
// Exit code 0 plus a completed, successful job are required.
assertEquals(0, res);
assertTrue(tool.job.isComplete());
assertTrue(tool.job.isSuccessful());
// A match-all query through `server` must now find 22 documents. No index wipe is visible
// between the previous run and this one, so 22 is presumably a cumulative total -- confirm.
results = server.query(new SolrQuery("*:*"));
assertEquals(22, results.getResults().getNumFound());
}
// Phase 3: run the tool with --zk-host/--collection instead of explicit shard URLs,
// then exercise an update / verify / delete round trip through cloudClient.
String collection = "collection1";
if (random().nextBoolean()) {
// On a random coin flip, target "collection1" through the alias "updatealias" instead.
createAlias("updatealias", "collection1");
collection = "updatealias";
}
// Delete the directories left by the previous run and stage inputAvroFile3.
// NOTE(review): unlike the earlier reset, inDir is not recreated with mkdirs here;
// presumably upAvroFile creates what it needs -- confirm.
fs.delete(inDir, true);
fs.delete(outDir, true);
fs.delete(dataDir, true);
INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);
// Start from an empty index: delete everything, commit, and assert nothing is found.
cloudClient.deleteByQuery("*:*");
cloudClient.commit();
assertEquals(0, cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound());
args = new String[] {
"--output-dir=" + outDir.toString(),
"--mappers=3",
"--reducers=12",
"--fanout=2",
"--verbose",
"--go-live",
// Each evaluation increments numRuns: an even count passes INPATH via --input-list,
// an odd count passes the data directory as a bare argument.
++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
// Cluster address comes from the test ZooKeeper server; no --solr-home-dir or
// --shard-url arguments are passed in this run.
"--zk-host", zkServer.getZkAddress(),
"--collection", collection
};
args = prependInitialArgs(args);
// NOTE(review): the constant-true condition makes this `if` a plain block; it also
// limits the scope of resultDocs and resultDocs2 declared inside it.
if (true) {
tool = new MapReduceIndexerTool();
res = ToolRunner.run(jobConf, tool, args);
// Exit code 0 plus a completed, successful job are required.
assertEquals(0, res);
assertTrue(tool.job.isComplete());
assertTrue(tool.job.isSuccessful());
// Both the reported hit count and the number of returned documents must equal RECORD_COUNT;
// the size check also keeps the get(i) calls below in range.
SolrDocumentList resultDocs = executeSolrQuery(cloudClient, "*:*");
assertEquals(RECORD_COUNT, resultDocs.getNumFound());
assertEquals(RECORD_COUNT, resultDocs.size());
// Perform updates: re-add every document with a changed user_screen_name.
for (int i = 0; i < RECORD_COUNT; i++) {
SolrDocument doc = resultDocs.get(i);
SolrInputDocument update = new SolrInputDocument();
// Copy every field of the fetched document into the input document.
for (Map.Entry<String, Object> entry : doc.entrySet()) {
update.setField(entry.getKey(), entry.getValue());
}
update.setField("user_screen_name", "Nadja" + i);
// Drop the copied _version_ field before adding
// (presumably to avoid a version check on the update -- confirm).
update.removeField("_version_");
cloudClient.add(update);
}
cloudClient.commit();
// Verify updates: the document count is unchanged and each document carries its new name.
SolrDocumentList resultDocs2 = executeSolrQuery(cloudClient, "*:*");
assertEquals(RECORD_COUNT, resultDocs2.getNumFound());
assertEquals(RECORD_COUNT, resultDocs2.size());
// NOTE(review): documents are compared by position, which relies on executeSolrQuery
// returning the same order for both queries -- confirm it sorts its results.
for (int i = 0; i < RECORD_COUNT; i++) {
SolrDocument doc = resultDocs.get(i);
SolrDocument doc2 = resultDocs2.get(i);
// id and text must be unchanged; only user_screen_name reflects the update.
assertEquals(doc.getFirstValue("id"), doc2.getFirstValue("id"));
assertEquals("Nadja" + i, doc2.getFirstValue("user_screen_name"));
assertEquals(doc.getFirstValue("text"), doc2.getFirstValue("text"));
// Perform delete: remove the document just verified (the cast assumes the id value is a String).
cloudClient.deleteById((String)doc.getFirstValue("id"));
}
cloudClient.commit();
// Verify deletes: a match-all query must return no documents.
assertEquals(0, executeSolrQuery(cloudClient, "*:*").size());
}
// Leave the index empty for the next phase: delete everything, commit, and assert nothing is found.
cloudClient.deleteByQuery("*:*");
cloudClient.commit();
assertEquals(0, cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound());
// Shut down the HTTP client created earlier in this method.
// NOTE(review): no try/finally is visible around this call, so an earlier assertion failure
// would skip the shutdown unless an enclosing construct outside this view handles it.
server.shutdown();
// Next phase: ZooKeeper-based run against a separate, replicated collection.
String replicatedCollection = "replicated_collection";
// NOTE(review): the numeric arguments are presumably shard count, replication factor and
// max shards per node -- confirm against createCollection's signature.
createCollection(replicatedCollection, 11, 3, 11);