// Scenario: no PARALLEL clause, so the reducer count should come from the
// estimator. The two properties below are set to 100 and 10; the expected
// value further down mirrors them as ceil(fileLength / 100.0), capped at 10.
pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
pc.getConf().setProperty("pig.exec.reducers.max", "10");
ConfigurationValidator.validatePigProperties(pc.getProperties());
conf = ConfigurationUtil.toConfiguration(pc.getProperties());
JobControlCompiler jcc = new JobControlCompiler(pc, conf);
JobControl jc = jcc.compile(mrPlan, "Test");
// Only the first waiting job of the compiled plan is inspected.
Job job = jc.getWaitingJobs().get(0);
// Expected reducer count derived from the size of the local passwd fixture.
long reducer = Math.min(
        (long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0),
        10);
// JUnit's contract is assertEquals(expected, actual); keeping that order makes
// a failure message report the values the right way round.
// NOTE(review): the getLong fallback (10) equals the cap above, so if the key
// were ever unset and the fixture is larger than 900 bytes this check could
// pass vacuously - consider a sentinel default such as -1.
assertEquals(reducer, job.getJobConf().getLong("mapred.reduce.tasks", 10));
// Scenario: an explicit PARALLEL clause is expected to override the estimated
// reducer count, so the job should be configured with exactly 2 reducers even
// though the estimator settings are the same as before.
query = "a = load '/passwd';" +
        "b = group a by $0 PARALLEL 2;" +
        "store b into 'output';";
pp = Util.buildPp(ps, query);
mrPlan = Util.buildMRPlan(pp, pc);
pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
pc.getConf().setProperty("pig.exec.reducers.max", "10");
ConfigurationValidator.validatePigProperties(pc.getProperties());
conf = ConfigurationUtil.toConfiguration(pc.getProperties());
jcc = new JobControlCompiler(pc, conf);
jc = jcc.compile(mrPlan, "Test");
// Only the first waiting job of the compiled plan is inspected.
job = jc.getWaitingJobs().get(0);
// JUnit's contract is assertEquals(expected, actual); the expected value goes
// first so a failure message reports the values the right way round.
assertEquals(2L, job.getJobConf().getLong("mapred.reduce.tasks", 10));
// Scenario: the input is an HBase table rather than files on DFS.
// Create the backing table "passwd" with a single column family "pig".
final byte[] COLUMNFAMILY = Bytes.toBytes("pig");
HTable table = util.createTable(Bytes.toBytesBinary("passwd"),
        COLUMNFAMILY);
// The estimate does not take effect when the input is not on DFS or the
// input files do not exist (as with HBase), so a single reducer is expected.
query = "a = load 'hbase://passwd' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');" +
        "b = group a by $0 ;" +
        "store b into 'output';";
pp = Util.buildPp(ps, query);
mrPlan = Util.buildMRPlan(pp, pc);
pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
pc.getConf().setProperty("pig.exec.reducers.max", "10");
ConfigurationValidator.validatePigProperties(pc.getProperties());
conf = ConfigurationUtil.toConfiguration(pc.getProperties());
jcc = new JobControlCompiler(pc, conf);
jc = jcc.compile(mrPlan, "Test");
// Only the first waiting job of the compiled plan is inspected.
job = jc.getWaitingJobs().get(0);
// JUnit's contract is assertEquals(expected, actual); the expected value goes
// first so a failure message reports the values the right way round.
assertEquals(1L, job.getJobConf().getLong("mapred.reduce.tasks", 10));
// Remove the table created for this scenario.
util.deleteTable(Bytes.toBytesBinary("passwd"));
// In HBase 0.90.1 and above we can use util.shutdownMiniHBaseCluster()
// here instead.