boolean usedthedate,
String custFields,
updateStatus update,
String uniqCheckField,
Integer parallel
) throws Exception {
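// Builds the index with two chained MapReduce jobs: stage 1 scans the raw
// input and writes small per-reducer index segments into the temporary
// `smallindex` directory; stage 2 merges those segments into `shards`
// final partitions under `output`.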
Job job = new Job(new Configuration(jconf));
JobIndexPublic.setJars(job.getConfiguration());
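// Register every input glob of the form <inputBase>/*<input>*/<inputmatch>
// that matches at least one file; "seq" inputs additionally switch the job
// to SequenceFileInputFormat.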
if (filetype.equals("seq")) {
job.setInputFormatClass(SequenceFileInputFormat.class);
}
// addInputPath is the static helper inherited from FileInputFormat, so a
// single loop serves both input formats.
for (String input : inputs) {
Path p = new Path(inputBase, "*" + input + "*/" + inputmatch);
System.out.println(p.toString());
FileStatus[] list = fs.globStatus(p);
if (list == null || list.length == 0) {
// Skip globs that match no files; registering them would make
// input-split computation fail at submission time.
continue;
}
FileInputFormat.addInputPath(job, p);
}
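// Derive a compact job name from the output path, roughly
// "<grandparent>*<leaf>", capped at the trailing 50 characters.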
Path baseP = new Path(output);
Path baseParent = baseP;
if (baseP.getParent() != null && baseP.getParent().getParent() != null) {
baseParent = baseP.getParent().getParent();
}
String jobnameOutput = baseParent.toString() + "*" + baseP.getName();
int cutoutlen = 50;
if (jobnameOutput.length() > cutoutlen) {
jobnameOutput = "*" + jobnameOutput.substring(jobnameOutput.length() - cutoutlen);
}
System.out.println("output:"+output+"@"+jobnameOutput);
System.out.println("tmp:"+smallindex.toString());
job.setJobName("mdrill_stage_1@"+jobnameOutput);
job.setJarByClass(JobIndexerPartion.class);
fs.delete(new Path(output), true);
fs.delete(smallindex, true);
Configuration conf = job.getConfiguration();
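// Read the index field list out of Solr's schema.xml and ship the whole
// solr/conf directory to the tasks via the distributed cache.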
String fields = JobIndexPublic.readFieldsFromSchemaXml(solrHome + "/solr/conf/schema.xml", fs, conf);
JobIndexPublic.setDistributecache(new Path(solrHome, "solr/conf"), fs, conf);
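// Configure the column delimiter; empty, "default", and "\001" all appear
// to stand for the built-in default separator.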
if (split.equals("\t")) {
// A literal tab is passed as the marker string "tab", presumably because
// it would not survive the round trip through the configuration XML.
conf.set("higo.column.split", "tab");
} else if (!split.isEmpty() && !split.equals("default") && !split.equals("\001")) {
conf.set("higo.column.split", split);
}
conf.set("uniq.check.field", uniqCheckField);
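// Remaining job-wide parameters handed to the tasks: custom field list,
// input base directory, the `usedthedate` flag, and the field list parsed
// from schema.xml.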
conf.set("higo.column.custfields", custFields);
conf.set("higo.input.base", inputBase);
conf.setBoolean("higo.column.userthedate", usedthedate);
//conf.set("mapred.reduce.slowstart.completed.maps", "0.01");
conf.set("higo.index.fields", fields);
job.setPartitionerClass(PairPartion.class);
job.setMapperClass(IndexMapper.class);
job.setMapOutputKeyClass(PairWriteable.class);
job.setMapOutputValueClass(DocumentMap.class);
job.setReducerClass(IndexReducer.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
FileOutputFormat.setOutputPath(job, smallindex);
job.setNumReduceTasks(shards * parallel);
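// Run stage 1. With an updateStatus callback the job is submitted
// asynchronously and polled every three seconds so the caller can show
// progress; update.dump(job) returning true is treated as failure.
// Without a callback the job simply runs synchronously.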
int result = 0;
if (update != null) {
job.submit();
while (!job.isComplete()) {
update.update(1, job);
Thread.sleep(3000);
}
if (update.dump(job)) {
return -1;
}
} else {
result = job.waitForCompletion(true) ? 0 : -1;
}
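// Stage 2, only if stage 1 succeeded: feed the stage-1 part files through
// the default (identity) mapper and IndexReducerMerge to produce exactly
// `shards` final partitions.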
if (result == 0) {
Job job2 = new Job(new Configuration(jconf));
JobIndexPublic.setJars(job2.getConfiguration());
job2.setJobName("mdrill_stage_2@" + jobnameOutput);
Configuration conf2 = job2.getConfiguration();
JobIndexPublic.setDistributecache(new Path(solrHome, "solr/conf"), fs, conf2);
conf2.set("higo.index.fields", fields);
job2.setJarByClass(JobIndexerPartion.class);
job2.setInputFormatClass(SequenceFileInputFormat.class);
SequenceFileInputFormat.addInputPath(job2, new Path(smallindex, "part-r-*"));
job2.setMapOutputKeyClass(IntWritable.class);
job2.setMapOutputValueClass(Text.class);
job2.setPartitionerClass(IntPartion.class);
job2.setReducerClass(IndexReducerMerge.class);
job2.setOutputKeyClass(IntWritable.class);
job2.setOutputValueClass(Text.class);
job2.setOutputFormatClass(SequenceFileOutputFormat.class);
job2.setNumReduceTasks(shards);
SequenceFileOutputFormat.setOutputPath(job2, new Path(output));
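// Same submit-and-poll pattern as stage 1; update.finish() signals the
// caller that the last stage is done. Note that this path, unlike the
// waitForCompletion branch, never folds job2's final status into `result`.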
if (update != null) {
job2.submit();
while (!job2.isComplete()) {
update.update(2, job2);
Thread.sleep(3000);
}
update.finish();
} else {
result = job2.waitForCompletion(true) ? 0 : -1;
}
}
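// Drop the intermediate stage-1 output whether or not stage 2 ran.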
fs.delete(smallindex, true);