package com.alimama.mdrill.index;
import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import com.alimama.mdrill.partion.MdrillPartions;
import com.alimama.mdrill.partion.MdrillPartionsInterface;
import com.alimama.mdrill.utils.HadoopUtil;
import com.alimama.mdrill.utils.IndexUtils;
import com.alimama.mdrill.utils.TryLockFile;
/**
 * Hadoop {@link Tool} that (re)builds mdrill index partitions for a single table.
 * <p>
 * For every partition derived from the input directory layout it computes a
 * "vertify" signature (historical spelling — it is also an on-disk file name, so
 * it must not be renamed) from the partition strategy plus the input files'
 * size/mtime fingerprint. When the stored signature differs from the current one,
 * the partition is rebuilt into a scratch directory and atomically swapped into
 * place via {@code FileSystem.rename}. A local file lock serializes concurrent
 * builds of the same table across processes.
 */
public class JobIndexerPartion extends Configured implements Tool {
    /** Number of index shards produced for every partition. */
    private int shards;
    /** Table base directory on HDFS; {@code tmp} and {@code index} live under it. */
    private String solrHome;
    private FileSystem fs;
    /** Root directory of the raw input data that gets indexed. */
    private String inputBase;
    /** Day offset handed to the partition strategy (constructor overwrites the default). */
    private int dayDelay = 10;
    /** Maximum number of days the strategy looks back (constructor overwrites the default). */
    private int maxRunDays = 365;
    private String startday = "19831107";
    private String filetype = "txt";
    /** Shared scratch root: {@code solrHome/tmp}. */
    private Path tmp;
    /** Per-run scratch dir: {@code tmp/<random UUID>}; deleted at the end of {@link #run}. */
    private Path workDir;
    /** Published index root: {@code solrHome/index}. */
    private Path index;
    /** Partition-strategy key passed to {@link MdrillPartions#INSTANCE}. */
    private String type = "default";
    private JobIndexParse parse = null;
    private MdrillPartionsInterface mdrillpartion;
    /** Local file lock serializing builds of the same table across processes. */
    TryLockFile flock = null;

    // Compiled once instead of on every subRun() invocation; compiled Patterns are
    // immutable and thread-safe, so static sharing is safe.
    private static final Pattern IO_SORT_MB_PATTERN = Pattern.compile("@iosortmb:([0-9]+)@");
    private static final Pattern SIGMENT_PATTERN = Pattern.compile("@sigment:([0-9]+)@");

    // NOTE(review): SimpleDateFormat is not thread-safe. This is only safe while
    // parseDate() is called from a single thread, which the sequential run() loop
    // suggests — confirm before reusing this instance elsewhere.
    SimpleDateFormat fmt = new SimpleDateFormat("yyyyMMdd_HHmmss_SSS");

    /**
     * @param tablename  table name; also used as the lock-file name
     * @param conf       Hadoop configuration used to obtain the FileSystem
     * @param _shards    number of index shards per partition
     * @param _solrHome  HDFS base directory for this table
     * @param _inputBase HDFS root of the input data
     * @param _dayplus   day-delay parameter for the partition strategy
     * @param _maxRunDays maximum look-back window in days
     * @param _startday  earliest day (yyyyMMdd) to consider
     * @param _filetype  input file type handed to MakeIndex (e.g. "txt")
     * @param type       partition-strategy key (e.g. "default")
     * @throws IOException if the FileSystem cannot be obtained
     */
    public JobIndexerPartion(String tablename, Configuration conf, int _shards, String _solrHome,
            String _inputBase, int _dayplus, int _maxRunDays, String _startday, String _filetype,
            String type) throws IOException {
        this.shards = _shards;
        this.solrHome = _solrHome;
        this.tmp = new Path(_solrHome, "tmp");
        this.workDir = new Path(this.tmp, java.util.UUID.randomUUID().toString());
        this.index = new Path(_solrHome, "index");
        this.inputBase = _inputBase;
        this.dayDelay = _dayplus;
        this.maxRunDays = _maxRunDays;
        this.startday = _startday;
        this.filetype = _filetype;
        this.fs = FileSystem.get(conf);
        this.parse = new JobIndexParse(fs);
        this.type = type;
        this.mdrillpartion = MdrillPartions.INSTANCE(this.type);
        // The lock file lives on the LOCAL filesystem under $storm.home/lock,
        // falling back to the current working directory.
        String stormhome = System.getProperty("storm.home");
        if (stormhome == null) {
            stormhome = ".";
        }
        String lockPathBase = stormhome + "/lock";
        File file = new File(lockPathBase);
        file.mkdirs(); // best effort: TryLockFile will surface the error if the dir is unusable
        flock = new TryLockFile(lockPathBase + "/" + tablename);
    }

    /** Removes stale entries under the shared tmp directory (e.g. from crashed runs). */
    private void cleanTmp() throws IOException {
        HadoopUtil.cleanHistoryFile(this.fs, this.tmp);
    }

    /**
     * Returns the partition name -&gt; day-set mapping, ordered by descending
     * partition name (presumably newest-first for date-named partitions —
     * confirm against the partition strategy).
     */
    private TreeMap<String, HashSet<String>> getPartions() throws Exception {
        HashSet<String> namelist = this.mdrillpartion.getNameList(fs, this.inputBase, this.startday, dayDelay, maxRunDays);
        HashMap<String, HashSet<String>> partions = this.mdrillpartion.indexPartions(namelist, startday, dayDelay, maxRunDays);
        TreeMap<String, HashSet<String>> rtn = new TreeMap<String, HashSet<String>>(new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                // Reverse natural ordering.
                return o2.compareTo(o1);
            }
        });
        rtn.putAll(partions);
        return rtn;
    }

    /**
     * Computes the current "vertify" signature for one partition: the strategy's
     * signature extended with the input's byte count, file count and the
     * oldest/newest modification times. A rebuild is triggered whenever this
     * string differs from the one stored in the partition's vertify file.
     *
     * @param partion     partition name
     * @param partionDays days belonging to this partition
     * @param submatch    sub-path filter for the input listing
     * @param tablemode   table mode flags; {@code @igDataChange@} makes the
     *                    signature ignore file stats (day set only)
     */
    public String getCurrentVertify(String partion, HashSet<String> partionDays, String submatch, String tablemode) throws Exception {
        HashMap<String, HashSet<String>> partions = new HashMap<String, HashSet<String>>();
        partions.put(partion, partionDays);
        HashMap<String, String> vertifyset = this.mdrillpartion.indexVertify(partions, shards, startday, dayDelay, maxRunDays);
        String partionvertify = vertifyset.get(partion);
        if (partionvertify == null || partionvertify.isEmpty()) {
            // No signature from the strategy: synthesize a unique one so the
            // comparison with any stored value always reports "changed".
            partionvertify = "partionV" + MdrillPartions.PARTION_VERSION + "@001@single@" + this.shards + "@" + java.util.UUID.randomUUID().toString();
        }
        HashSet<FileStatus> pathlist = MakeIndex.getInputList(this.fs, this.inputBase, partionDays, submatch);
        long dusize = 0;
        // Note: if pathlist is empty these sentinels survive; parseDate() tolerates them.
        long mintime = Long.MAX_VALUE;
        long maxtime = Long.MIN_VALUE;
        for (FileStatus p : pathlist) {
            if (p.isDir()) {
                // Directories report their recursive size.
                dusize += HadoopUtil.duSize(fs, p.getPath());
            } else {
                dusize += p.getLen();
            }
            long lasttimes = p.getModificationTime();
            mintime = Math.min(mintime, lasttimes);
            maxtime = Math.max(maxtime, lasttimes);
        }
        if (tablemode.indexOf("@igDataChange@") >= 0) {
            // "ignore data change" mode: the signature depends only on the day set.
            return "partionV" + MdrillPartions.PARTION_VERSION + "@001@" + partion + "@" + shards + "@" + partionDays.size() + "@" + partionDays.hashCode() + "@0@0@0@0";
        }
        return partionvertify + "@" + dusize + "@" + pathlist.size() + "@" + parseDate(mintime) + "@" + parseDate(maxtime);
    }

    /**
     * Formats an epoch-millis timestamp as {@code yyyyMMdd_HHmmss_SSS}; falls back
     * to the raw number if formatting fails (e.g. for the Long sentinels used when
     * a partition has no input files).
     */
    public String parseDate(long t) {
        try {
            Date d = new Date(t);
            return fmt.format(d);
        } catch (Throwable e) {
            return String.valueOf(t);
        }
    }

    /** Value holder describing the single partition selected for rebuilding. */
    public static class rebuildPartion {
        String partion;          // partition name
        HashSet<String> days;    // days belonging to the partition
        Path tmpindexOtherPath;  // scratch output dir for the rebuild
        Path otherveritify;      // published vertify file of this partition
        String partionvertify;   // signature computed when the partition was selected
    }

    /**
     * Main driver. Repeatedly picks the first out-of-date partition, rebuilds it
     * under the process-wide table lock, re-validates the signature (the input may
     * have changed while indexing) and atomically publishes the result. Loops
     * until no partition needs rebuilding, then garbage-collects obsolete
     * partitions and scratch space.
     *
     * @param args [0] field split, [1] input sub-match, [2] parallelism,
     *             [3] table mode flags, [4] HDFS replication for the index
     * @return 0 on success, otherwise the failing MakeIndex return code
     */
    public int run(String[] args) throws Exception {
        String split = args[0];
        String submatch = args[1];
        Integer parallel = Integer.parseInt(args[2]);
        String tablemode = args[3];
        Integer rep = Integer.parseInt(args[4]);
        this.cleanTmp();
        while (true) {
            TreeMap<String, HashSet<String>> partions = this.getPartions();
            rebuildPartion runPartion = null;
            // Select the first (highest-sorting) partition whose stored signature is stale.
            for (Entry<String, HashSet<String>> e : partions.entrySet()) {
                String partion = e.getKey();
                HashSet<String> days = e.getValue();
                String currentvertify = this.getCurrentVertify(partion, days, submatch, tablemode);
                Path indexOtherPath = new Path(this.index, partion);
                Path tmpindexOtherPath = new Path(this.workDir, partion);
                Path otherveritify = new Path(indexOtherPath, "vertify");
                String lastVertify = parse.readFirstLineStr(otherveritify);
                System.out.println("11111111 vertify:>>>last>>>" + lastVertify + ">>current>>" + currentvertify + "<<<<");
                if (!currentvertify.equals(lastVertify)) {
                    if (days.size() > 0 && MakeIndex.checkPath(fs, this.inputBase, days, submatch)) {
                        runPartion = new rebuildPartion();
                        runPartion.partion = partion;
                        runPartion.days = days;
                        runPartion.tmpindexOtherPath = tmpindexOtherPath;
                        runPartion.otherveritify = otherveritify;
                        runPartion.partionvertify = currentvertify;
                        break;
                    }
                }
            }
            if (runPartion != null) {
                System.out.println("22222 vertify:" + runPartion.partion + ">>>" + runPartion.partionvertify + "<<<<");
                int ret = 0;
                try {
                    flock.trylock();
                    // Re-check under the lock: another process may have finished
                    // this partition while we were waiting.
                    String currentvertify = this.getCurrentVertify(runPartion.partion, runPartion.days, submatch, tablemode);
                    Path indexOtherPath = new Path(this.index, runPartion.partion);
                    Path otherveritify = new Path(indexOtherPath, "vertify");
                    String lastVertify = parse.readFirstLineStr(otherveritify);
                    System.out.println("333333 vertify:" + runPartion.partion + ">>>" + runPartion.partionvertify + ">>>" + currentvertify + ">>>>" + lastVertify + "<<<<");
                    if (currentvertify.equals(lastVertify)) {
                        System.out.println("##########finiesd by other process #########");
                        continue;
                    }
                    ret = this.subRun(runPartion.days, runPartion.tmpindexOtherPath.toString(), split, submatch, parallel, tablemode, rep);
                    // Record the signature captured at selection time inside the scratch dir.
                    parse.writeStr(new Path(runPartion.tmpindexOtherPath, "vertify"), runPartion.partionvertify);
                } finally {
                    flock.unlock();
                }
                if (ret != 0) {
                    return ret;
                }
                // The input may have changed while indexing; only publish if the
                // partition still exists and its signature is unchanged.
                TreeMap<String, HashSet<String>> partionscomplete = this.getPartions();
                HashSet<String> days = partionscomplete.get(runPartion.partion);
                if (days == null) {
                    continue;
                }
                String currentVertify = this.getCurrentVertify(runPartion.partion, days, submatch, tablemode);
                System.out.println("44444 vertify:" + runPartion.partion + ">>>" + runPartion.partionvertify + "<<<" + currentVertify + "<<<<");
                if (!currentVertify.equals(runPartion.partionvertify)) {
                    System.out.println("##########changed#########");
                    continue;
                }
                try {
                    // Publish: replace the old partition dir with the freshly built one.
                    Path indexOtherPath = new Path(this.index, runPartion.partion);
                    if (this.fs.exists(indexOtherPath)) {
                        this.fs.delete(indexOtherPath, true);
                    }
                    this.fs.mkdirs(indexOtherPath.getParent());
                    this.fs.rename(runPartion.tmpindexOtherPath, indexOtherPath);
                } catch (Throwable e) {
                    // Best effort: a failed publish leaves the old signature in place,
                    // so the partition is retried on the next pass.
                    e.printStackTrace();
                }
                continue;
            }
            // No stale partition found: done.
            break;
        }
        this.cleanNotUsedPartion();
        this.cleanTmp();
        this.fs.delete(this.workDir, true);
        return 0;
    }

    /** Deletes published partition directories that no longer appear in the current partition set. */
    private void cleanNotUsedPartion() throws Exception {
        TreeMap<String, HashSet<String>> partions = this.getPartions();
        Set<String> olds = parse.readPartion(this.index);
        for (Entry<String, HashSet<String>> e : partions.entrySet()) {
            String partion = e.getKey();
            olds.remove(partion);
        }
        for (String old : olds) {
            Path indexOtherPath = new Path(this.index, old);
            if (this.fs.exists(indexOtherPath)) {
                this.fs.delete(indexOtherPath, true);
            }
        }
    }

    /**
     * Runs the actual MakeIndex build for one partition's day set.
     * <p>
     * {@code tablemode} may carry two embedded knobs: {@code @iosortmb:N@}
     * overrides {@code io.sort.mb} (default 80), and {@code @sigment:N@} builds
     * {@code shards*N} shards which are then regrouped into N-segment
     * sub-directories per final shard under {@code output/sigment}.
     *
     * @return the MakeIndex return code (0 on success)
     */
    private int subRun(HashSet<String> inputs, String output, String split, String submatch,
            Integer parallel, String tablemode, int rep) throws Exception {
        Path smallindex = this.parse.smallIndex(output);
        Configuration conf = this.getConf();
        conf.set("mdrill.table.mode", tablemode);
        conf.setInt("dfs.replication", rep);
        conf.set("io.sort.mb", "80");
        Matcher mat = IO_SORT_MB_PATTERN.matcher(tablemode);
        if (mat.find()) {
            conf.set("io.sort.mb", mat.group(1));
        }
        String hdfsPral = "1";
        mat = SIGMENT_PATTERN.matcher(tablemode);
        if (mat.find()) {
            hdfsPral = mat.group(1);
        }
        int sigcount = 1;
        try {
            sigcount = Integer.parseInt(hdfsPral);
        } catch (Throwable ignored) {
            // Malformed @sigment@ value: deliberately fall back to a single segment.
        }
        if (sigcount <= 1) {
            return MakeIndex.make(fs, solrHome, conf, this.filetype, this.inputBase, inputs, submatch, output, smallindex, shards, split, true, "", null, parallel);
        }
        // Segmented build: produce shards*sigcount shards, then move each group of
        // sigcount consecutive shard dirs under output/sigment/<final shard>/.
        int rtn = MakeIndex.make(fs, solrHome, conf, this.filetype, this.inputBase, inputs, submatch, output, smallindex, shards * sigcount, split, true, "", null, parallel);
        if (rtn == 0) {
            Path subdir = new Path(output, "sigment");
            for (int i = 0; i < shards * sigcount; i++) {
                String dir = IndexUtils.getHdfsForder(i);
                String sig = IndexUtils.getHdfsForder(i / sigcount);
                if (fs.exists(new Path(output, dir))) {
                    Path newname = new Path(subdir, sig);
                    fs.mkdirs(newname);
                    fs.rename(new Path(output, dir), new Path(newname, dir));
                }
            }
        }
        return rtn;
    }
}