Source Code of com.alimama.mdrill.index.JobIndexerPartion$rebuildPartion

package com.alimama.mdrill.index;

import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;

import com.alimama.mdrill.partion.MdrillPartions;
import com.alimama.mdrill.partion.MdrillPartionsInterface;
import com.alimama.mdrill.utils.HadoopUtil;
import com.alimama.mdrill.utils.IndexUtils;
import com.alimama.mdrill.utils.TryLockFile;

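/**
 * Hadoop Tool that incrementally rebuilds the per-partition indexes of an
 * mdrill table and swaps each finished index into place with a single rename.
 * Identifier spellings such as "Partion", "vertify" and "Forder" are kept
 * exactly as they appear in the original source.
 */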
public class JobIndexerPartion extends Configured implements Tool {
  private int shards;                // number of index shards per partition
  private String solrHome;           // table base directory
  private FileSystem fs;
  private String inputBase;          // root directory of the raw input data
  private int dayDelay = 10;
  private int maxRunDays = 365;
  private String startday = "19831107";
  private String filetype = "txt";
  private Path tmp;                  // <solrHome>/tmp, scratch space
  private Path workDir;              // per-run UUID directory under tmp
  private Path index;                // <solrHome>/index, the published indexes
  private String type = "default";   // partition strategy name

  private JobIndexParse parse = null;
  private MdrillPartionsInterface mdrillpartion;

  TryLockFile flock = null;          // one lock file per table
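  /**
   * @param tablename table name; also names the local lock file under <storm.home>/lock
   * @param _shards   number of index shards to build per partition
   * @param _solrHome base directory that receives the "tmp" and "index" subdirectories
   * @param _dayplus  stored as dayDelay and forwarded to the partition helpers
   * @param type      partition strategy resolved via MdrillPartions.INSTANCE()
   */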
  public JobIndexerPartion(String tablename, Configuration conf, int _shards,
      String _solrHome, String _inputBase, int _dayplus, int _maxRunDays,
      String _startday, String _filetype, String type) throws IOException {
    this.shards = _shards;
    this.solrHome = _solrHome;
    this.tmp = new Path(_solrHome, "tmp");
    this.workDir = new Path(this.tmp, java.util.UUID.randomUUID().toString());
    this.index = new Path(_solrHome, "index");
    this.inputBase = _inputBase;
    this.dayDelay = _dayplus;
    this.maxRunDays = _maxRunDays;
    this.startday = _startday;
    this.filetype = _filetype;
    this.fs = FileSystem.get(conf);
    this.parse = new JobIndexParse(fs);
    this.type = type;
    this.mdrillpartion = MdrillPartions.INSTANCE(this.type);

    String stormhome = System.getProperty("storm.home");
    if (stormhome == null) {
      stormhome = ".";
    }

    // One lock file per table under <storm.home>/lock guards against concurrent builds.
    String lockPathBase = stormhome + "/lock";
    File file = new File(lockPathBase);
    file.mkdirs();
    flock = new TryLockFile(lockPathBase + "/" + tablename);
  }

  private void cleanTmp() throws IOException
  {
    HadoopUtil.cleanHistoryFile(this.fs, this.tmp);
  }
 
 
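  /**
   * Lists the candidate partitions (partition name -> set of days) and returns
   * them in descending name order, so the newest partition is processed first.
   */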
  private TreeMap<String,HashSet<String>> getPartions() throws Exception
  {
    HashSet<String> namelist = this.mdrillpartion.getNameList(fs, this.inputBase,  this.startday, dayDelay, maxRunDays);
    HashMap<String,HashSet<String>> partions=this.mdrillpartion.indexPartions(namelist, startday, dayDelay, maxRunDays);
    TreeMap<String, HashSet<String>> rtn = new TreeMap<String, HashSet<String>>(new Comparator<String>() {
      @Override
      public int compare(String o1, String o2) {
        return o2.compareTo(o1);
      }
    });
    rtn.putAll(partions);
    return rtn;
  }
 
//  public HashMap<String,String> getVertify(HashMap<String,HashSet<String>> partions) throws Exception
//  {
//    HashMap<String,String> vertifyset=this.mdrillpartion.indexVertify(partions, shards, startday, dayDelay, maxRunDays);
//    return vertifyset;
//  }
 

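  /**
   * Computes the current "vertify" fingerprint for one partition. The base
   * value comes from the partition strategy (or a fresh "single" value when
   * none exists); the input's total size, file count and min/max modification
   * times are appended, so the result has the shape
   *
   *   <partionvertify>@<totalBytes>@<fileCount>@<minMtime>@<maxMtime>
   *
   * and changes whenever the input data changes. If tablemode contains
   * "@igDataChange@", a fingerprint that depends only on the set of days
   * (not on the data itself) is returned instead.
   */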
  public String getCurrentVertify(String partion,HashSet<String> partionDays,String submatch,String tablemode) throws Exception
  {
    HashMap<String,HashSet<String>> partions=new HashMap<String, HashSet<String>>();
    partions.put(partion, partionDays);
    HashMap<String,String> vertifyset=this.mdrillpartion.indexVertify(partions, shards, startday, dayDelay, maxRunDays);
    String partionvertify = vertifyset.get(partion);
    if(partionvertify==null||partionvertify.isEmpty())
    {
      partionvertify=  "partionV"+MdrillPartions.PARTION_VERSION+"@001@single@" + this.shards + "@"+ java.util.UUID.randomUUID().toString();
    }
   
    HashSet<FileStatus> pathlist=MakeIndex.getInputList(this.fs, this.inputBase,partionDays,submatch);
    long dusize=0;
    long mintime=Long.MAX_VALUE;
    long maxtime=Long.MIN_VALUE;
    for (FileStatus p : pathlist) {
      if (p.isDir()) {
        dusize += HadoopUtil.duSize(fs, p.getPath());
      } else {
        dusize += p.getLen();
      }
      long lasttimes = p.getModificationTime();
      mintime = Math.min(mintime, lasttimes);
      maxtime = Math.max(maxtime, lasttimes);
    }
   
    if(tablemode.indexOf("@igDataChange@")>=0)
    {
      return "partionV"+MdrillPartions.PARTION_VERSION+"@001@"+partion + "@" + shards + "@"+ partionDays.size()+"@"+partionDays.hashCode()+"@0@0@0@0";
    }
       
    return partionvertify+"@"+dusize+"@"+pathlist.size()+"@"+parseDate(mintime)+"@"+parseDate(maxtime);
  }
 
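  // Formats epoch milliseconds as yyyyMMdd_HHmmss_SSS, falling back to the raw
  // value on error. Note that SimpleDateFormat is not thread-safe, so this
  // shared instance must only be used from a single thread.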
  SimpleDateFormat fmt = new SimpleDateFormat("yyyyMMdd_HHmmss_SSS");
  public String parseDate(long t) {
    try {
      Date d = new Date(t);
      return fmt.format(d);
    } catch (Throwable e) {
      return String.valueOf(t);
    }
  }
 
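  /** Work item describing one partition whose index needs to be rebuilt. */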
  public static class rebuildPartion{
    String partion;
    HashSet<String> days;
    Path tmpindexOtherPath;
    Path otherveritify;
    String partionvertify;
  }
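  /**
   * Main loop. Expected args: split, submatch, parallel, tablemode, replication.
   * Each pass picks the first partition whose stored vertify differs from the
   * current one, rebuilds its index in a temporary work directory while holding
   * the table lock, re-checks that the input did not change during the build,
   * and then publishes the new index with a single rename. The loop ends once
   * every partition is up to date; stale partitions and tmp data are cleaned up.
   */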
  public int run(String[] args) throws Exception {
    String split=args[0];
    String submatch=args[1];
    Integer parallel=Integer.parseInt(args[2]);
    String tablemode=args[3];
    Integer rep=Integer.parseInt(args[4]);

    this.cleanTmp();
    while(true)
    {
      TreeMap<String,HashSet<String>> partions=this.getPartions();
      rebuildPartion runPartion=null;
      for(Entry<String,HashSet<String>> e:partions.entrySet())
      {
        String partion=e.getKey();
        HashSet<String> days=e.getValue();
        String currentvertify = this.getCurrentVertify(partion, days,submatch,tablemode);
       
        Path indexOtherPath = new Path(this.index, partion);
        Path tmpindexOtherPath = new Path(this.workDir, partion);
        Path otherveritify=new Path(indexOtherPath, "vertify");
        String lastVertify=parse.readFirstLineStr(otherveritify);
        System.out.println("11111111 vertify:>>>last>>>"+lastVertify+">>current>>"+currentvertify+"<<<<");

        if (!currentvertify.equals(lastVertify)) {
          if (days.size() > 0 && MakeIndex.checkPath(fs, this.inputBase, days, submatch)) {
            runPartion = new rebuildPartion();
            runPartion.partion = partion;
            runPartion.days = days;
            runPartion.tmpindexOtherPath = tmpindexOtherPath;
            runPartion.otherveritify = otherveritify;
            runPartion.partionvertify = currentvertify;
            break;
          }
        }
      }
      if(runPartion!=null)
      {
        System.out.println("22222  vertify:"+runPartion.partion+">>>"+runPartion.partionvertify+"<<<<");
        int ret=0;
        try{
          flock.trylock();
          String currentvertify = this.getCurrentVertify(runPartion.partion, runPartion.days,submatch,tablemode);
          Path indexOtherPath = new Path(this.index, runPartion.partion);
          Path otherveritify=new Path(indexOtherPath, "vertify");
          String lastVertify=parse.readFirstLineStr(otherveritify);
          System.out.println("333333 vertify:"+runPartion.partion+">>>"+runPartion.partionvertify+">>>"+currentvertify+">>>>"+lastVertify+"<<<<");

          if (currentvertify.equals(lastVertify)) {
            System.out.println("##########finiesd by other process #########");
            continue;
          }
         
         
          ret = this.subRun(runPartion.days, runPartion.tmpindexOtherPath.toString(),split,submatch,parallel,tablemode,rep);
          parse.writeStr(new Path(runPartion.tmpindexOtherPath, "vertify"), runPartion.partionvertify);
        }finally{
          flock.unlock();
        }
        if (ret != 0) {
          return ret;
        }
       
        TreeMap<String, HashSet<String>> partionscomplete=this.getPartions();
        HashSet<String> days=partionscomplete.get(runPartion.partion);
        if(days==null)
        {
          continue;
        }
        String currentVertify = this.getCurrentVertify(runPartion.partion, days,submatch,tablemode);
        System.out.println("44444 vertify:"+runPartion.partion+">>>"+runPartion.partionvertify+"<<<"+currentVertify+"<<<<");

        if (!currentVertify.equals(runPartion.partionvertify)) {
          System.out.println("##########changed#########");
          continue;
        }
       
        try {
          Path indexOtherPath = new Path(this.index, runPartion.partion);
          if (this.fs.exists(indexOtherPath)) {
            this.fs.delete(indexOtherPath, true);
          }
          this.fs.mkdirs(indexOtherPath.getParent());
          // Publish: the freshly built index replaces the previous one in a single rename.
          this.fs.rename(runPartion.tmpindexOtherPath, indexOtherPath);
        } catch (Throwable e) {
          e.printStackTrace();
        }
        continue;
      }
      break;
    }
   
    this.cleanNotUsedPartion();
   
    this.cleanTmp();
    this.fs.delete(this.workDir,true);
    return 0;
  }
 
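  /** Removes published index directories for partitions that are no longer in the current partition list. */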
  private void cleanNotUsedPartion() throws Exception
  {
    TreeMap<String, HashSet<String>> partions=this.getPartions();
    Set<String> olds=parse.readPartion(this.index);
    for(Entry<String,HashSet<String>> e:partions.entrySet())
    {
      String partion=e.getKey();
      olds.remove(partion);
    }
   
    for(String old:olds)
    {
      Path indexOtherPath = new Path(this.index, old);
      if(this.fs.exists(indexOtherPath))
      {
        this.fs.delete(indexOtherPath,true);
      }
    }
  }

 

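  /**
   * Builds the index for one partition. When tablemode requests segmentation
   * via "@sigment:N@" with N > 1, shards*N physical shards are built and then
   * regrouped so that every N consecutive shard directories end up under one
   * logical shard in output/sigment/.
   */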
  private int subRun(HashSet<String> inputs, String output,String split,String submatch,Integer parallel,String tablemode,int rep) throws Exception {
    Path smallindex = this.parse.smallIndex(output);
    Configuration conf=this.getConf();
    conf.set("mdrill.table.mode", tablemode);
    conf.setInt("dfs.replication", rep);
   
     conf.set("io.sort.mb", "80");
     Pattern mapiPattern      = Pattern.compile("@iosortmb:([0-9]+)@");
     Matcher mat=mapiPattern.matcher(tablemode);
         if (mat.find()) {
         conf.set("io.sort.mb", mat.group(1));
         }
   
    // "@sigment:N@" in tablemode asks for N sub-segments per shard.
    String hdfsPral = "1";
    mapiPattern = Pattern.compile("@sigment:([0-9]+)@");
    mat = mapiPattern.matcher(tablemode);
    if (mat.find()) {
      hdfsPral = mat.group(1);
    }

    int sigcount = 1;
    try {
      sigcount = Integer.parseInt(hdfsPral);
    } catch (Throwable e) {
      // keep the default of 1 on a malformed value
    }

    if (sigcount <= 1) {
      return MakeIndex.make(fs, solrHome, conf, this.filetype, this.inputBase, inputs, submatch, output, smallindex, shards, split, true, "", null, parallel);
    }
   
    int rtn = MakeIndex.make(fs, solrHome, conf, this.filetype, this.inputBase, inputs, submatch, output, smallindex, shards * sigcount, split, true, "", null, parallel);
    if (rtn == 0) {
      // Regroup the shards*sigcount physical shards: every sigcount consecutive
      // shard directories are moved under sigment/<logical shard>/.
      Path subdir = new Path(output, "sigment");
      for (int i = 0; i < shards * sigcount; i++) {
        String dir = IndexUtils.getHdfsForder(i);
        String sig = IndexUtils.getHdfsForder(i / sigcount);
        if (fs.exists(new Path(output, dir))) {
          Path newname = new Path(subdir, sig);
          fs.mkdirs(newname);
          fs.rename(new Path(output, dir), new Path(newname, dir));
        }
      }
    }
    return rtn;
  }
}
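For reference, a minimal sketch of how this Tool might be driven through Hadoop's ToolRunner. The launcher class, the paths, and all concrete argument values below are hypothetical; only the constructor parameters and the five positional arguments parsed by run() (split, submatch, parallel, tablemode, replication) follow the code above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class JobIndexerPartionLauncher {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Constructor arguments mirror JobIndexerPartion's signature above;
    // every concrete value here is a placeholder.
    JobIndexerPartion job = new JobIndexerPartion(
        "demo_table",           // tablename, also names the lock file
        conf,
        4,                      // _shards
        "/mdrill/tables/demo",  // _solrHome (hypothetical HDFS path)
        "/mdrill/input/demo",   // _inputBase (hypothetical HDFS path)
        2,                      // _dayplus
        90,                     // _maxRunDays
        "20120101",             // _startday, yyyyMMdd
        "txt",                  // _filetype
        "default");             // partition strategy type
    // run() parses: split, submatch, parallel, tablemode, replication.
    int ret = ToolRunner.run(conf, job,
        new String[] { "\001", "*", "8", "default", "2" });
    System.exit(ret);
  }
}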