Package com.alimama.mdrill.index

Source Code of com.alimama.mdrill.index.IndexReducerMerge

package com.alimama.mdrill.index;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.lucene.index.TermInfosWriter;

import com.alimama.mdrill.hdfsDirectory.FileSystemDirectory;
import com.alimama.mdrill.index.utils.DocumentConverter;
import com.alimama.mdrill.index.utils.HeartBeater;
import com.alimama.mdrill.index.utils.JobIndexPublic;
import com.alimama.mdrill.index.utils.ShardWriter;
import com.alimama.mdrill.utils.ZipUtils;

public class IndexReducerMerge extends
    Reducer<IntWritable, Text, IntWritable, Text> {
  private HeartBeater heartBeater = null;
  private ShardWriter shardWriter = null;
  private String indexHdfsPath = null;
  private String tmpath = null;
  private String tmpathzip = null;
  public DocumentConverter documentConverter;
 
  boolean isNotFdtMode=true;
  boolean isNotZip=false;
  protected void setup(Context context) throws java.io.IOException,
      InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    isNotFdtMode=conf.get("mdrill.table.mode","").indexOf("@hdfs@")<0;
    isNotZip=conf.get("mdrill.table.mode","").indexOf("@nonzipout@")>=0;

    if(!isNotFdtMode)
    {
      TermInfosWriter.setSkipInterVal(16);
    }
    TermInfosWriter.setNotUseQuick(false);
    String fieldStrs = conf.get("higo.index.fields");
    String[] fieldslist = fieldStrs.split(",");
    this.documentConverter = new DocumentConverter(fieldslist,"solrconfig.xml", "schema.xml");
    heartBeater = new HeartBeater(context);
    heartBeater.needHeartBeat();

    shardWriter = this.initShardWriter(context);
    shardWriter.getDir().setAllowLinks(true);
  }

  protected void cleanup(Context context) throws IOException,
      InterruptedException {
    try {
      shardWriter.addEmptyDoc();
      shardWriter.optimize();
      shardWriter.close();
     
      Configuration conf = context.getConfiguration();
      FileSystem fs = FileSystem.get(conf);
      if(isNotFdtMode&&!isNotZip)
      {
        ZipUtils.zip(fs, tmpath, fs, tmpathzip);
       
        if (!fs.exists(new Path(indexHdfsPath))) {
         
          fs.rename(new Path(tmpathzip), new Path(indexHdfsPath));
        }
      }else{
          if (!fs.exists(new Path(indexHdfsPath))) {
          fs.rename(new Path(tmpath), new Path(indexHdfsPath));
        }
      }

    } catch (Throwable e) {
      throw new IOException(e);
    }

    heartBeater.cancelHeartBeat();
    heartBeater.interrupt();

  }

  private ShardWriter initShardWriter(Context context) throws IOException {
    String part_xxxxx = JobIndexPublic.getOutFileName(context, "part");
    Configuration conf = context.getConfiguration();
    FileSystem fs = FileSystem.get(conf);

    String outputdir = conf.get("mapred.output.dir");
    String uuid=java.util.UUID.randomUUID().toString();
    indexHdfsPath = new Path(outputdir, part_xxxxx).toString();
    tmpath = new Path(outputdir + "/_tmpindex", part_xxxxx + "_"+uuid).toString();
    tmpathzip = new Path(outputdir + "/_tmpindex", part_xxxxx + "_zip_"+ uuid).toString();
   
    ShardWriter shardWriter = new ShardWriter(fs, tmpath, conf);
    return shardWriter;
  }

  protected void reduce(IntWritable key, Iterable<Text> values,
      Context context) throws java.io.IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    Iterator<Text> iterator = values.iterator();
    while (iterator.hasNext()) {
      Text path = iterator.next();
      shardWriter.process(new FileSystemDirectory(fs, new Path(path
          .toString()), false, conf));
      context.write(key, path);
    }

  }
}
TOP

Related Classes of com.alimama.mdrill.index.IndexReducerMerge

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.