Package com.alimama.mdrill.index.utils

Source Code of com.alimama.mdrill.index.utils.ShardWriter

package com.alimama.mdrill.index.utils;

import java.io.IOException;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.store.Directory;

import com.alimama.mdrill.hdfsDirectory.FileSystemDirectory;


public class ShardWriter {
    private static Logger logger = Logger.getLogger(ShardWriter.class);
    private Path perm;
    private FileSystemDirectory dir;
    public FileSystemDirectory getDir() {
        return dir;
    }

    private IndexWriter writer;
    private long numForms = 0;
    private long numDocs=0;

    public long getNumDocs() {
        return numDocs;
    }

    public ShardWriter(FileSystem fs, String indexOutputPathStr,Configuration iconf) throws IOException {
  logger.info("Construct a shard writer " + indexOutputPathStr);
  perm = new Path(indexOutputPathStr);
  if (!fs.exists(perm)) {
      fs.mkdirs(perm);
  } else {
      moveToTrash(iconf, perm);
      fs.mkdirs(perm);
  }

  this.dir = new FileSystemDirectory(fs, perm, true, iconf);
  writer = new IndexWriter(dir, null,
          new KeepOnlyLastCommitDeletionPolicy(),
          MaxFieldLength.UNLIMITED);
//  writer.setMergeScheduler(new SerialMergeScheduler());//new ConcurrentMergeScheduler()
  writer.setMergeFactor(256);
  writer.setTermIndexInterval(128);
  writer.setUseCompoundFile(false);

  logger.info("finish Construct a shard writer " + indexOutputPathStr);

    }
   
    public void addEmptyDoc() throws CorruptIndexException, IOException
    {
      Document empty=new Document();
      empty.add(new Field("higoempty_emptydoc_s", "1", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
      writer.addDocument(empty);
    }

    public void process(RamWriter form) throws IOException {
  int docs=form.getNumDocs();
  if(docs>0)
  {
    numDocs+=docs;
    numForms++;
    this.process(form.getDirectory());
  }
    }
    public void process(Directory dir) throws CorruptIndexException, IOException
    {
  Date d1=new Date();
  writer.addIndexesNoOptimize(new Directory[] { dir });
  Date d2=new Date();
  logger.info("process time "+(d2.getTime()-d1.getTime()));
    }

    public void close() throws IOException {
  logger.info("Closing the shard writer, processed " + numForms + " forms "+numDocs +" docs ");
  try {

   
    writer.getReader();
      writer.close();
     
     

  } finally {
      this.writer = null;
//      IndexReader reader=IndexReader.open(this.dir);
//      TermEnum te=reader.terms(new Term("thedate"));
//      StringBuffer buff=new StringBuffer();
//      int index=0;
//      for (;;) {
//        index++;
//      Term t = te.term();
//      if (t == null||!t.field().equals("thedate")||index>100) {
//        break;
//      }
//      buff.append(t.toString());
//      buff.append(",");
//      te.next();
//    }
//      reader.close();
//     
//      System.out.println("thedate:"+buff.toString());
      this.dir.close();
      this.dir = null;
      logger.info("Closed Lucene index writer");
  }
  logger.info("Moved new index files to " + perm);
    }

    public String toString() {
  return this.getClass().getName() + "@" + perm + "&";
    }

  public void optimize() throws IOException {
    Throwable error=null;
    for(int i=0;i<10;i++)
    {
      error=null;
      try {
        writer.optimize();
        return ;
      } catch (CorruptIndexException e) {
        logger.error("optimize Corrupt Index error. ", e);
        error=e;
      } catch (IOException e) {
        logger.error("optimize IOException . ", e);
        error=e;
      }
      try {
        Thread.sleep(5000);
      } catch (InterruptedException e) {
        logger.error(e);
      }
     
    }
    if(error!=null)
    {
      throw new IOException(error);
    }
  }

    public static void moveToTrash(Configuration conf, Path path)
      throws IOException {
  Trash t = new Trash(conf);
  boolean isMoved = t.moveToTrash(path);
  t.expunge();
  if (!isMoved) {
      logger.error("Trash is not enabled or file is already in the trash.");
  }
    }
}
TOP

Related Classes of com.alimama.mdrill.index.utils.ShardWriter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.