// Package com.linkedin.camus.sweeper
//
// Source Code of com.linkedin.camus.sweeper.CamusDailyCleaner

package com.linkedin.camus.sweeper;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormatter;

import com.linkedin.camus.sweeper.utils.DateUtils;
import com.linkedin.camus.sweeper.utils.Utils;

/**
*
* Responsible for cleaning out the daily files based on the retention value set in the config
*
*/

public class CamusDailyCleaner extends Configured implements Tool
{

  public static final String SIMULATE = "camus.sweeper.clean.simulate";
  public static final String RETENTION_TOPIC_PREFIX = "camus.sweeper.clean.retention.days.topic.";
  public static final String OUTPUT_DAILY_FORMAT_STR = "YYYY/MM/dd";

  private DateUtils dUtils;
  private DateTimeFormatter outputDailyFormat;
  private DateTimeFormatter outputMonthFormat;
  private DateTimeFormatter outputYearFormat;

  private final Properties props;

  private String dailyPath;
  private FileSystem fs;
  private boolean simulate = false;
  private static Logger log = Logger.getLogger(CamusDailyCleaner.class);

  public CamusDailyCleaner()
  {
    this.props = new Properties();
  }

  public CamusDailyCleaner(Properties props)
  {
    this.props = props;
    dUtils = new DateUtils(props);
    outputDailyFormat = dUtils.getDateTimeFormatter(OUTPUT_DAILY_FORMAT_STR);
    outputMonthFormat = dUtils.getDateTimeFormatter("YYYY/MM");
    outputYearFormat = dUtils.getDateTimeFormatter("YYYY");
  }

  public static void main(String args[]) throws Exception
  {
    CamusDailyCleaner job = new CamusDailyCleaner();
    ToolRunner.run(job, args);

  }

  public void run() throws Exception
  {

    log.info("Starting the Camus - Daily Cleaner");
   
    String fromLocation = (String) props.getProperty("camus.sweeper.source.dir");
    String destLocation = (String) props.getProperty("camus.sweeper.dest.dir", "");
   
    if (destLocation.isEmpty())
      destLocation = fromLocation;
   
    dailyPath = destLocation;
   
    log.debug("Daily Path : " + dailyPath);
    simulate = Boolean.parseBoolean(props.getProperty(SIMULATE, "false"));

    Configuration conf = new Configuration();
    fs = FileSystem.get(conf);

    // Topic-specific retention
    Map<String, String> map = Utils.getMapByPrefix(props, RETENTION_TOPIC_PREFIX);

    int regularRetention = Integer.parseInt((String) props.getProperty("camus.sweeper.clean.retention.days.global", "-1"));

    if (regularRetention != -1)
      System.out.println("Global retention set to " + regularRetention);
    else
      System.out.println("Global retention set to infinity, will not delete unspecified topics");

    FileStatus[] statuses = fs.listStatus(new Path(dailyPath));
    for (FileStatus status : statuses)
    {
      String name = status.getPath().getName();
      if (name.startsWith(".") || name.startsWith("_"))
      {
        continue;
      }

      if (map.containsKey(name))
      {
        enforceRetention(name, Integer.parseInt(map.get(name)));
      }
      else if (regularRetention != -1)
      {
        enforceRetention(name, regularRetention);
      }
    }
  }

  private void enforceRetention(String topic, int numDays) throws Exception
  {
    System.out.println("Running retention for " + topic + " and for days " + numDays);
    DateTime time = new DateTime(dUtils.zone);
    DateTime daysAgo = time.minusDays(numDays);

    Path sourceDailyGlob = new Path(dailyPath, topic + "/daily/*/*/*");

    for (FileStatus f : fs.globStatus(sourceDailyGlob))
    {
      DateTime dirDateTime =
          outputDailyFormat.parseDateTime(f.getPath()
                                           .toString()
                                           .substring(f.getPath().toString().length()
                                               - OUTPUT_DAILY_FORMAT_STR.length()));
      if (dirDateTime.isBefore(daysAgo))
        deleteDay(topic, f.getPath());
    }
  }

  private void deleteDay(String topic, Path dayPath) throws Exception
  {
    Path monthPath = dayPath.getParent();
    Path yearPath = monthPath.getParent();

    if (fs.exists(dayPath))
    {
      System.out.println(" Deleting day " + yearPath.getName() + "/" + monthPath.getName() + "/" + dayPath.getName());
      deleteFileDir(fs, dayPath);

      if (fs.listStatus(monthPath).length == 0)
      {
        System.out.println(" Deleting month " + yearPath.getName() + "/" + monthPath.getName());
        deleteFileDir(fs, monthPath);

        if (fs.listStatus(yearPath).length == 0)
        {
          System.out.println(" Deleting year " + yearPath.getName());
          deleteFileDir(fs, yearPath);
        }
      }
    }
  }

  private void deleteFileDir(FileSystem fs, Path deletePath) throws IOException
  {
    if (!simulate)
    {
      System.out.println("Deleting " + deletePath);
      if (fs.delete(deletePath, true))
      {
        return;
      }
      else
      {
        throw new IOException("Path " + deletePath + " couldn't be deleted.");
      }
    }
    else
    {
      System.out.println("Simulating delete " + deletePath);
    }
  }

  public int run(String[] args) throws Exception
  {
    Options options = new Options();

    options.addOption("p", true, "properties filename from the classpath");
    options.addOption("P", true, "external properties filename");

    options.addOption(OptionBuilder.withArgName("property=value")
                                   .hasArgs(2)
                                   .withValueSeparator()
                                   .withDescription("use value for given property")
                                   .create("D"));

    CommandLineParser parser = new PosixParser();
    CommandLine cmd = parser.parse(options, args);

    if (!(cmd.hasOption('p') || cmd.hasOption('P')))
    {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp("CamusJob.java", options);
      return 1;
    }

    if (cmd.hasOption('p'))
      props.load(ClassLoader.getSystemClassLoader().getResourceAsStream(cmd.getOptionValue('p')));

    if (cmd.hasOption('P'))
    {
      File file = new File(cmd.getOptionValue('P'));
      FileInputStream fStream = new FileInputStream(file);
      props.load(fStream);
    }

    props.putAll(cmd.getOptionProperties("D"));

    dUtils = new DateUtils(props);
    outputDailyFormat = dUtils.getDateTimeFormatter(OUTPUT_DAILY_FORMAT_STR);
    outputMonthFormat = dUtils.getDateTimeFormatter("YYYY/MM");
    outputYearFormat = dUtils.getDateTimeFormatter("YYYY");

    run();
    return 0;
  }
}
// TOP
//
// Related Classes of com.linkedin.camus.sweeper.CamusDailyCleaner
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.