// Package: org.apache.hadoop.mapred.gridmix.test.system
// Source code of org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred.gridmix.test.system;

import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.mapred.gridmix.Gridmix;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobStatus;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapreduce.JobID;
import java.util.Date;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Arrays;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.io.OutputStream;
import java.util.Set;
import java.util.List;
import java.util.Iterator;
import java.util.Map;
import java.io.File;
import java.io.FileOutputStream;
import org.apache.hadoop.test.system.ProxyUserDefinitions;
import org.apache.hadoop.test.system.ProxyUserDefinitions.GroupsAndHost;

/**
* Gridmix utilities.
*/
public class UtilsForGridmix {
  private static final Log LOG = LogFactory.getLog(UtilsForGridmix.class);
  private static final Path DEFAULT_TRACES_PATH =
    new Path(System.getProperty("user.dir") + "/src/test/system/resources/");

  /**
   * cleanup the folder or file.
   * @param path - folder or file path.
   * @param conf - cluster configuration
   * @throws IOException - If an I/O error occurs.
   */
  public static void cleanup(Path path, Configuration conf)
     throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    fs.delete(path, true);
    fs.close();
  }

  /**
   * Get the login user.
   * @return - login user as string..
   * @throws IOException - if an I/O error occurs.
   */
  public static String getUserName() throws IOException {
    return UserGroupInformation.getLoginUser().getUserName();
  }
 
  /**
   * Get the argument list for gridmix job.
   * @param gridmixDir - gridmix parent directory.
   * @param gridmixRunMode - gridmix modes either 1,2,3.
   * @param values - gridmix runtime values.
   * @param otherArgs - gridmix other generic args.
   * @return - argument list as string array.
   */
  public static String [] getArgsList(Path gridmixDir, int gridmixRunMode,
                                      String [] values, String [] otherArgs) {
    String [] runtimeArgs = {
        "-D", GridMixConfig.GRIDMIX_LOG_MODE + "=DEBUG",
        "-D", GridMixConfig.GRIDMIX_OUTPUT_DIR + "=gridmix",
        "-D", GridMixConfig.GRIDMIX_JOB_SUBMISSION_QUEUE_IN_TRACE + "=true",
        "-D", GridMixConfig.GRIDMIX_JOB_TYPE + "=" + values[0],
        "-D", GridMixConfig.GRIDMIX_USER_RESOLVER + "=" + values[1],
        "-D", GridMixConfig.GRIDMIX_SUBMISSION_POLICY + "=" + values[2]
    };

    String [] classArgs;
    if ((gridmixRunMode == GridMixRunMode.DATA_GENERATION.getValue()
       || gridmixRunMode
       == GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX.getValue())
       && values[1].indexOf("RoundRobinUserResolver") > 0) {
      classArgs = new String[] {
          "-generate", values[3],
          "-users", values[4],
          gridmixDir.toString(),
          values[5]
      };
    } else if (gridmixRunMode == GridMixRunMode.DATA_GENERATION.getValue()
              || gridmixRunMode
              == GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX.getValue()) {
      classArgs = new String[] {
          "-generate", values[3],
          gridmixDir.toString(),
          values[4]
      };
    } else if (gridmixRunMode == GridMixRunMode.RUN_GRIDMIX.getValue()
              && values[1].indexOf("RoundRobinUserResolver") > 0) {
      classArgs = new String[] {
          "-users", values[3],
          gridmixDir.toString(),
          values[4]
      };
    } else {
      classArgs = new String[] {
         gridmixDir.toString(),values[3]
      };
    }

    String [] args = new String [runtimeArgs.length +
       classArgs.length + ((otherArgs != null)?otherArgs.length:0)];
    System.arraycopy(runtimeArgs, 0, args, 0, runtimeArgs.length);

    if (otherArgs != null) {
      System.arraycopy(otherArgs, 0, args, runtimeArgs.length,
                       otherArgs.length);
      System.arraycopy(classArgs, 0, args, (runtimeArgs.length +
                       otherArgs.length), classArgs.length);
    } else {
      System.arraycopy(classArgs, 0, args, runtimeArgs.length,
                       classArgs.length);
    }
    return args;
  }
 
  /**
   * Create a file with specified size in mb.
   * @param sizeInMB - file size in mb.
   * @param inputDir - input directory.
   * @param conf - cluster configuration.
   * @throws Exception - if an exception occurs.
   */
  public static void createFile(int sizeInMB, Path inputDir,
      Configuration conf) throws Exception {
    Date d = new Date();
    SimpleDateFormat sdf = new SimpleDateFormat("ddMMyy_HHmmssS");
    String formatDate = sdf.format(d);
    FileSystem fs = inputDir.getFileSystem(conf);
    OutputStream out = fs.create(new Path(inputDir,"datafile_" + formatDate));
    final byte[] b = new byte[1024 * 1024];
    for (int index = 0; index < sizeInMB; index++) {
      out.write(b);
    }   
    out.close();
    fs.close();
  }
 
  /**
   * Create directories for a path.
   * @param path - directories path.
   * @param conf  - cluster configuration.
   * @throws IOException  - if an I/O error occurs.
   */
  public static void createDirs(Path path,Configuration conf)
     throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.exists(path)) {
       fs.mkdirs(path);
       fs.setPermission(path,new FsPermission(FsAction.ALL,
           FsAction.ALL,FsAction.ALL));
    }
  }
 
  /**
   * Run the Gridmix job with given runtime arguments.
   * @param gridmixDir - Gridmix parent directory.
   * @param conf - cluster configuration.
   * @param gridmixRunMode - gridmix run mode either 1,2,3
   * @param runtimeValues -gridmix runtime values.
   * @return - gridmix status either 0 or 1.
   * @throws Exception
   */
  public static int runGridmixJob(Path gridmixDir, Configuration conf,
     int gridmixRunMode, String [] runtimeValues) throws Exception {
    return runGridmixJob(gridmixDir, conf, gridmixRunMode, runtimeValues, null);
  }
  /**
   * Run the Gridmix job with given runtime arguments.
   * @param gridmixDir - Gridmix parent directory
   * @param conf - cluster configuration.
   * @param gridmixRunMode - gridmix run mode.
   * @param runtimeValues - gridmix runtime values.
   * @param otherArgs - gridmix other generic args.
   * @return - gridmix status either 0 or 1.
   * @throws Exception
   */
 
  public static int runGridmixJob(Path gridmixDir, Configuration conf,
                                  int gridmixRunMode, String [] runtimeValues,
                                  String [] otherArgs) throws Exception {
    Path  outputDir = new Path(gridmixDir, "gridmix");
    Path inputDir = new Path(gridmixDir, "input");
    LOG.info("Cleanup the data if data already exists.");
    String modeName = new String();
    switch (gridmixRunMode) {
      case 1 :
        cleanup(inputDir, conf);
        cleanup(outputDir, conf);
        modeName = GridMixRunMode.DATA_GENERATION.name();
        break;
      case 2 :
        cleanup(outputDir, conf);
        modeName = GridMixRunMode.RUN_GRIDMIX.name();
        break;
      case 3 :
        cleanup(inputDir, conf);
        cleanup(outputDir, conf);
        modeName = GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX.name();
        break;
    }

    final String [] args =
        UtilsForGridmix.getArgsList(gridmixDir, gridmixRunMode,
                                    runtimeValues, otherArgs);
    Gridmix gridmix = new Gridmix();
    LOG.info("Submit a Gridmix job in " + runtimeValues[1]
            + " mode for " + modeName);
    int exitCode = ToolRunner.run(conf, gridmix, args);
    return exitCode;
  }

  /**
   * Get the proxy users file.
   * @param conf - cluster configuration.
   * @return String - proxy users file.
   * @Exception - if no proxy users found in configuration.
   */
  public static String getProxyUsersFile(Configuration conf)
      throws Exception {
     ProxyUserDefinitions pud = getProxyUsersData(conf);
     String fileName = buildProxyUsersFile(pud.getProxyUsers());
     if (fileName == null) {
        LOG.error("Proxy users file not found.");
        throw new Exception("Proxy users file not found.");
     } else {
        return fileName;
     }
  }
 
  /**
  * List the current gridmix jobid's.
  * @param client - job client.
  * @param execJobCount - number of executed jobs.
  * @return - list of gridmix jobid's.
  */
public static List<JobID> listGridmixJobIDs(JobClient client,
     int execJobCount) throws Exception {
   List<JobID> jobids = new ArrayList<JobID>();
   JobStatus [] jobStatus = client.getAllJobs();
   int numJobs = jobStatus.length;
   for (int index = 0; index < 31; index++) {
     Thread.sleep(1000);
     jobStatus = client.getAllJobs();
     numJobs = jobStatus.length;
   }
   for (int index = 1; index <= execJobCount; index++) {
     JobStatus js = jobStatus[numJobs - index];
     JobID jobid = js.getJobID();
     RunningJob runJob = client.getJob(jobid.toString());
     String jobName = runJob.getJobName();
     if (!jobName.equals("GRIDMIX_GENERATE_INPUT_DATA") &&
         !jobName.equals("GRIDMIX_GENERATE_DISTCACHE_DATA")) {
       jobids.add(jobid);
     }
   }
   return (jobids.size() == 0)? null : jobids;
}

/**
  * List the proxy users.
  * @param conf
  * @return
  * @throws Exception
  */
public static List<String> listProxyUsers(Configuration conf,
     String loginUser) throws Exception {
   List<String> proxyUsers = new ArrayList<String>();
   ProxyUserDefinitions pud = getProxyUsersData(conf);
   Map<String, GroupsAndHost> usersData = pud.getProxyUsers();
   Collection users = usersData.keySet();
   Iterator<String> itr = users.iterator();
   while (itr.hasNext()) {
     String user = itr.next();
     if (!user.equals(loginUser)){ proxyUsers.add(user); };
   }
   return proxyUsers;
}

  private static String buildProxyUsersFile(final Map<String, GroupsAndHost>
      proxyUserData) throws Exception {
     FileOutputStream fos = null;
     File file = null;
     StringBuffer input = new StringBuffer();
     Set users = proxyUserData.keySet();
     Iterator itr = users.iterator();
     while (itr.hasNext()) {
       String user = itr.next().toString();
       if (!user.equals(
           UserGroupInformation.getLoginUser().getShortUserName())) {
         input.append(user);
         final GroupsAndHost gah = proxyUserData.get(user);
         final List <String> groups = gah.getGroups();
         for (String group : groups) {
           input.append(",");
           input.append(group);
         }
         input.append("\n");
       }
     }
     if (input.length() > 0) {
        try {
           file = File.createTempFile("proxyusers", null);
           fos = new FileOutputStream(file);
           fos.write(input.toString().getBytes());
        } catch(IOException ioexp) {
           LOG.warn(ioexp.getMessage());
           return null;
        } finally {
           fos.close();
           file.deleteOnExit();
        }
        LOG.info("file.toString():" + file.toString());
        return file.toString();
     } else {
        return null;
     }
  }

  private static ProxyUserDefinitions getProxyUsersData(Configuration conf)
      throws Exception {
    Iterator itr = conf.iterator();
    List<String> proxyUsersData = new ArrayList<String>();
    while (itr.hasNext()) {
      String property = itr.next().toString();
      if (property.indexOf("hadoop.proxyuser") >= 0
         && property.indexOf("groups=") >= 0) {
        proxyUsersData.add(property.split("\\.")[2]);
      }
    }

    if (proxyUsersData.size() == 0) {
       LOG.error("No proxy users found in the configuration.");
       throw new Exception("No proxy users found in the configuration.");
    }

    ProxyUserDefinitions pud = new ProxyUserDefinitions() {
       public boolean writeToFile(URI filePath) throws IOException {
           throw new UnsupportedOperationException("No such methood exists.");
       };
    };

     for (String userName : proxyUsersData) {
        List<String> groups = Arrays.asList(conf.get("hadoop.proxyuser." +
            userName + ".groups").split("//,"));
        List<String> hosts = Arrays.asList(conf.get("hadoop.proxyuser." +
            userName + ".hosts").split("//,"));
        ProxyUserDefinitions.GroupsAndHost definitions =
            pud.new GroupsAndHost();
        definitions.setGroups(groups);
        definitions.setHosts(hosts);
        pud.addProxyUser(userName, definitions);
     }
     return pud;
  }

  /**
   *  Gives the list of paths for MR traces against different time
   *  intervals.It fetches only the paths which followed the below
   *  file convention.
   *    Syntax : &lt;FileName&gt;_&lt;TimeIntervals&gt;.json.gz
   *  There is a restriction in a  file and user has to 
   *  follow  the below convention for time interval.
   *    Syntax: &lt;numeric&gt;[m|h|d]
   *    e.g : for 10 minutes trace should specify 10m,
   *    same way for 1 hour traces should specify 1h,
   *    for 1 day traces should specify 1d.
   *
   * @param conf - cluster configuration.
   * @return - list of MR paths as key/value pair based on time interval.
   * @throws IOException - if an I/O error occurs.
   */
  public static Map<String, String> getMRTraces(Configuration conf)
     throws IOException {
    return getMRTraces(conf, DEFAULT_TRACES_PATH);
  }
 
  /**
   *  It gives the list of paths for MR traces against different time
   *  intervals. It fetches only the paths which followed the below
   *  file convention.
   *    Syntax : &lt;FileNames&gt;_&lt;TimeInterval&gt;.json.gz
   *  There is a restriction in a file and user has to follow the
   *  below convention for time interval.
   *    Syntax: &lt;numeric&gt;[m|h|d]
   *    e.g : for 10 minutes trace should specify 10m,
   *    same way for 1 hour traces should specify 1h,
   *    for 1 day  traces should specify 1d.
   *
   * @param conf - cluster configuration object.
   * @param tracesPath - MR traces path.
   * @return - list of MR paths as key/value pair based on time interval.
   * @throws IOException - If an I/O error occurs.
   */
  public static Map<String,String> getMRTraces(Configuration conf,
      Path tracesPath) throws IOException {
     Map <String, String> jobTraces = new HashMap <String, String>();
     final FileSystem fs = FileSystem.getLocal(conf);
     final FileStatus fstat[] = fs.listStatus(tracesPath);
     for (FileStatus fst : fstat) {
        final String fileName = fst.getPath().getName();
        if (fileName.endsWith("m.json.gz")
            || fileName.endsWith("h.json.gz")
            || fileName.endsWith("d.json.gz")) {
           jobTraces.put(fileName.substring(fileName.indexOf("_") + 1,
              fileName.indexOf(".json.gz")), fst.getPath().toString());
        }
     }
     if (jobTraces.size() == 0) {
        LOG.error("No traces found in " + tracesPath.toString() + " path.");
        throw new IOException("No traces found in "
                             + tracesPath.toString() + " path.");
     }
     return jobTraces;
  }
 
  /**
   * It list the all the MR traces path irrespective of time.
   * @param conf - cluster configuration.
   * @param tracesPath - MR traces path
   * @return - MR paths as a list.
   * @throws IOException - if an I/O error occurs.
   */
  public static List<String> listMRTraces(Configuration conf,
      Path tracesPath) throws IOException {
     List<String> jobTraces = new ArrayList<String>();
     final FileSystem fs = FileSystem.getLocal(conf);
     final FileStatus fstat[] = fs.listStatus(tracesPath);
     for (FileStatus fst : fstat) {
        jobTraces.add(fst.getPath().toString());
     }
     if (jobTraces.size() == 0) {
        LOG.error("No traces found in " + tracesPath.toString() + " path.");
        throw new IOException("No traces found in "
                             + tracesPath.toString() + " path.");
     }
     return jobTraces;
  }
 
  /**
   * It list the all the MR traces path irrespective of time.
   * @param conf - cluster configuration.
   * @param tracesPath - MR traces path
   * @return - MR paths as a list.
   * @throws IOException - if an I/O error occurs.
   */
  public static List<String> listMRTraces(Configuration conf)
      throws IOException {
     return listMRTraces(conf, DEFAULT_TRACES_PATH);
  }

  /**
   * Gives the list of MR traces for given time interval.
   * The time interval should be following convention.
   *   Syntax : &lt;numeric&gt;[m|h|d]
   *   e.g : 10m or 1h or 2d etc.
   * @param conf - cluster configuration
   * @param timeInterval - trace time interval.
   * @param tracesPath - MR traces Path.
   * @return - MR paths as a list for a given time interval.
   * @throws IOException - If an I/O error occurs.
   */
  public static List<String> listMRTracesByTime(Configuration conf,
      String timeInterval, Path tracesPath) throws IOException {
     List<String> jobTraces = new ArrayList<String>();
     final FileSystem fs = FileSystem.getLocal(conf);
     final FileStatus fstat[] = fs.listStatus(tracesPath);
     for (FileStatus fst : fstat) {
        final String fileName = fst.getPath().getName();
        if (fileName.indexOf(timeInterval) >= 0) {
           jobTraces.add(fst.getPath().toString());
        }
     }
     return jobTraces;
  }
 
  /**
   * Gives the list of MR traces for given time interval.
   * The time interval should be following convention.
   *   Syntax : &lt;numeric&gt;[m|h|d]
   *   e.g : 10m or 1h or 2d etc.
   * @param conf - cluster configuration
   * @param timeInterval - trace time interval.
   * @return - MR paths as a list for a given time interval.
   * @throws IOException - If an I/O error occurs.
   */
  public static List<String> listMRTracesByTime(Configuration conf,
      String timeInterval) throws IOException {
     return listMRTracesByTime(conf, timeInterval, DEFAULT_TRACES_PATH);
  }
}
// TOP
// Related Classes of org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix
// Copyright (c) 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark
// of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.