Package org.springframework.data.hadoop.pig

Source Code of org.springframework.data.hadoop.pig.PigServerFactoryBean$DefaultPigServerFactory

/*
* Copyright 2011 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.data.hadoop.pig;

import java.security.PrivilegedExceptionAction;
import java.util.Collection;

import org.apache.hadoop.security.UserGroupInformation;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.impl.PigContext;
import org.springframework.beans.factory.BeanCreationException;
import org.springframework.beans.factory.BeanNameAware;
import org.springframework.beans.factory.FactoryBean;
import org.springframework.beans.factory.ObjectFactory;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;

/**
* Factory for creating a {@link PigServer} instance. Note that since PigServer is not thread-safe and the Pig API does not
* provide some type of factory, the factory bean returns an instance of {@link ObjectFactory} (which handles the creation of {@link PigServer} instances)
* instead of the raw {@link PigServer} object which cannot be reused.
*
* Note that the caller needs to handle the object clean-up,  specifically calling {@link PigServer#shutdown()}.
*
* In general, to avoid leaks it is recommended to use the {@link PigTemplate}.
*
* @author Costin Leau
*/
public class PigServerFactoryBean implements FactoryBean<PigServerFactory>, BeanNameAware {

  private PigContext pigContext;
  private Collection<String> pathToSkip;
  private Collection<PigScript> scripts;
  private Integer parallelism;
  private String jobName;
  private String jobPriority;
  private Boolean validateEachStatement;
  private String beanName;

  private String user;

  private class DefaultPigServerFactory implements PigServerFactory {
    @Override
    public PigServer getPigServer() {
      try {
        return createPigInstance();
      } catch (Exception ex) {
        throw new BeanCreationException("Cannot create PigServer instance", ex);
      }
    }
  };


  public PigServerFactory getObject() throws Exception {
    return new DefaultPigServerFactory();
  }

  public Class<?> getObjectType() {
    return PigServerFactory.class;
  }

  public boolean isSingleton() {
    return true;
  }

  protected PigServer createPigInstance() throws Exception {
    final PigContext ctx = (pigContext != null ? pigContext : new PigContext());

    // apparently if not connected, pig can cause all kind of errors
    PigServer pigServer = null;

    try {
      if (StringUtils.hasText(user)) {
        UserGroupInformation ugi = UserGroupInformation.createProxyUser(user,
            UserGroupInformation.getLoginUser());
        pigServer = ugi.doAs(new PrivilegedExceptionAction<PigServer>() {
          @Override
          public PigServer run() throws Exception {
            return new PigServer(ctx, true);
          }
        });
      }
      else {
        pigServer = new PigServer(ctx, true);
      }
    } catch (ExecException ex) {
      throw PigUtils.convert(ex);
    }


    if (!CollectionUtils.isEmpty(pathToSkip)) {
      for (String path : pathToSkip) {
        pigServer.addPathToSkip(path);
      }
    }

    if (parallelism != null) {
      pigServer.setDefaultParallel(parallelism);
    }

    if (StringUtils.hasText(jobName)) {
      pigServer.setJobName(jobName);
    }
    else {
      if (StringUtils.hasText(beanName)) {
        pigServer.setJobName(beanName);
      }
    }

    if (StringUtils.hasText(jobPriority)) {
      pigServer.setJobPriority(jobPriority);
    }

    if (validateEachStatement != null) {
      PigUtils.validateEachStatement(pigServer, validateEachStatement);
    }


    if (!CollectionUtils.isEmpty(scripts)) {
      PigUtils.runWithConversion(pigServer, scripts, false);
    }

    return pigServer;
  }

  public void setBeanName(String name) {
    this.beanName = name;
  }

  /**
   * Sets the {@link PigContext} to use.
   *
   * @param pigContext The pigContext to set.
   */
  public void setPigContext(PigContext pigContext) {
    this.pigContext = pigContext;
  }

  /**
   * Sets the paths to skip.
   *
   * @param pathToSkip The pathToSkip to set.
   */
  public void setPathsToSkip(Collection<String> pathToSkip) {
    this.pathToSkip = pathToSkip;
  }

  /**
   * Sets the scripts to execute at startup.
   *
   * @param scripts The scripts to set.
   */
  public void setScripts(Collection<PigScript> scripts) {
    this.scripts = scripts;
  }

  /**
   * Sets the parallelism.
   *
   * @param parallelism The parallelism to set.
   */
  public void setParallelism(Integer parallelism) {
    this.parallelism = parallelism;
  }

  /**
   * Sets the job name.
   *
   * @param jobName The jobName to set.
   */
  public void setJobName(String jobName) {
    this.jobName = jobName;
  }

  /**
   * Sets the job priority.
   *
   * @param jobPriority The jobPriority to set.
   */
  public void setJobPriority(String jobPriority) {
    this.jobPriority = jobPriority;
  }

  /**
   * Indicates whether each statement should be validated or not. By default it is unset,
   * relying on the Pig defaults.
   *
   * @param validateEachStatement whether to validate each statement or not.
   */
  public void setValidateEachStatement(Boolean validateEachStatement) {
    this.validateEachStatement = validateEachStatement;
  }

  /**
   * Sets the user impersonation (optional) for executing Pig jobs.
   * Should be used when running against a Hadoop Kerberos cluster.
   *
   * @param user user/group information
   */
  public void setUser(String user) {
    this.user = user;
  }
}
TOP

Related Classes of org.springframework.data.hadoop.pig.PigServerFactoryBean$DefaultPigServerFactory

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.