Package org.apache.howl.mapreduce

Source Code of org.apache.howl.mapreduce.HowlOutputStorageDriver

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.howl.mapreduce;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.howl.data.HowlRecord;
import org.apache.howl.data.schema.HowlSchema;


/** The abstract class to be implemented by underlying storage drivers to enable data access from Howl through
*  HowlOutputFormat.
*/
public abstract class HowlOutputStorageDriver {

  /**
   * Initialize the storage driver with specified properties, default implementation does nothing.
   * @param context the job context object
   * @param howlProperties the properties for the storage driver
   * @throws IOException Signals that an I/O exception has occurred.
   */
    public void initialize(JobContext context, Properties howlProperties) throws IOException {
    }

    /**
     * Returns the OutputFormat to use with this Storage Driver.
     * @return the OutputFormat instance
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public abstract OutputFormat<? super WritableComparable<?>, ? super Writable> getOutputFormat() throws IOException;

    /**
     * Set the data location for the output.
     * @param jobContext the job context object
     * @param location the data location
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public abstract void setOutputPath(JobContext jobContext, String location) throws IOException;

    /**
     * Set the schema for the data being written out.
     * @param jobContext the job context object
     * @param schema the data schema
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public abstract void setSchema(JobContext jobContext, HowlSchema schema) throws IOException;

    /**
     * Sets the partition key values for the partition being written.
     * @param jobContext the job context object
     * @param partitionValues the partition values
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public abstract void setPartitionValues(JobContext jobContext, Map<String, String> partitionValues) throws IOException;

    /**
     * Generate the key for the underlying outputformat. The value given to HowlOutputFormat is passed as the
     * argument. The key given to HowlOutputFormat is ignored..
     * @param value the value given to HowlOutputFormat
     * @return a key instance
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public abstract WritableComparable<?> generateKey(HowlRecord value) throws IOException;

    /**
     * Convert the given HowlRecord value to the actual value type.
     * @param value the HowlRecord value to convert
     * @return a value instance
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public abstract Writable convertValue(HowlRecord value) throws IOException;

    /**
     * Gets the location to use for the specified partition values.
     *  The storage driver can override as required.
     * @param jobContext the job context object
     * @param tableLocation the location of the table
     * @param partitionValues the partition values
     * @return the location String.
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public String getOutputLocation(JobContext jobContext,
            String tableLocation, List<String> partitionCols, Map<String, String> partitionValues) throws IOException {

      if( partitionValues == null || partitionValues.size() == 0 ) {
        return new Path(tableLocation, HowlOutputFormat.TEMP_DIR_NAME).toString();
      }

      List<String> values = new ArrayList<String>();
      for(String partitionCol : partitionCols) {
        values.add(partitionValues.get(partitionCol));
      }

      String partitionLocation = FileUtils.makePartName(partitionCols, values);

      Path path = new Path(tableLocation, partitionLocation);
      return path.toString();
    }

    /** Default implementation assumes FileOutputFormat. Storage drivers wrapping
     * other OutputFormats should override this method.
     */
    public Path getWorkFilePath(TaskAttemptContext context, String outputLoc) throws IOException{
      return new Path(new FileOutputCommitter(new Path(outputLoc), context).getWorkPath(), FileOutputFormat.getUniqueFile(context, "part",""));
    }
}
TOP

Related Classes of org.apache.howl.mapreduce.HowlOutputStorageDriver

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.