Package co.nubetech.hiho.mapreduce.lib.output

Source Code of co.nubetech.hiho.mapreduce.lib.output.AppendSequenceFileOutputFormat

/**
* Copyright 2010 Nube Technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
*/
package co.nubetech.hiho.mapreduce.lib.output;

import java.io.IOException;
import java.text.NumberFormat;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileAlreadyExistsException;
import org.apache.hadoop.mapred.InvalidJobConfException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

import co.nubetech.hiho.common.HIHOConf;

public class AppendSequenceFileOutputFormat<K, V> extends
    SequenceFileOutputFormat<K, V> {
  private String isAppend;
  private static long fileCount;

  private static final NumberFormat NUMBER_FORMAT = NumberFormat
      .getInstance();

  static {
    NUMBER_FORMAT.setMinimumIntegerDigits(5);
    NUMBER_FORMAT.setGroupingUsed(false);
  }

  @Override
  public void checkOutputSpecs(JobContext job)
      throws FileAlreadyExistsException, IOException {
    try {
      super.checkOutputSpecs(job);
    } catch (FileAlreadyExistsException e) {

      Configuration conf = job.getConfiguration();

      isAppend = conf.get(HIHOConf.IS_APPEND, "false");
      if (isAppend.equalsIgnoreCase("false")) {
        throw new FileAlreadyExistsException();
      } else {
        Path outDir = getOutputPath(job);
        if (outDir == null) {
          throw new InvalidJobConfException(
              "OUTPUT directory not set.");
        }
      }
    }
  }

  @Override
  public Path getDefaultWorkFile(TaskAttemptContext context, String extension)
      throws IOException {
    Path p1;
    isAppend = context.getConfiguration().get(HIHOConf.IS_APPEND, "false");
    if (isAppend.equalsIgnoreCase("false")) {
      p1 = super.getDefaultWorkFile(context, extension);
    } else {
      FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);
      Path p = committer.getWorkPath();
      fileCount = p.getFileSystem(context.getConfiguration())
          .getContentSummary(getOutputPath(context)).getFileCount();
      if (fileCount > 1)
        fileCount = fileCount - 1;
      p1 = new Path(committer.getWorkPath(), getUniqueFile(context,
          "part", extension));
    }
    return p1;
  }

  public synchronized static String getUniqueFile(TaskAttemptContext context,
      String name, String extension) {

    TaskID taskId = context.getTaskAttemptID().getTaskID();
    int partition = taskId.getId();
    partition = partition + (int) fileCount;
    StringBuilder result = new StringBuilder();
    result.append(name);
    result.append('-');
    // result.append(taskId.isMap() ? 'm' : 'r');
    result.append('-');
    result.append(NUMBER_FORMAT.format(partition));
    result.append(extension);
    return result.toString();
  }
}
TOP

Related Classes of co.nubetech.hiho.mapreduce.lib.output.AppendSequenceFileOutputFormat

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.