/*
* Copyright 2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.data.hadoop.store.output;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.util.ReflectionUtils;
import org.springframework.data.hadoop.store.StoreException;
import org.springframework.data.hadoop.store.codec.CodecInfo;
import org.springframework.data.hadoop.store.support.OutputStoreObjectSupport;
import org.springframework.data.hadoop.store.support.StreamsHolder;
import org.springframework.util.ClassUtils;
/**
* An {@code AbstractDataStreamWriter} is a base implementation for writers
* which handle output streams backed by raw hdfs files.
*
* @author Janne Valkealahti
*
*/
public abstract class AbstractDataStreamWriter extends OutputStoreObjectSupport {

    private final static Log log = LogFactory.getLog(AbstractDataStreamWriter.class);

    /** Default number of attempts made to find a path which can be opened for writing. */
    public final static int DEFAULT_MAX_OPEN_ATTEMPTS = 10;

    /** Upper bound for open attempts in {@link #getOutput()}; never less than 1. */
    private int maxOpenAttempts = DEFAULT_MAX_OPEN_ATTEMPTS;

    /**
     * Instantiates a new abstract data stream writer.
     *
     * @param configuration the hadoop configuration
     * @param basePath the hdfs path
     * @param codec the compression codec info
     */
    public AbstractDataStreamWriter(Configuration configuration, Path basePath, CodecInfo codec) {
        super(configuration, basePath, codec);
    }

    /**
     * Sets the max open attempts trying to find a suitable path for output
     * stream. Only positive values are allowed and any attempt to set this to
     * less than 1 will automatically reset value to exactly 1.
     *
     * @param maxOpenAttempts the new max open attempts
     */
    public void setMaxOpenAttempts(int maxOpenAttempts) {
        this.maxOpenAttempts = Math.max(1, maxOpenAttempts);
    }

    /**
     * Resolves an output path, opens a stream to it and returns a holder
     * containing the path and the stream(s).
     * <p>
     * Up to {@code maxOpenAttempts} attempts are made. After every failed
     * attempt the output context is asked to roll its strategies so that the
     * next attempt can resolve a different candidate path. If this writer is
     * appendable and the resolved path already exists the file is appended
     * to, otherwise it is created honouring the overwrite flag.
     * <p>
     * When the writer is compressed, the raw file stream is stored as the
     * holder's wrapped stream and the codec stream as its main stream;
     * otherwise the raw file stream is the main stream.
     *
     * @return the holder for the opened output stream(s) and their path
     * @throws IOException if the compression stream cannot be created
     * @throws StoreException if no output stream could be opened within
     *         {@code maxOpenAttempts} attempts; the last failure, if any,
     *         is attached as the cause
     */
    protected StreamsHolder<OutputStream> getOutput() throws IOException {
        // Using maxOpenAttempts try to resolve path and open
        // an output stream and automatically rolling strategies
        // to find a next candidate. Effectively if maxOpenAttempts
        // is set to roughly same count as expected number of writers
        // and strategy init is accurate enough to find a good starting
        // position for naming, we should always get a next available
        // path and its stream.
        Path p = null;
        FSDataOutputStream wout = null;
        Exception lastFailure = null;
        int openAttempt = 0;
        do {
            try {
                p = getResolvedPath();
                // Resolve the file system from the path itself so that the
                // existence check and the append/create call always talk to
                // the same file system, even for fully qualified paths which
                // do not live on the default file system.
                FileSystem fs = p.getFileSystem(getConfiguration());
                if (isAppendable() && fs.exists(p)) {
                    wout = fs.append(p);
                } else {
                    wout = fs.create(p, isOverwrite());
                }
                break;
            } catch (Exception e) {
                // Remember why this attempt failed; it becomes the cause of
                // the StoreException if all attempts are exhausted.
                lastFailure = e;
                if (log.isDebugEnabled()) {
                    log.debug("Open attempt " + (openAttempt + 1) + "/" + maxOpenAttempts
                            + " failed, last resolved path=[" + p + "], rolling strategies", e);
                }
                getOutputContext().rollStrategies();
            }
        } while (++openAttempt < maxOpenAttempts);

        if (wout == null) {
            String message = "We've reached maxOpenAttempts=" + maxOpenAttempts
                    + " to find suitable output path. Last path tried was path=[" + p + "]";
            if (lastFailure != null) {
                throw new StoreException(message, lastFailure);
            }
            throw new StoreException(message);
        }

        log.info("Creating output for path " + p);
        StreamsHolder<OutputStream> holder = new StreamsHolder<OutputStream>();
        holder.setPath(p);

        if (!isCompressed()) {
            holder.setStream(wout);
            return holder;
        }

        // From here on the raw stream is open; make sure it does not leak if
        // the codec cannot be resolved, instantiated or attached to it.
        boolean wrapped = false;
        try {
            // TODO: will isCompressed() really guard for npe against getCodec()
            Class<?> clazz = ClassUtils.resolveClassName(getCodec().getCodecClass(), getClass().getClassLoader());
            CompressionCodec compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(clazz,
                    getConfiguration());
            OutputStream out = compressionCodec.createOutputStream(wout);
            holder.setWrappedStream(wout);
            holder.setStream(out);
            wrapped = true;
        } finally {
            if (!wrapped) {
                closeAfterFailure(wout, p);
            }
        }
        return holder;
    }

    /**
     * Gets the current stream writing position.
     * <p>
     * The position is read from the holder's main stream if that is a
     * {@link FSDataOutputStream}, otherwise from its wrapped stream if that
     * is a {@link FSDataOutputStream}.
     *
     * @param holder the holder for output streams
     * @return the position, or {@code -1} if the holder is {@code null} or
     *         neither of its streams is a {@link FSDataOutputStream}
     * @throws IOException Signals that an I/O exception has occurred.
     */
    protected long getPosition(StreamsHolder<OutputStream> holder) throws IOException {
        if (holder == null) {
            return -1;
        }
        OutputStream out = holder.getStream();
        if (out instanceof FSDataOutputStream) {
            return ((FSDataOutputStream) out).getPos();
        }
        OutputStream wout = holder.getWrappedStream();
        if (wout instanceof FSDataOutputStream) {
            return ((FSDataOutputStream) wout).getPos();
        }
        return -1;
    }

    /**
     * Closes a stream which was opened but could not be handed to the caller.
     * A failure to close is only logged so that it does not mask the original
     * error which is already propagating.
     *
     * @param stream the stream to close
     * @param path the path the stream was opened for, used for logging only
     */
    private static void closeAfterFailure(FSDataOutputStream stream, Path path) {
        try {
            stream.close();
        } catch (IOException e) {
            log.warn("Unable to close output stream for path " + path + " after a failed codec setup", e);
        }
    }
}