Package org.apache.storm.hdfs.trident

Source Code of org.apache.storm.hdfs.trident.HdfsState$SequenceFileOptions

package org.apache.storm.hdfs.trident;

import backtype.storm.task.IMetricsContext;
import backtype.storm.topology.FailedException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.storm.hdfs.common.rotation.RotationAction;
import org.apache.storm.hdfs.common.security.HdfsSecurityUtil;
import org.apache.storm.hdfs.trident.format.FileNameFormat;
import org.apache.storm.hdfs.trident.format.RecordFormat;
import org.apache.storm.hdfs.trident.format.SequenceFormat;
import org.apache.storm.hdfs.trident.rotation.FileRotationPolicy;

import org.apache.storm.hdfs.trident.rotation.TimedRotationPolicy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import storm.trident.operation.TridentCollector;
import storm.trident.state.State;
import storm.trident.tuple.TridentTuple;

import java.io.IOException;
import java.io.Serializable;
import java.net.URI;
import java.util.*;

public class HdfsState implements State {

    public static abstract class Options implements Serializable {

        protected String fsUrl;
        protected String configKey;
        protected transient FileSystem fs;
        private Path currentFile;
        protected FileRotationPolicy rotationPolicy;
        protected FileNameFormat fileNameFormat;
        protected int rotation = 0;
        protected transient Configuration hdfsConfig;
        protected ArrayList<RotationAction> rotationActions = new ArrayList<RotationAction>();
        protected transient Object writeLock;
        protected transient Timer rotationTimer;

        abstract void closeOutputFile() throws IOException;

        abstract Path createOutputFile() throws IOException;

        abstract void execute(List<TridentTuple> tuples) throws IOException;

        abstract void doPrepare(Map conf, int partitionIndex, int numPartitions) throws IOException;

        protected void rotateOutputFile() throws IOException {
            LOG.info("Rotating output file...");
            long start = System.currentTimeMillis();
            synchronized (this.writeLock) {
                closeOutputFile();
                this.rotation++;

                Path newFile = createOutputFile();
                LOG.info("Performing {} file rotation actions.", this.rotationActions.size());
                for (RotationAction action : this.rotationActions) {
                    action.execute(this.fs, this.currentFile);
                }
                this.currentFile = newFile;
            }
            long time = System.currentTimeMillis() - start;
            LOG.info("File rotation took {} ms.", time);


        }

        void prepare(Map conf, int partitionIndex, int numPartitions){
            this.writeLock = new Object();
            if (this.rotationPolicy == null) throw new IllegalStateException("RotationPolicy must be specified.");
            if (this.fsUrl == null) {
                throw new IllegalStateException("File system URL must be specified.");
            }
            this.fileNameFormat.prepare(conf, partitionIndex, numPartitions);
            this.hdfsConfig = new Configuration();
            Map<String, Object> map = (Map<String, Object>)conf.get(this.configKey);
            if(map != null){
                for(String key : map.keySet()){
                    this.hdfsConfig.set(key, String.valueOf(map.get(key)));
                }
            }
            try{
                HdfsSecurityUtil.login(conf, hdfsConfig);
                doPrepare(conf, partitionIndex, numPartitions);
                this.currentFile = createOutputFile();

            } catch (Exception e){
                throw new RuntimeException("Error preparing HdfsState: " + e.getMessage(), e);
            }

            if(this.rotationPolicy instanceof TimedRotationPolicy){
                long interval = ((TimedRotationPolicy)this.rotationPolicy).getInterval();
                this.rotationTimer = new Timer(true);
                TimerTask task = new TimerTask() {
                    @Override
                    public void run() {
                        try {
                            rotateOutputFile();
                        } catch(IOException e){
                            LOG.warn("IOException during scheduled file rotation.", e);
                        }
                    }
                };
                this.rotationTimer.scheduleAtFixedRate(task, interval, interval);
            }
        }

    }

    public static class HdfsFileOptions extends Options {

        private transient FSDataOutputStream out;
        protected RecordFormat format;
        private long offset = 0;

        public HdfsFileOptions withFsUrl(String fsUrl){
            this.fsUrl = fsUrl;
            return this;
        }

        public HdfsFileOptions withConfigKey(String configKey){
            this.configKey = configKey;
            return this;
        }

        public HdfsFileOptions withFileNameFormat(FileNameFormat fileNameFormat){
            this.fileNameFormat = fileNameFormat;
            return this;
        }

        public HdfsFileOptions withRecordFormat(RecordFormat format){
            this.format = format;
            return this;
        }

        public HdfsFileOptions withRotationPolicy(FileRotationPolicy rotationPolicy){
            this.rotationPolicy = rotationPolicy;
            return this;
        }

        public HdfsFileOptions addRotationAction(RotationAction action){
            this.rotationActions.add(action);
            return this;
        }

        @Override
        void doPrepare(Map conf, int partitionIndex, int numPartitions) throws IOException {
            LOG.info("Preparing HDFS Bolt...");
            this.fs = FileSystem.get(URI.create(this.fsUrl), hdfsConfig);
        }

        @Override
        void closeOutputFile() throws IOException {
            this.out.close();
        }

        @Override
        Path createOutputFile() throws IOException {
            Path path = new Path(this.fileNameFormat.getPath(), this.fileNameFormat.getName(this.rotation, System.currentTimeMillis()));
            this.out = this.fs.create(path);
            return path;
        }

        @Override
        public void execute(List<TridentTuple> tuples) throws IOException {
            boolean rotated = false;
            synchronized (this.writeLock) {
                for (TridentTuple tuple : tuples) {
                    byte[] bytes = this.format.format(tuple);
                    out.write(bytes);
                    this.offset += bytes.length;

                    if (this.rotationPolicy.mark(tuple, this.offset)) {
                        rotateOutputFile();
                        this.offset = 0;
                        this.rotationPolicy.reset();
                        rotated = true;
                    }
                }
                if (!rotated) {
                    if (this.out instanceof HdfsDataOutputStream) {
                        ((HdfsDataOutputStream) this.out).hsync(EnumSet.of(HdfsDataOutputStream.SyncFlag.UPDATE_LENGTH));
                    } else {
                        this.out.hsync();
                    }
                }
            }
        }
    }

    public static class SequenceFileOptions extends Options {
        private SequenceFormat format;
        private SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.RECORD;
        private transient SequenceFile.Writer writer;
        private String compressionCodec = "default";
        private transient CompressionCodecFactory codecFactory;

        public SequenceFileOptions withCompressionCodec(String codec){
            this.compressionCodec = codec;
            return this;
        }

        public SequenceFileOptions withFsUrl(String fsUrl) {
            this.fsUrl = fsUrl;
            return this;
        }

        public SequenceFileOptions withConfigKey(String configKey){
            this.configKey = configKey;
            return this;
        }

        public SequenceFileOptions withFileNameFormat(FileNameFormat fileNameFormat) {
            this.fileNameFormat = fileNameFormat;
            return this;
        }

        public SequenceFileOptions withSequenceFormat(SequenceFormat format) {
            this.format = format;
            return this;
        }

        public SequenceFileOptions withRotationPolicy(FileRotationPolicy rotationPolicy) {
            this.rotationPolicy = rotationPolicy;
            return this;
        }

        public SequenceFileOptions withCompressionType(SequenceFile.CompressionType compressionType){
            this.compressionType = compressionType;
            return this;
        }

        public SequenceFileOptions addRotationAction(RotationAction action){
            this.rotationActions.add(action);
            return this;
        }

        @Override
        void doPrepare(Map conf, int partitionIndex, int numPartitions) throws IOException {
            LOG.info("Preparing Sequence File State...");
            if (this.format == null) throw new IllegalStateException("SequenceFormat must be specified.");

            this.fs = FileSystem.get(URI.create(this.fsUrl), hdfsConfig);
            this.codecFactory = new CompressionCodecFactory(hdfsConfig);
        }

        @Override
        Path createOutputFile() throws IOException {
            Path p = new Path(this.fsUrl + this.fileNameFormat.getPath(), this.fileNameFormat.getName(this.rotation, System.currentTimeMillis()));
            this.writer = SequenceFile.createWriter(
                    this.hdfsConfig,
                    SequenceFile.Writer.file(p),
                    SequenceFile.Writer.keyClass(this.format.keyClass()),
                    SequenceFile.Writer.valueClass(this.format.valueClass()),
                    SequenceFile.Writer.compression(this.compressionType, this.codecFactory.getCodecByName(this.compressionCodec))
            );
            return p;
        }

        @Override
        void closeOutputFile() throws IOException {
            this.writer.close();
        }

        @Override
        public void execute(List<TridentTuple> tuples) throws IOException {
            long offset;
            for(TridentTuple tuple : tuples) {
                synchronized (this.writeLock) {
                    this.writer.append(this.format.key(tuple), this.format.value(tuple));
                    offset = this.writer.getLength();
                }

                if (this.rotationPolicy.mark(tuple, offset)) {
                    rotateOutputFile();
                    this.rotationPolicy.reset();
                }
            }
        }

    }

    public static final Logger LOG = LoggerFactory.getLogger(HdfsState.class);
    private Options options;

    HdfsState(Options options){
        this.options = options;
    }

    void prepare(Map conf, IMetricsContext metrics, int partitionIndex, int numPartitions){
        this.options.prepare(conf, partitionIndex, numPartitions);
    }

    @Override
    public void beginCommit(Long txId) {
    }

    @Override
    public void commit(Long txId) {
    }

    public void updateState(List<TridentTuple> tuples, TridentCollector tridentCollector){
        try{
            this.options.execute(tuples);
        } catch (IOException e){
            LOG.warn("Failing batch due to IOException.", e);
            throw new FailedException(e);
        }
    }
}
TOP

Related Classes of org.apache.storm.hdfs.trident.HdfsState$SequenceFileOptions

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.