Package com.cloudera.flume.handlers.hdfs

Source Code of com.cloudera.flume.handlers.hdfs.DFSEventSink

/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.flume.handlers.hdfs;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.flume.conf.Context;
import com.cloudera.flume.conf.FlumeConfiguration;
import com.cloudera.flume.conf.SinkFactory.SinkBuilder;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventSink;
import com.google.common.base.Preconditions;

/**
* Writes events the a file give a hadoop uri path. If no uri is specified It
* defaults to the set by the given configured by fs.default.name config
* variable
*/
public class DFSEventSink extends EventSink.Base {
  static final Logger LOG = LoggerFactory.getLogger(DFSEventSink.class);
  String path;
  Writer writer = null;
  // We keep a - potentially unbounded - set of writers around to deal with
  // different tags on events. Therefore this feature should be used with some
  // care (where the set of possible paths is small) until we do something
  // more sensible with resource management.
  final Map<String, Writer> sfWriters = new HashMap<String, Writer>();

  // Used to short-circuit around doing regex matches when we know there are
  // no templates to be replaced.
  boolean shouldSub = false;

  public DFSEventSink(String path) {
    this.path = path;
    shouldSub = Event.containsTag(path);
  }

  protected Writer openWriter(String p) throws IOException {
    LOG.info("Opening " + p);
    FlumeConfiguration conf = FlumeConfiguration.get();

    Path dstPath = new Path(p);
    FileSystem hdfs = dstPath.getFileSystem(conf);

    Writer w = SequenceFile.createWriter(hdfs, conf, dstPath,
        WriteableEventKey.class, WriteableEvent.class);

    return w;
  }

  /**
   * Writes the message to an HDFS file whose path is substituted with tags
   * drawn from the supplied event
   */
  @Override
  public void append(Event e) throws IOException, InterruptedException  {
    Writer w = writer;

    if (shouldSub) {
      String realPath = e.escapeString(path);
      w = sfWriters.get(realPath);
      if (w == null) {
        w = openWriter(realPath);
        sfWriters.put(realPath, w);
      }
    }

    Preconditions.checkState(w != null,
        "Attempted to append to a null dfs writer!");
    w.append(new WriteableEventKey(e), new WriteableEvent(e));
    super.append(e);
  }

  @Override
  public void close() throws IOException {
    if (shouldSub) {
      for (Entry<String, Writer> e : sfWriters.entrySet()) {
        LOG.info("Closing " + e.getKey());
        e.getValue().close();
      }
    } else {

      if (writer == null) {
        LOG.warn("DFS Sink double closed? " + path);
        return;
      }
      LOG.info("Closing " + path);
      writer.close();

      writer = null;
    }
  }

  @Override
  public void open() throws IOException {
    if (!shouldSub) {
      writer = openWriter(path);
    }
  }

  public static SinkBuilder builder() {
    return new SinkBuilder() {
      @Override
      public EventSink build(Context context, String... args) {
        if (args.length != 1) {
          // TODO (jon) make this message easier.
          throw new IllegalArgumentException(
              "usage: dfs(\"[(hdfs|file|s3n|...)://namenode[:port]]/path\")");
        }
        return new DFSEventSink(args[0]);
      }
    };
  }
}
TOP

Related Classes of com.cloudera.flume.handlers.hdfs.DFSEventSink

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.