/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.flume.collector;
import java.io.IOException;
import java.util.Collection;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.cloudera.flume.agent.FlumeNode;
import com.cloudera.flume.conf.Context;
import com.cloudera.flume.conf.FlumeConfiguration;
import com.cloudera.flume.conf.FlumeSpecException;
import com.cloudera.flume.conf.SinkFactory.SinkBuilder;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventSink;
import com.cloudera.flume.core.EventSinkDecorator;
import com.cloudera.flume.core.MaskDecorator;
import com.cloudera.flume.handlers.debug.InsistentAppendDecorator;
import com.cloudera.flume.handlers.debug.InsistentOpenDecorator;
import com.cloudera.flume.handlers.debug.StubbornAppendSink;
import com.cloudera.flume.handlers.endtoend.AckChecksumChecker;
import com.cloudera.flume.handlers.endtoend.AckListener;
import com.cloudera.flume.handlers.hdfs.EscapedCustomDfsSink;
import com.cloudera.flume.handlers.rolling.ProcessTagger;
import com.cloudera.flume.handlers.rolling.RollSink;
import com.cloudera.flume.handlers.rolling.Tagger;
import com.cloudera.flume.handlers.rolling.TimeTrigger;
import com.cloudera.flume.reporter.ReportEvent;
import com.cloudera.util.BackoffPolicy;
import com.cloudera.util.CumulativeCappedExponentialBackoff;
import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
/**
 * This collector sink is the high-level specification a user would use.
 *
 * Currently it is just a DFSEventSink that takes its default values from the
 * Flume configuration file; all of them can be overridden by the user.
 *
 * TODO (jon) this default output needs to be replaced with a combo of henry's
 * new tagged output writer and the custom format dfs writer.
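 *
 * A minimal usage sketch as it would appear in a Flume sink spec (the HDFS
 * path and file prefix below are illustrative, not defaults):
 *
 * <pre>{@code
 * collectorSink("hdfs://namenode/flume/collected", "weblog-", 30000)
 * }</pre>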
*/
public class CollectorSink extends EventSink.Base {
static final Logger LOG = LoggerFactory.getLogger(CollectorSink.class);
final EventSink snk;
AckAccumulator accum = new AckAccumulator();
  // Maps each roll tag to the ack tags accumulated while that roll's file was
  // open; the ack tags are pushed to the master when the file is closed.
Multimap<String, String> rollAckMap = HashMultimap.<String, String> create();
CollectorSink(String path, String filename, long millis)
throws FlumeSpecException {
this(path, filename, millis, new ProcessTagger(), 250);
}
CollectorSink(String path, String filename, String hiveTableName, long millis)
throws FlumeSpecException {
this(path, filename, hiveTableName, millis, new ProcessTagger(), 250);
}
  CollectorSink(String path, String filename, String hiveTableName,
      long millis, String elasticSearchUrl, String elasticIndex,
      String elasticType) throws FlumeSpecException {
    this(path, filename, hiveTableName, millis, new ProcessTagger(), 250,
        elasticSearchUrl, elasticIndex, elasticType);
  }
  CollectorSink(String path, String filename, String hiveTableName,
      long millis, String elasticSearchUrl, String elasticIndex,
      String elasticType, boolean runMarkerQueries) throws FlumeSpecException {
    this(path, filename, hiveTableName, millis, new ProcessTagger(), 250,
        elasticSearchUrl, elasticIndex, elasticType, runMarkerQueries);
  }
CollectorSink(final String logdir, final String filename, final long millis,
final Tagger tagger, long checkmillis) {
EventSink s = new RollSink(new Context(), "collectorSink", new TimeTrigger(
tagger, millis), checkmillis) {
@Override
public EventSink newSink(Context ctx) throws IOException {
String tag = tagger.newTag();
String path = logdir + Path.SEPARATOR_CHAR;
EventSink dfs = new EscapedCustomDfsSink(path, filename + tag);
return new RollDetectDeco(dfs, tag);
}
};
long initMs = FlumeConfiguration.get().getInsistentOpenInitBackoff();
long cumulativeMaxMs = FlumeConfiguration.get()
.getFailoverMaxCumulativeBackoff();
long maxMs = FlumeConfiguration.get().getFailoverMaxSingleBackoff();
BackoffPolicy backoff1 = new CumulativeCappedExponentialBackoff(initMs,
maxMs, cumulativeMaxMs);
BackoffPolicy backoff2 = new CumulativeCappedExponentialBackoff(initMs,
maxMs, cumulativeMaxMs);
    // The collector sink has ack checking logic, retry and reopen logic, and
    // needs an extra mask before rolling, writing to disk, and forwarding acks
    // (roll detect).
    // { ackChecksumChecker => insistentAppend => stubbornAppend =>
    // insistentOpen => mask("rolltag") => roll(xx) { rollDetect =>
    // escapedCustomDfs } }
EventSink tmp = new MaskDecorator(s, "rolltag");
tmp = new InsistentOpenDecorator<EventSink>(tmp, backoff1);
tmp = new StubbornAppendSink<EventSink>(tmp);
tmp = new InsistentAppendDecorator<EventSink>(tmp, backoff2);
snk = new AckChecksumChecker<EventSink>(tmp, accum);
}
CollectorSink(final String logdir, final String filename, final String hiveTableName, final long millis,
final Tagger tagger, long checkmillis) {
EventSink s = new RollSink(new Context(), null, new TimeTrigger(tagger,
millis), checkmillis) {
@Override
public EventSink newSink(Context ctx) throws IOException {
String tag = tagger.newTag();
String path = logdir + Path.SEPARATOR_CHAR;
EventSink dfs = new EscapedCustomDfsSink(path, filename + tag, hiveTableName);
return new RollDetectDeco(dfs, tag);
}
};
snk = new AckChecksumChecker<EventSink>(s, accum);
}
  CollectorSink(final String logdir, final String filename,
      final String hiveTableName, final long millis, final Tagger tagger,
      long checkmillis, final String elasticSearchUrl,
      final String elasticIndex, final String elasticType) {
EventSink s = new RollSink(new Context(), null, new TimeTrigger(tagger,
millis), checkmillis) {
@Override
public EventSink newSink(Context ctx) throws IOException {
String tag = tagger.newTag();
String path = logdir + Path.SEPARATOR_CHAR;
        EventSink dfs = new EscapedCustomDfsSink(path, filename + tag,
            hiveTableName, elasticSearchUrl, elasticIndex, elasticType);
return new RollDetectDeco(dfs, tag);
}
};
snk = new AckChecksumChecker<EventSink>(s, accum);
}
  CollectorSink(final String logdir, final String filename,
      final String hiveTableName, final long millis, final Tagger tagger,
      long checkmillis, final String elasticSearchUrl,
      final String elasticIndex, final String elasticType,
      final boolean runMarkerQueries) {
EventSink s = new RollSink(new Context(), null, new TimeTrigger(tagger,
millis), checkmillis) {
@Override
public EventSink newSink(Context ctx) throws IOException {
String tag = tagger.newTag();
String path = logdir + Path.SEPARATOR_CHAR;
        EventSink dfs = new EscapedCustomDfsSink(path, filename + tag,
            hiveTableName, elasticSearchUrl, elasticIndex, elasticType,
            runMarkerQueries);
return new RollDetectDeco(dfs, tag);
}
};
snk = new AckChecksumChecker<EventSink>(s, accum);
}
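  // The roll tag of the file currently being written; set by
  // RollDetectDeco.open() and consulted by AckAccumulator and
  // RollDetectDeco.close().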
String curRollTag;
/**
 * This is a helper class that wraps the body of the collector sink and gives
 * notifications when a roll has happened. Because only close() has sane
 * flushing semantics in HDFS <= v0.20.x, we need to collect acks: data is
 * safe only after a close on the HDFS file has happened.
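 *
 * A sketch of the intended lifecycle (the tag value is illustrative):
 *
 * <pre>
 *   open()  : curRollTag = "rolltag-123"
 *   acks    : AckAccumulator maps each incoming ack tag to curRollTag
 *   close() : every ack tag mapped to curRollTag is pushed to the master
 * </pre>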
*/
class RollDetectDeco extends EventSinkDecorator<EventSink> {
String tag;
public RollDetectDeco(EventSink s, String tag) {
super(s);
this.tag = tag;
}
    @Override
    public void open() throws IOException {
      // Record this roll's tag as the collector's current roll tag.
curRollTag = tag;
super.open();
}
@Override
public void close() throws IOException {
super.close();
AckListener master = FlumeNode.getInstance().getCollectorAckListener();
Collection<String> acktags = rollAckMap.get(curRollTag);
LOG.debug("Roll closed, pushing acks for " + curRollTag + " :: "
+ acktags);
for (String at : acktags) {
master.end(at);
}
}
  }
  /**
   * This accumulates ack tags in rollAckMap so that they can be pushed to the
   * master when the HDFS file associated with the rolltag is closed.
   */
class AckAccumulator implements AckListener {
@Override
public void end(String group) throws IOException {
LOG.debug("Adding to acktag " + group + " to rolltag " + curRollTag);
rollAckMap.put(curRollTag, group);
LOG.debug("Current rolltag acktag mapping: " + rollAckMap);
}
@Override
public void err(String group) throws IOException {
}
@Override
public void expired(String key) throws IOException {
}
@Override
public void start(String group) throws IOException {
}
  }
@Override
public void append(Event e) throws IOException {
snk.append(e);
super.append(e);
}
@Override
public void close() throws IOException {
snk.close();
}
@Override
public void open() throws IOException {
snk.open();
}
@Override
public String getName() {
return "Collector";
}
@Override
public void getReports(String namePrefix, Map<String, ReportEvent> reports) {
super.getReports(namePrefix, reports);
snk.getReports(namePrefix + getName() + ".", reports);
}
public EventSink getSink() {
return snk;
}
public static SinkBuilder builder() {
return new SinkBuilder() {
@Override
public EventSink build(Context context, String... argv) {
        Preconditions.checkArgument(argv.length <= 3,
            "usage: collectorSink[(dfsdir,path[,rollmillis])]");
String logdir = FlumeConfiguration.get().getCollectorDfsDir(); // default
long millis = FlumeConfiguration.get().getCollectorRollMillis();
String prefix = "";
if (argv.length >= 1) {
logdir = argv[0]; // override
}
if (argv.length >= 2) {
prefix = argv[1];
}
if (argv.length >= 3) {
millis = Long.parseLong(argv[2]);
}
try {
EventSink snk = new CollectorSink(logdir, prefix, millis);
return snk;
} catch (FlumeSpecException e) {
LOG.error("CollectorSink spec error " + e, e);
          throw new IllegalArgumentException(
              "usage: collectorSink[(dfsdir,path[,rollmillis])]", e);
}
}
};
}
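  /**
   * A sketch of the spec this builder accepts (all argument values below are
   * illustrative):
   *
   * <pre>{@code
   * hiveCollectorSink("hdfs://namenode/flume/collected", "weblog-", "web_logs", 30000)
   * }</pre>
   */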
public static SinkBuilder hiveBuilder() {
return new SinkBuilder() {
@Override
public EventSink build(Context context, String... argv) {
LOG.info("adding hiveCollectorSink arguments");
        Preconditions.checkArgument(argv.length >= 3 && argv.length <= 4,
            "usage: hiveCollectorSink(dfsdir,path,hive_table_name[,rollmillis])");
        long millis = FlumeConfiguration.get().getCollectorRollMillis(); // default
        String logdir = argv[0];
        String prefix = argv[1];
        String hiveTableName = argv[2];
        if (argv.length >= 4) {
          millis = Long.parseLong(argv[3]);
        }
LOG.info("HIVE TABLE NAME: " + hiveTableName);
        try {
          EventSink snk = new CollectorSink(logdir, prefix, hiveTableName,
              millis);
          return snk;
        } catch (FlumeSpecException e) {
          LOG.error("CollectorSink spec error " + e, e);
          throw new IllegalArgumentException(
              "usage: hiveCollectorSink(dfsdir,path,hive_table_name[,rollmillis])", e);
        }
}
};
}
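  /**
   * A sketch of the spec this builder accepts (the URL, index, and type
   * values below are illustrative):
   *
   * <pre>{@code
   * hiveElasticSearchCollectorSink("hdfs://namenode/flume/collected", "weblog-",
   *     "web_logs", "http://es-host:9200", "logs", "weblog")
   * }</pre>
   */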
public static SinkBuilder hiveElasticSearchBuilder() {
return new SinkBuilder() {
@Override
public EventSink build(Context context, String... argv) {
LOG.info("adding hiveElasticSearchCollectorSink arguments");
        Preconditions.checkArgument(argv.length == 6,
            "usage: hiveElasticSearchCollectorSink(dfsdir,path,hive_table_name,"
                + "elasticSearchUrl,elasticIndex,elasticType)");
        long millis = FlumeConfiguration.get().getCollectorRollMillis(); // default
        String logdir = argv[0];
        String prefix = argv[1];
        String hiveTableName = argv[2];
        String elasticSearchUrl = argv[3];
        String elasticIndex = argv[4];
        String elasticType = argv[5];
LOG.info("HIVE TABLE NAME: " + hiveTableName);
        try {
          EventSink snk = new CollectorSink(logdir, prefix, hiveTableName,
              millis, elasticSearchUrl, elasticIndex, elasticType);
          return snk;
        } catch (FlumeSpecException e) {
          LOG.error("CollectorSink spec error " + e, e);
          throw new IllegalArgumentException(
              "usage: hiveElasticSearchCollectorSink(dfsdir,path,hive_table_name,"
                  + "elasticSearchUrl,elasticIndex,elasticType)", e);
        }
}
};
}
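  /**
   * Same as {@link #hiveElasticSearchBuilder()} but with marker queries
   * enabled. A sketch of the accepted spec, using the sink name from the
   * usage string (all argument values are illustrative):
   *
   * <pre>{@code
   * hesMarkerBuilder("hdfs://namenode/flume/collected", "weblog-",
   *     "web_logs", "http://es-host:9200", "logs", "weblog")
   * }</pre>
   */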
public static SinkBuilder hesMarkerBuilder() {
return new SinkBuilder() {
@Override
public EventSink build(Context context, String... argv) {
LOG.info("adding hiveElasticSearchCollectorSink arguments");
Preconditions.checkArgument(argv.length <= 6 && argv.length >= 5,
"usage: hesMarkerBuilder[(dfsdir,path,hive_table_name[,rollmillis])]");
        long millis = FlumeConfiguration.get().getCollectorRollMillis(); // default
        String logdir = argv[0];
        String prefix = argv[1];
        String hiveTableName = argv[2];
        String elasticSearchUrl = argv[3];
        String elasticIndex = argv[4];
        String elasticType = argv[5];
LOG.info("HIVE TABLE NAME: " + hiveTableName);
boolean runMarkerQueries = true;
        try {
          EventSink snk = new CollectorSink(logdir, prefix, hiveTableName,
              millis, elasticSearchUrl, elasticIndex, elasticType,
              runMarkerQueries);
          return snk;
        } catch (FlumeSpecException e) {
          LOG.error("CollectorSink spec error " + e, e);
          throw new IllegalArgumentException(
              "usage: hesMarkerBuilder(dfsdir,path,hive_table_name,"
                  + "elasticSearchUrl,elasticIndex,elasticType)", e);
        }
}
};
}
}