/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.flume.handlers.endtoend;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import java.util.zip.CRC32;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.cloudera.flume.agent.FlumeNode;
import com.cloudera.flume.conf.Context;
import com.cloudera.flume.conf.SinkFactory.SinkDecoBuilder;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventSink;
import com.cloudera.flume.core.EventSinkDecorator;
import com.cloudera.flume.reporter.ReportEvent;
import com.google.common.base.Preconditions;
/**
* This tracks ack on batches of events.
*
* This looks for and tracks begin and end message for ack tagged values.
*
* Will this be a reasonable size? 8 bytes for the checksum per machine coming
* in. With high estimates of roughly 1000 machines, each with 5 connections,
* with lets say 20 outstanding ack groups (64MB or roughly 10-20 seconds of
* buffering) means 800 KB. No big deal.
*
* Let say 1MB group, 50 MB, This is still manageable.
*/
public class AckChecksumChecker<S extends EventSink> extends
EventSinkDecorator<S> {
static final Logger LOG = LoggerFactory.getLogger(AckChecksumChecker.class);
final static public String A_ACK_STARTS = "ackStarts";
final static public String A_ACK_ENDS = "ackEnds";
final static public String A_ACK_FAILS = "ackFails";
final static public String A_ACK_SUCCESS = "ackSuccesses";
final static public String A_ACK_UNEXPECTED = "ackUnexpected";
AtomicLong ackStarts = new AtomicLong();
AtomicLong ackEnds = new AtomicLong();
AtomicLong ackFails = new AtomicLong();
AtomicLong ackSuccesses = new AtomicLong();
// TODO (jon) this is very inefficient right now.
Map<String, Long> partial = new HashMap<String, Long>();
// Just need to keep the tags of the finished ones
List<String> done = new ArrayList<String>();
final AckListener listener;
long unstarted = 0; // number of events that didn't have a start event.
public AckChecksumChecker(S s, AckListener l) {
super(s);
Preconditions.checkNotNull(l);
this.listener = l;
}
public AckChecksumChecker(S s) {
super(s);
// do nothing listener
this.listener = new AckListener() {
@Override
public void end(String group) {
LOG.info("ended " + group);
}
@Override
public void err(String group) {
LOG.info("erred " + group);
}
@Override
public void start(String group) {
LOG.info("start " + group);
}
@Override
public void expired(String group) throws IOException {
LOG.info("expired " + group);
}
};
}
@Override
public void append(Event e) throws IOException {
byte[] btyp = e.get(AckChecksumInjector.ATTR_ACK_TYPE);
if (btyp == null) {
// pass through if has no checksumming tags
super.append(e);
return;
}
byte[] btag = e.get(AckChecksumInjector.ATTR_ACK_TAG);
byte[] bchk = e.get(AckChecksumInjector.ATTR_ACK_HASH);
String k = new String(btag);
if (Arrays.equals(btyp, AckChecksumInjector.CHECKSUM_START)) {
LOG.info("Starting checksum group called " + k);
// Checksum Start marker: create new partial
long newchk = ByteBuffer.wrap(bchk).getLong();
LOG.info("initial checksum is " + Long.toHexString(newchk));
partial.put(k, newchk);
ackStarts.incrementAndGet();
listener.start(k);
return;
} else if (Arrays.equals(btyp, AckChecksumInjector.CHECKSUM_STOP)) {
LOG.info("Finishing checksum group called '" + k + "'");
ackEnds.incrementAndGet();
// Checksum stop marker: move from partial to done
Long chksum = partial.get(k);
if (chksum == null) {
LOG.error("checksum failed");
listener.err(k);
ackFails.incrementAndGet();
return;
}
long endchk = ByteBuffer.wrap(bchk).getLong();
LOG.debug("final checksum is " + Long.toHexString(endchk)
+ " stop checksum is " + Long.toHexString(chksum));
if ((chksum ^ endchk) != 0) {
// There was a problem.
LOG.warn("[ Thread " + Thread.currentThread().getId()
+ " ] Some component of msg group was lost or duped " + k);
listener.err(k);
ackFails.incrementAndGet();
return;
}
LOG.info("Checksum succeeded " + Long.toHexString(chksum));
listener.end(k);
ackSuccesses.incrementAndGet();
done.add(k);
partial.remove(k);
LOG.info("moved from partial to complete " + k);
return;
}
// normal case, just an update the checksum.
CRC32 chk = new CRC32();
chk.reset();
chk.update(e.getBody());
long chkVal = chk.getValue();
if (chkVal != ByteBuffer.wrap(bchk).getLong()) {
LOG.warn("check sum does not match!");
}
super.append(e);
// only do this after we have successfully sent the event.
synchronized (partial) {
Long chks = partial.get(k);
if (chks == null) {
// throw new IOException("Ack tag '" + k + "' was not started: ");
unstarted++;
return;
}
// update checksum.
long checksum = partial.get(k);
checksum ^= chkVal;
partial.put(k, checksum);
}
}
@Override
public ReportEvent getReport() {
ReportEvent rpt = super.getReport();
rpt.setLongMetric(A_ACK_FAILS, ackFails.get());
rpt.setLongMetric(A_ACK_SUCCESS, ackSuccesses.get());
rpt.setLongMetric(A_ACK_STARTS, ackStarts.get());
rpt.setLongMetric(A_ACK_ENDS, ackEnds.get());
rpt.setLongMetric(A_ACK_UNEXPECTED, unstarted);
return rpt;
}
public static SinkDecoBuilder builder() {
return new SinkDecoBuilder() {
@Override
public EventSinkDecorator<EventSink> build(Context context,
String... argv) {
Preconditions.checkArgument(argv.length == 0, "usage: ackChecker");
return new AckChecksumChecker<EventSink>(null, FlumeNode.getInstance()
.getCollectorAckListener());
}
};
}
}