/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.flume.handlers.exec;
import java.io.IOException;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.channels.ReadableByteChannel;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.cloudera.flume.conf.FlumeConfiguration;
import com.cloudera.flume.conf.SourceFactory.SourceBuilder;
import com.cloudera.flume.core.Attributes;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventImpl;
import com.cloudera.flume.core.EventSource;
import com.cloudera.util.Clock;
import com.cloudera.util.InputStreamPipe;
import com.google.common.base.Preconditions;
/**
* Simple process output source. Uses threads to asynchronously read stdout and
* stderr in order to ensure that system buffers are drained.
*
* Events are either returned line-by-line or aggregated into a single event
* containing an entire process' output.
*
* TODO(henry) - expose more of 'exec' parameters to callers, like ENV and CWD
* setting.
*/
public class ExecNioSource extends EventSource.Base {
// Command line handed to Runtime.exec() each time the source is opened.
final String command;
// Should we restart the command once both of its output streams finish?
final boolean restart;
// Milliseconds next() sleeps between close() and the following open() when
// restart is true.
final int period;
// Return line by line, or aggregate each stream's output into one event?
final boolean inAggregateMode;
// Set to true by the reader threads as they exit; next() treats "both true and
// queue empty" as the end of the current execution.
private final AtomicBoolean errFinished = new AtomicBoolean(false);
private final AtomicBoolean outFinished = new AtomicBoolean(false);
// Hand-off between the reader threads (producers) and next() (consumer).
private final BlockingQueue<Event> eventQueue = new LinkedBlockingQueue<Event>();
// The logger is never reassigned, so it is declared final.
private static final Logger LOG = LoggerFactory.getLogger(ExecNioSource.class);
// Attribute keys attached to every event produced by this source.
public static final String A_PROC_SOURCE = "procsource";
public static final String A_EXEC_CMD = "execcmd";
// Input sources: channel views of the child's stdout and stderr.
private ReadableByteChannel stdout = null;
private ReadableByteChannel stderr = null;
// Two threads to read from each source
ReaderThread readOut = null, readErr = null;
// Pipes wrapping the child's streams. Despite its name, stdinISP wraps the
// child's standard *output* (proc.getInputStream()).
private InputStreamPipe stdinISP = null, stderrISP = null;
// The running child process, or null when the source is not open.
private Process proc = null;
// Used to signal that both reader and err threads have exited; replaced with
// a fresh latch on every open().
private CountDownLatch latch = new CountDownLatch(2);
/**
 * Records how the command should be run; nothing is executed until the source
 * is opened.
 *
 * @param command
 *          Command line to exec and get output from.
 * @param aggregate
 *          if true, aggregate the output of a single exec; if false, return
 *          an event per line.
 * @param restart
 *          if true, restart the exec every period ms after the previous
 *          exit; if false, the source reports end of data once the command
 *          has finished.
 * @param period
 *          milliseconds to wait after the exec exits before restarting; only
 *          used when restart is true.
 */
ExecNioSource(String command, boolean aggregate, boolean restart, int period) {
  // What to run and how to package its output.
  this.inAggregateMode = aggregate;
  this.command = command;
  // Re-run policy.
  this.period = period;
  this.restart = restart;
}
/**
 * Wraps bytes gathered from an exec'd program in an event and records where
 * they came from.
 *
 * @param body
 *          raw bytes that become the event body
 * @param tag
 *          stored under {@link #A_PROC_SOURCE}
 * @param command
 *          stored under {@link #A_EXEC_CMD}
 * @return the new event, with its service attribute set to "exec"
 */
static Event buildExecEvent(byte[] body, String tag, String command)
    throws InterruptedException {
  final Event execEvent = new EventImpl(body);
  Attributes.setString(execEvent, A_PROC_SOURCE, tag);
  Attributes.setString(execEvent, A_EXEC_CMD, command);
  Attributes.setString(execEvent, Event.A_SERVICE, "exec");
  return execEvent;
}
/**
 * Turns every complete ('\n'-terminated) line in the buffer into an event and
 * puts it on the given queue. A line longer than the configured max event
 * size is cut to that size; the remainder of the line is discarded. Whatever
 * follows the last '\n' is compacted to the front of the buffer so later
 * reads can append to it.
 *
 * @param buf
 *          ByteBuffer in write mode. If this method exits normally, there are
 *          no remaining '\n's in the buffer and the buffer is back in write
 *          mode with position just past the leftover bytes
 * @param command
 *          command recorded on each event
 * @param tag
 *          stream tag recorded on each event
 * @param sync
 *          queue that receives the events
 * @return true if at least one event was queued
 */
static boolean extractLines(ByteBuffer buf, String command, String tag,
    BlockingQueue<Event> sync) throws InterruptedException {
  buf.flip();
  buf.mark(); // mark always sits on the first byte of the pending line
  int lineStart = buf.position();
  boolean emitted = false;
  while (buf.hasRemaining()) {
    // TODO windows: ('\r\n') line separators
    if (buf.get() != '\n') {
      continue;
    }
    final int lineEnd = buf.position(); // just past the '\n'
    final int lineLen = lineEnd - lineStart - 1; // exclude '\n'
    final int maxEventSz = (int) FlumeConfiguration.get().getEventMaxSizeBytes();
    // Copy at most maxEventSz bytes; anything beyond that is skipped over.
    final byte[] body = new byte[Math.min(lineLen, maxEventSz)];
    buf.reset(); // back to the start of the line
    buf.get(body, 0, body.length);
    buf.position(lineEnd); // step over the rest of the line and its '\n'
    buf.mark();
    lineStart = lineEnd;
    sync.put(buildExecEvent(body, tag, command));
    emitted = true;
  }
  // rewind to the start of the incomplete line and shift it to the front.
  buf.reset();
  buf.compact();
  return emitted;
}
/**
 * Discards buffered bytes up to and including the first '\n'. If no '\n' is
 * present the whole buffer content is discarded.
 *
 * @param in
 *          byte buffer in write mode
 * @return true if no '\n' was found (caller must stay in drop mode), false if
 *         a '\n' was reached; in that case the bytes after it are kept at the
 *         front of the buffer. The buffer is in write mode on exit either
 *         way.
 */
static boolean dropUntilNewLine(ByteBuffer in) {
  in.flip();
  boolean sawNewline = false;
  while (!sawNewline && in.hasRemaining()) {
    sawNewline = (in.get() == '\n');
  }
  if (sawNewline) {
    // keep only what follows the newline, ready for more writes
    in.compact();
  } else {
    // nothing worth keeping; wipe the buffer and keep dropping
    in.clear();
  }
  return !sawNewline;
}
/**
* Polls an input and formats lines read as events, places them on the event
* queue.
*/
class ReaderThread extends Thread {
// Channel this thread drains; closed by shutdown() and when a read loop ends.
ReadableByteChannel readChan = null;
// Loop-exit flag; volatile because shutdown() sets it from another thread.
volatile boolean shutdown = false;
// Stream label used in the thread name, log messages and event attributes.
String tag;
// Flag set to true once this thread has finished reading.
AtomicBoolean signalDone;

/**
 * @param input
 *          channel to read from; must not be null
 * @param tag
 *          label for the stream being read
 * @param signal
 *          flag to set when this thread is done; must not be null
 * @throws IllegalArgumentException
 *           if input or signal is null
 */
ReaderThread(ReadableByteChannel input, String tag, AtomicBoolean signal) {
  super("ReaderThread (" + command + "-" + tag + ")");
  // validate both references before storing anything
  Preconditions.checkArgument(input != null);
  Preconditions.checkArgument(signal != null);
  this.signalDone = signal;
  this.tag = tag;
  this.readChan = input;
}
/**
 * Reports whether the given process has terminated.
 *
 * @param proc
 *          process to probe
 * @return true if the process has an exit value, false if asking for it
 *         raised IllegalThreadStateException (i.e. it has not exited yet)
 */
boolean isProcDone(Process proc) {
  boolean finished;
  try {
    // Only the side effect matters: exitValue() throws while the process is
    // still running. This is gross but the only java way to figure out if
    // the subprocess is running.
    proc.exitValue();
    finished = true;
  } catch (IllegalThreadStateException stillRunning) {
    finished = false;
  }
  return finished;
}
/**
 * Takes exec output and converts individual lines into events.
 *
 * Loops until shutdown is flagged: reads from the channel into a buffer,
 * queues one event per '\n'-terminated line, and truncates any line longer
 * than the configured max event size (the rest of that line is dropped).
 * However the loop ends, the channel is closed, the done flag is set and the
 * latch is counted down.
 *
 * NOTE: bytes still buffered without a trailing '\n' when the stream ends
 * are not turned into an event.
 */
void doLineMode() {
  // make sure we have a buffer big enough to get relevant data.
  int maxEventSize = (int) FlumeConfiguration.get().getEventMaxSizeBytes();
  int bufSize = Math.max(Short.MAX_VALUE, maxEventSize * 4);
  try {
    ByteBuffer in = ByteBuffer.allocate(bufSize);
    boolean dropMode = false; // for truncations of extremely long lines
    while (!shutdown) {
      // If interrupted, this throws an IOException
      int read = readChan.read(in);
      if (read == 0) {
        // don't burn the cpu if nothing is read.
        Clock.sleep(100);
        continue;
      }
      if (read < 0) {
        // end of input stream reached.
        if (isProcDone(proc)) {
          shutdown = true;
        } else {
          // The stream is finished but the process is still alive. Every
          // further read returns immediately, so back off instead of
          // spinning until the process exits.
          Clock.sleep(100);
        }
        continue;
      }
      // At this point, I have read data.
      if (dropMode) {
        dropMode = dropUntilNewLine(in);
        if (dropMode) {
          // didn't reach new line, keep dropping
          continue;
        }
        // fall through and do extract lines
      }
      // exits with 'in' in write mode
      extractLines(in, command, tag, eventQueue);
      // the leftovers bytes ideally should always be smaller than
      // maxEventSize, and has the invariant of not having a '\n' in it.
      if (in.position() > maxEventSize) {
        // read up to max size, and then throw out the rest
        LOG.error("Entry too long, truncating: " + in.position() + " > "
            + maxEventSize + "(max event size)");
        in.flip(); // read mode.
        byte[] buf = new byte[maxEventSize];
        in.get(buf);
        // buildExecEvent takes (body, tag, command), same order as the call
        // in extractLines, so truncated lines carry the same attributes as
        // complete ones.
        eventQueue.put(buildExecEvent(buf, tag, command));
        // We can now drop the remaining data, and continue to drop
        in.clear(); // back in write mode.
        dropMode = true;
      }
    }
  } catch (InterruptedException e) {
    // interruptions are only expected in shutdown.
    if (!shutdown) {
      LOG.warn(tag + " ReaderThread received "
          + "unexpected InterruptedException", e);
    }
  } catch (BufferOverflowException b) {
    // This should never happen (buffer is bigger than max event size)
    LOG.error("Event was too large for buffer", b);
  } catch (IOException e) {
    if (!shutdown) {
      LOG.warn(tag + " ReaderThread received unexpected IOException", e);
    }
  } finally {
    try {
      readChan.close();
    } catch (IOException i) {
      LOG.warn("Failed to close input stream in ExecNioSource", i);
    } finally {
      // Always signal completion, even if an unchecked exception is
      // propagating; otherwise next() never sees the stream finish and
      // close() waits out its timeout.
      signalDone.set(true);
      latch.countDown();
    }
  }
}
/**
 * This takes exec and reads as much as it can from a single exec and then
 * creates a single event.
 *
 * At most the configured max event size is kept: once the buffer fills, its
 * content is saved as the event and everything read afterwards is discarded.
 * The event (if any) is queued when the end of the stream is reached. However
 * the loop ends, the channel is closed, the done flag is set and the latch is
 * counted down.
 */
void doAggregateMode() {
  // make sure we have a buffer big enough to get relevant data.
  int maxEventSize = (int) FlumeConfiguration.get().getEventMaxSizeBytes();
  try {
    // TODO evaluate allocate vs allocateDirect
    ByteBuffer in = ByteBuffer.allocate(maxEventSize);
    Event evt = null;
    boolean dropMode = false;
    while (!shutdown) {
      // If interrupted, this throws an IOException
      int read = readChan.read(in);
      if (read == 0) { // nothing read? chill out for a bit.
        Clock.sleep(100);
        continue;
      }
      if (read < 0) { // read end of input stream.
        if (isProcDone(proc)) {
          shutdown = true;
        }
        if (evt == null && in.position() != 0) {
          // event that didn't fill the buffer
          byte[] eventBuf = new byte[in.position()];
          in.flip();
          in.get(eventBuf);
          // buildExecEvent takes (body, tag, command)
          evt = buildExecEvent(eventBuf, tag, command);
        }
        if (evt != null) {
          // either the full event saved off earlier or the partial one
          // built just above
          eventQueue.put(evt);
        }
        // nothing buffered and nothing saved: no event for this stream.
        break;
      }
      // if we read data, keep reading into buffer. If the buffer is full,
      // read the data out of the buf
      if (dropMode) {
        in.clear();
        continue;
      }
      if (in.remaining() == 0) {
        byte[] eventBuf = new byte[in.position()];
        in.flip();
        in.get(eventBuf);
        // buildExecEvent takes (body, tag, command)
        evt = buildExecEvent(eventBuf, tag, command);
        in.clear(); // don't need the data any more
        dropMode = true;
      }
    }
  } catch (InterruptedException e) {
    // interruptions are expected in shutdown.
    if (!shutdown) {
      LOG.warn(tag + " ReaderThread received "
          + "unexpected InterruptedException", e);
    }
  } catch (IOException e) {
    if (!shutdown) {
      LOG.warn(tag + " ReaderThread received unexpected IOException", e);
    }
  } finally {
    try {
      readChan.close();
    } catch (IOException i) {
      // Message now names this class, matching the one in doLineMode().
      LOG.warn("Failed to close input stream in ExecNioSource", i);
    } finally {
      // Signal completion no matter how the close attempt went.
      signalDone.set(true);
      latch.countDown();
    }
  }
}
/**
 * Thread body: drains the channel in whichever mode the enclosing source was
 * configured with, formatting what it reads as events on the queue.
 */
public void run() {
  if (!inAggregateMode) {
    doLineMode();
    return;
  }
  // are stderr and stdout different events? so this can do two events?
  doAggregateMode();
}
/**
 * Asks this reader to stop: flags shutdown, closes the channel and interrupts
 * the thread so a sleeping or blocked read loop wakes up.
 *
 * @throws IOException
 *           if closing the channel fails; the thread is interrupted
 *           regardless.
 */
void shutdown() throws IOException {
  this.shutdown = true;
  try {
    if (this.readChan != null) {
      readChan.close();
    }
  } finally {
    // Interrupt even when close() throws, so the read loop is still woken.
    this.interrupt();
  }
}
}
/**
 * Stops both reader threads, waits up to five seconds for them to exit, shuts
 * down the stream pipes and destroys the child process. Calling this on a
 * source that was never opened does nothing.
 *
 * The process is always destroyed, even when an earlier cleanup step fails.
 *
 * @throws IOException
 *           if shutting down a reader fails, or if the reader threads did not
 *           exit within the timeout (including when the wait was
 *           interrupted).
 */
public void close() throws IOException {
  // Note that this does not guarantee that any further next() calls will
  // return the EOF null that signals the process shut down.
  IOException shutdownFailure = null;
  boolean latched = true;
  boolean interrupted = false;
  try {
    // Shut down each reader independently so a failure on one does not leave
    // the other running.
    try {
      if (readOut != null) {
        readOut.shutdown();
      }
    } catch (IOException e) {
      shutdownFailure = e;
    }
    try {
      if (readErr != null) {
        readErr.shutdown();
      }
    } catch (IOException e) {
      if (shutdownFailure == null) {
        shutdownFailure = e;
      } else {
        LOG.warn("Failed to shut down stderr reader thread", e);
      }
    }
    // Want to make sure that both threads have exited before we kill the
    // process. Without readers there is nothing to wait for.
    if (readOut != null || readErr != null) {
      try {
        latched = latch.await(5000, TimeUnit.MILLISECONDS);
      } catch (InterruptedException e) {
        LOG.debug("Waiting for exec thread exit was interrupted", e);
        latched = false;
        interrupted = true;
      }
    }
    if (stdinISP != null) {
      stdinISP.shutdown();
    }
    if (stderrISP != null) {
      stderrISP.shutdown();
    }
  } finally {
    if (proc != null) {
      proc.destroy();
      proc = null;
    }
    if (interrupted) {
      // Restore the flag only after cleanup so it cannot disturb the
      // shutdown calls above.
      Thread.currentThread().interrupt();
    }
  }
  if (shutdownFailure != null) {
    throw shutdownFailure;
  }
  if (!latched) {
    throw new IOException("Timeout waiting for exec threads to exit");
  }
}
/**
 * Blocks until an event from stdout / stderr is available, or until both
 * streams have finished and the queue is empty.
 *
 * When the output is exhausted and restart is set, the source is closed,
 * sleeps for the configured period, is reopened, and this call keeps
 * waiting for events from the new execution.
 *
 * @return the next event, or null once both streams have finished, the queue
 *         is empty and restart is not set
 * @throws IOException
 *           if closing or reopening the source fails during a restart
 * @throws RuntimeException
 *           if the calling thread is interrupted while waiting; the
 *           interrupt flag is set again before throwing
 */
public Event next() throws IOException {
  while (true) {
    try {
      Event line = eventQueue.poll(1000, TimeUnit.MILLISECONDS);
      if (line == null && errFinished.get() && outFinished.get()) {
        // We may have missed events between waking up and testing
        line = eventQueue.poll();
        if (line == null) {
          if (!restart) {
            return null;
          }
          close();
          Thread.sleep(period);
          open();
          continue;
        }
      }
      if (line != null) {
        updateEventProcessingStats(line);
        return line;
      }
      // timed out while the process is still producing: poll again.
    } catch (InterruptedException e) {
      // this is in the driver so needs to re-flag interrupted status
      LOG.warn("Exec next was interrupted " + e);
      Thread.currentThread().interrupt();
      // keep the original exception as the cause so the stack trace survives
      throw new RuntimeException("ExecEventSource was interrupted - " + e, e);
    }
  }
}
/**
 * Starts a Process and two threads to read from stdout / stderr.
 *
 * If anything fails after the process has been launched, the process is
 * destroyed and the source is left closed, so a later open() can retry.
 *
 * @throws IllegalStateException
 *           if the source is already open
 * @throws IOException
 *           if the command cannot be executed or its streams cannot be
 *           wrapped
 */
public void open() throws IOException {
  if (proc != null) {
    throw new IllegalStateException("Tried to open exec process twice");
  }
  latch = new CountDownLatch(2);
  outFinished.set(false);
  errFinished.set(false);
  final Process launched = Runtime.getRuntime().exec(command);
  // The reader threads consult the proc field, so publish it before they
  // are started.
  proc = launched;
  boolean started = false;
  try {
    // Just reading from stdout and stderr can block, so we wrap them with
    // InputStreamPipe, which allows them to be nonblocking.
    stdinISP = new InputStreamPipe(launched.getInputStream());
    stderrISP = new InputStreamPipe(launched.getErrorStream());
    stdout = (ReadableByteChannel) stdinISP.getChannel();
    stderr = (ReadableByteChannel) stderrISP.getChannel();
    readOut = new ReaderThread(stdout, "STDOUT", outFinished);
    readErr = new ReaderThread(stderr, "STDERR", errFinished);
    stdinISP.start();
    stderrISP.start();
    readOut.start();
    readErr.start();
    started = true;
  } finally {
    if (!started) {
      // Don't leak the child process, and don't leave proc set: otherwise
      // every later open() would fail with "opened twice".
      launched.destroy();
      proc = null;
    }
  }
}
protected static class Builder extends SourceBuilder {
  /**
   * Takes 1-4 arguments - the command to run, whether to aggregate each
   * output as a single event, whether to restart after one execution is
   * finished, and how long to wait before restarting. Omitted arguments
   * default to false, false and 0 respectively.
   */
  @Override
  public EventSource build(String... argv) {
    Preconditions.checkArgument(argv.length >= 1 && argv.length <= 4,
        "exec(\"cmdline \"[,aggregate [,restart [,period]]]], )");
    final int argc = argv.length;
    // Each optional argument is parsed only when it was actually supplied.
    final boolean aggregate = argc > 1 && Boolean.parseBoolean(argv[1]);
    final boolean restart = argc > 2 && Boolean.parseBoolean(argv[2]);
    final int period = argc > 3 ? Integer.parseInt(argv[3]) : 0;
    return new ExecNioSource(argv[0], aggregate, restart, period);
  }
}
/**
 * Returns a builder for a source that periodically execs a program and takes
 * the entire output as the body of an event. The builder takes exactly two
 * arguments - the command to run, and the time in millis to sleep before
 * executing again.
 */
public static SourceBuilder buildPeriodic() {
  return new SourceBuilder() {
    @Override
    public EventSource build(String... argv) {
      Preconditions.checkArgument(argv.length == 2,
          "execPeriodic(\"cmdline \",period)");
      final int sleepMillis = Integer.parseInt(argv[1]);
      // periodic sources always aggregate and always restart
      return new ExecNioSource(argv[0], true, true, sleepMillis);
    }
  };
}
/**
 * Returns a builder for a source that execs a long running program and takes
 * each line of its output as the body of an event. The builder takes exactly
 * one argument, the command to run. If the command exits, the exec source
 * returns null, signalling the end of records.
 */
public static SourceBuilder buildStream() {
  return new SourceBuilder() {
    @Override
    public EventSource build(String... argv) {
      Preconditions.checkArgument(argv.length == 1,
          "execStream(\"cmdline \")");
      // line mode, no restart, so the period is irrelevant
      return new ExecNioSource(argv[0], false, false, 0);
    }
  };
}
/**
 * Returns a new {@link Builder}, the general exec source builder that accepts
 * the command plus optional aggregate, restart and period arguments.
 */
public static SourceBuilder builder() {
return new Builder();
}
}