/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.flume.handlers.text;
import java.io.File;
import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.cloudera.flume.conf.SourceFactory.SourceBuilder;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventSource;
import com.cloudera.flume.handlers.text.TailSource.Cursor;
import com.cloudera.flume.reporter.ReportEvent;
import com.cloudera.util.dirwatcher.DirChangeHandler;
import com.cloudera.util.dirwatcher.DirWatcher;
import com.cloudera.util.dirwatcher.RegexFileFilter;
import com.google.common.base.Preconditions;
/**
* This source tails all the files in a directory that match a specified
* regular expression.
*/
public class TailDirSource extends EventSource.Base {
  public static final Logger LOG = LoggerFactory.getLogger(TailDirSource.class);

  /** Pattern accepted for the optional start-date argument of {@link #builder()}. */
  private static final String START_DATE_PATTERN = "yyyy-MM-dd HH:mm";

  /**
   * Start date used when the builder is given none. It is parsed with
   * {@link #START_DATE_PATTERN} in the JVM default time zone, exactly like a
   * user supplied date.
   */
  private static final String DEFAULT_START_DATE = "1970-01-01 00:00";

  /** Usage text reported when the builder receives a bad number of arguments. */
  private static final String USAGE =
      "usage: tailDir(dir[, regex=\".*\"[, lastModifiedTime=\"1970-01-01 00:00\"]]) ";

  /** Directory watcher; non-null only between open() and close(). */
  private DirWatcher watcher;

  /** Underlying multi-file tail that actually produces the events. */
  private TailSource tail;

  /** Files last modified before this instant (epoch millis) are not tailed. */
  final private long startFromDateInEpoch;
  final private File dir;
  final private String regex;

  final private AtomicLong filesAdded = new AtomicLong();
  final private AtomicLong filesDeleted = new AtomicLong();

  final public static String A_FILESADDED = "filesAdded";
  final public static String A_FILESDELETED = "filesDeleted";
  final public static String A_FILESPRESENT = "filesPresent";

  /**
   * Creates a source that tails the files of a directory.
   *
   * @param f directory to watch; must not be null
   * @param regex regular expression handed to the {@link RegexFileFilter} that
   *          selects files; must not be null
   * @param startFromDateInEpoch minimum last-modified time (epoch millis) a
   *          file must have to be tailed
   * @throws IllegalArgumentException if {@code f} or {@code regex} is null
   */
  public TailDirSource(File f, String regex, long startFromDateInEpoch) {
    Preconditions.checkArgument(f != null, "File should not be null!");
    Preconditions.checkArgument(regex != null,
        "Regex filter should not be null");
    this.dir = f;
    this.regex = regex;
    this.startFromDateInEpoch = startFromDateInEpoch;
    // 100 ms between checks
    this.tail = new TailSource(100);
  }

  /**
   * Reacts to directory changes by adding cursors to, and removing cursors
   * from, the underlying tail. Only files that passed the age filter are
   * tracked in {@link #cursorsByName}.
   */
  private final class TailDirChangeHandler implements DirChangeHandler {
    /** Cursors of the files currently being tailed, keyed by file name. */
    private final Map<String, TailSource.Cursor> cursorsByName =
        new HashMap<String, TailSource.Cursor>();

    @Override
    public void fileCreated(File f) {
      // Add a new file to the multi tail.
      if (f.isDirectory()) {
        LOG.debug("Tail dir will not read or recurse "
            + "into subdirectory " + f);
        return;
      }

      // Read the timestamp once so the logged value is the one compared.
      long lastModified = f.lastModified();
      if (lastModified < startFromDateInEpoch) {
        LOG.info("File: " + f + " lastModifiedAt: " + lastModified
            + " is older than: " + startFromDateInEpoch
            + " not adding to list");
        return;
      }

      LOG.info("File: " + f + " lastModifiedAt: " + lastModified
          + " is younger than: " + startFromDateInEpoch + " adding to list");
      Cursor c = new Cursor(tail.sync, f);
      cursorsByName.put(f.getName(), c);
      tail.addCursor(c);
      filesAdded.incrementAndGet();
    }

    @Override
    public void fileDeleted(File f) {
      LOG.info("removed file " + f);
      Cursor c = cursorsByName.remove(f.getName());
      if (c == null) {
        // The file was never tailed (e.g. it was older than the start date),
        // so there is no cursor to remove and nothing to count.
        LOG.debug("Deleted file " + f + " was not being tailed; ignoring");
        return;
      }
      tail.removeCursor(c);
      filesDeleted.incrementAndGet();
    }
  }

  /**
   * Starts watching the directory and opens the underlying tail.
   *
   * Must be synchronized to isolate watcher
   *
   * @throws IllegalStateException if this source is already open
   * @throws IOException if the underlying tail fails to open
   */
  @Override
  synchronized public void open() throws IOException {
    Preconditions.checkState(watcher == null,
        "Attempting to open an already open TailDirSource (" + dir + ", \""
            + regex + "\", \"" + startFromDateInEpoch + "\")");
    // 250 ms between checks
    this.watcher = new DirWatcher(dir, new RegexFileFilter(regex), 250);
    synchronized (watcher) {
      this.watcher.addHandler(new TailDirChangeHandler());
      this.watcher.start();
    }
    tail.open();
  }

  /**
   * Closes the underlying tail and stops the directory watcher. Calling this
   * on a source whose watcher is not running (never opened, or already
   * closed) only closes the tail.
   *
   * @throws IOException if the underlying tail fails to close; the watcher is
   *           stopped regardless
   */
  @Override
  synchronized public void close() throws IOException {
    try {
      tail.close();
    } finally {
      // Always release the watcher, even when closing the tail failed.
      if (watcher != null) {
        synchronized (watcher) {
          watcher.stop();
        }
        watcher = null;
      }
    }
  }

  /**
   * Extends the base report with the number of files added, the number of
   * tailed files deleted, and the number of cursors currently held by the
   * tail.
   */
  @Override
  synchronized public ReportEvent getReport() {
    ReportEvent rpt = super.getReport();
    rpt.setLongMetric(A_FILESADDED, filesAdded.get());
    rpt.setLongMetric(A_FILESDELETED, filesDeleted.get());
    rpt.setLongMetric(A_FILESPRESENT, tail.cursors.size());
    return rpt;
  }

  /**
   * Returns the next event produced by the underlying tail and records it in
   * the event processing statistics.
   */
  @Override
  public Event next() throws IOException {
    // this cannot be in synchronized because it has a
    // blocking call to a queue inside it.
    Event e = tail.next();
    synchronized (this) {
      updateEventProcessingStats(e);
      return e;
    }
  }

  /**
   * Builder accepting {@code tailDir(dir[, regex[, lastModifiedTime]])}.
   * {@code regex} defaults to {@code ".*"} and {@code lastModifiedTime}
   * (format {@code yyyy-MM-dd HH:mm}) defaults to {@code 1970-01-01 00:00}.
   *
   * The built source throws {@link IllegalArgumentException} when the number
   * of arguments is not between one and three, or when the date cannot be
   * parsed.
   */
  public static SourceBuilder builder() {
    return new SourceBuilder() {
      @Override
      public EventSource build(String... argv) throws IllegalArgumentException {
        Preconditions.checkArgument(argv.length >= 1 && argv.length <= 3,
            USAGE);

        // default to accepting all
        String regex = argv.length > 1 ? argv[1] : ".*";
        String startDate = argv.length == 3 ? argv[2] : DEFAULT_START_DATE;

        long startFromDateInEpoch;
        try {
          // SimpleDateFormat is not thread-safe, so use a fresh instance per
          // call instead of sharing a static one.
          DateFormat format = new SimpleDateFormat(START_DATE_PATTERN);
          startFromDateInEpoch = format.parse(startDate).getTime();
        } catch (ParseException e) {
          throw new IllegalArgumentException(
              "Date should be in the following format: yyyy-MM-dd HH:mm\nIncorrectly formatted date: "
                  + startDate, e);
        }

        return new TailDirSource(new File(argv[0]), regex,
            startFromDateInEpoch);
      }
    };
  }
}