/*
* Copyright MapR Technologies, $year
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mapr.storm;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.Queue;
import java.util.Set;
import java.util.regex.Pattern;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
import com.google.common.collect.Queues;
import com.google.common.collect.Sets;
/**
* Scans through files in a directory as they appear.
* <p/>
* When you have finished processing a file, you can call
* nextInput() to get a stream to read from the next file. If there isn't a new file to read
* from, you will get a null.
* <p/>
* Eventually, when you call nextInput() again, if a new file or files has appeared in the meantime
* you will get streams that read from these new file(s). You should probably do something to
* moderate the rate at which you come back for new files, but other than that, this object
* gives you an infinite sequence of input streams whose concatenation is the concatenation
* of all the files that have ever appeared in this directory.
*/
public class DirectoryScanner implements Serializable {
private static final long serialVersionUID = -8743538912863486122L;
private Logger log = LoggerFactory.getLogger(DirectoryScanner.class);
private final File inputDirectory;
private final Pattern fileNamePattern;
private Set<File> oldFiles = Sets.newHashSet();
private Queue<File> pendingFiles = Queues.newConcurrentLinkedQueue();
private InputStream liveInput = null;
private File liveFile;
public DirectoryScanner(File inputDirectory, Pattern fileNamePattern) {
Preconditions.checkArgument(inputDirectory.exists(), String.format("Directory %s should already exist", inputDirectory));
this.inputDirectory = inputDirectory;
this.fileNamePattern = fileNamePattern;
}
private File scanForFiles() {
if (pendingFiles.size() == 0) {
Set<File> files = Sets.newTreeSet(Lists.newArrayList(inputDirectory.listFiles(new FilenameFilter() {
@Override
public boolean accept(File file, String s) {
return fileNamePattern.matcher(s).matches();
}
})));
oldFiles.retainAll(files);
files.removeAll(oldFiles);
oldFiles.addAll(files);
pendingFiles.addAll(files);
}
return pendingFiles.poll();
}
public FileInputStream nextInput() {
FileInputStream r;
File nextInLine = scanForFiles();
if (nextInLine == null) {
log.trace("No new files");
r = null;
} else {
try {
log.trace("Opening {}", nextInLine);
r = new FileInputStream(nextInLine);
} catch (FileNotFoundException e) {
// bizarre, but conceivable in a directory with crazy updates happening
log.warn("File was found in scan, but disappeared before open {}", nextInLine);
r = null;
}
}
if (nextInLine != null) {
if (liveInput != null) {
if (liveFile != null) {
log.trace("Closing {}", liveFile);
}
try {
liveInput.close();
} catch (IOException e) {
log.warn("Error closing file {}", liveFile);
log.warn("Backtrace", e);
}
}
liveFile = nextInLine;
liveInput = r;
}
return r;
}
public File getLiveFile() {
return liveFile;
}
public Set<File> getOldFiles() {
return oldFiles;
}
public File getInputDirectory() {
return inputDirectory;
}
public Pattern getFileNamePattern() {
return fileNamePattern;
}
public void setOldFiles(Set<File> oldFiles) {
this.oldFiles = oldFiles;
}
public FileInputStream forceInput(File file, long offset) {
FileInputStream r = null;
try {
liveFile = file;
r = new FileInputStream(liveFile);
r.getChannel().position(offset);
liveInput = r;
} catch (IOException e) {
log.warn("Couldn't open replay file", e);
}
return r;
}
}