package picard.illumina.parser;
import htsjdk.samtools.util.IOUtil;
import picard.PicardException;
import picard.illumina.parser.fakers.FileFaker;
import java.io.File;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Base class for utilities that locate files whose names are parameterized by lane (and optionally
 * by tile), i.e. names of the form {@code s_<lane><extension>} or {@code s_<lane>_<tile><extension>}.
 * Subclasses decide how files are discovered, verified and faked.
 */
public abstract class ParameterizedFileUtil {
    /**
     * Regex fragment matching {@code s_<digits>_<1 to 5 digits>}; both numeric parts are capturing groups.
     */
    public static final String PER_TILE_PATTERN_STRING = "s_(\\d+)_(\\d{1,5})";

    /**
     * The file extension for this class, file extension does not have the standard meaning
     * in this instance.  It means, all the characters that come after the identifying portion of
     * the file (after lane, tile, and end that is).  So _qseq.txt and .filter are both file extensions
     */
    protected final String extension;

    /**
     * A pattern that will match files of this type for this lane
     */
    protected Pattern matchPattern;

    /**
     * The lane whose files this utility matches.
     */
    protected final int lane;

    /**
     * The tiles available for this file type. Not assigned in this class; subclasses are expected to
     * populate it, so {@link #getTiles()} returns {@code null} until they do.
     */
    protected List<Integer> tiles;

    /**
     * If you think of the file system as a tree, this is the deepest directory(node) on the tree that
     * still contains all of the files for this given type (e.g. If we're talking about BCLs the directory
     * structure is:
     * <p/>
     * <pre>
     *        BaseCall Dir
     *             |
     *            L001
     *      |      |         |
     *    C1.1    C2.1  ... Cn.1
     *      |      |         |
     *  bcl Files  ...   bclFiles
     * </pre>
     * <p/>
     * L001 is the base because it contains every BCL file in the run (though those files are nested in
     * other folders).
     */
    protected final File base;

    /**
     * The faker handed to the constructor; stored for use by subclasses (it is not used in this class).
     */
    protected final FileFaker faker;

    /**
     * Create a utility whose match pattern is derived from the lane and the file extension.
     *
     * @param laneTileRegex if true the pattern is {@code ^s_<lane>_(<tile>)<extension>$} (with an optional
     *                      {@code .gz}/{@code .bz2} suffix for extensions ending in {@code .txt});
     *                      if false the pattern is {@code ^s_<lane><extension>$}
     * @param extension     everything that follows the identifying portion of the file name; periods in it
     *                      are matched literally
     * @param base          the deepest directory that still contains all files of this type
     * @param faker         the faker associated with this file type
     * @param lane          the lane to match
     * @throws PicardException if {@code laneTileRegex} is true and {@code lane} is negative
     */
    public ParameterizedFileUtil(final boolean laneTileRegex, final String extension, final File base,
                                 final FileFaker faker, final int lane) {
        this(extension, base, faker, lane);
        if (laneTileRegex) {
            matchPattern = Pattern.compile(escapePeriods(makeLaneTileRegex(processTxtExtension(extension), lane)));
        } else {
            matchPattern = Pattern.compile(escapePeriods(makeLaneRegex(extension, lane)));
        }
    }

    /**
     * Create a utility with an explicitly supplied match pattern.
     *
     * @param pattern   a regular expression compiled as-is (no period escaping is applied to it)
     * @param extension everything that follows the identifying portion of the file name
     * @param base      the deepest directory that still contains all files of this type
     * @param faker     the faker associated with this file type
     * @param lane      the lane these files belong to
     */
    public ParameterizedFileUtil(final String pattern, final String extension, final File base, final FileFaker faker,
                                 final int lane) {
        this(extension, base, faker, lane);
        this.matchPattern = Pattern.compile(pattern);
    }

    /**
     * Shared field initialization for the public constructors; leaves {@code matchPattern} unset.
     */
    private ParameterizedFileUtil(final String extension, final File base, final FileFaker faker,
                                  final int lane) {
        this.faker = faker;
        this.extension = extension;
        this.base = base;
        this.lane = lane;
    }

    /**
     * Determine whether or not files are available
     *
     * @return return true if files are found matching this types pattern, false otherwise
     */
    public abstract boolean filesAvailable();

    /**
     * Return a list of all tiles available for this file format and run
     *
     * @return A List of tile integers
     */
    public List<Integer> getTiles() {
        return tiles;
    }

    /**
     * Given the expected tiles/expected cycles for this file type, return a list of error messages describing any
     * missing/or malformed files
     *
     * @param expectedTiles  An ordered list of tile numbers
     * @param expectedCycles An ordered list of cycle numbers that may contain gaps
     * @return A list of error messages for this format
     */
    public abstract List<String> verify(List<Integer> expectedTiles, int[] expectedCycles);

    /**
     * Given the expected tiles/expected cycles for this file type create a set of fake files such that the
     * verification criteria are met.
     *
     * @param expectedTiles An ordered list of tile numbers
     * @param cycles        An ordered list of cycle numbers that may contain gaps
     * @param format        The format of the files that are to be faked
     * @return A list of messages; presumably errors encountered while faking -- confirm against implementations
     */
    public abstract List<String> fakeFiles(List<Integer> expectedTiles, int[] cycles,
                                           IlluminaFileUtil.SupportedIlluminaFormat format);

    /**
     * Extract the tile number from a file name using {@code matchPattern}.
     * <p/>
     * The tile is read from the first capturing group, so {@code matchPattern} must define one (the
     * lane/tile pattern built by {@link #makeLaneTileRegex(String, int)} does; the lane-only pattern does not).
     *
     * @param fileName Filename to analyze for data
     * @return the tile number parsed from the first capturing group, or null if the whole file name does
     *         not match {@code matchPattern}
     */
    protected Integer fileToTile(final String fileName) {
        final Matcher matcher = matchPattern.matcher(fileName);
        if (!matcher.matches()) {
            return null;
        }
        return Integer.parseInt(matcher.group(1));
    }

    /**
     * Return a regex string for finding Lane and Tile given a file extension pattern
     *
     * @param fileNameEndPattern regex text appended after the tile group; it is inserted verbatim
     * @param lane               the lane number to embed in the regex
     * @return {@code ^s_<lane>_(\d{1,5})<fileNameEndPattern>$}, with the tile as capturing group 1
     * @throws PicardException if lane is negative
     */
    public static String makeLaneTileRegex(final String fileNameEndPattern, final int lane) {
        if (lane < 0) {
            throw new PicardException("Lane (" + lane + ") cannot be negative");
        }
        return "^" + "s_" + lane + "_(\\d{1,5})" + fileNameEndPattern + "$";
    }

    /**
     * Return a regex string matching {@code s_<lane><fileNameEndPattern>} for the whole file name.
     * Unlike {@link #makeLaneTileRegex(String, int)} this does not validate the lane and has no capturing group.
     */
    private static String makeLaneRegex(final String fileNameEndPattern, final int lane) {
        return "^s_" + lane + fileNameEndPattern + "$";
    }

    /**
     * The period separator is expected in the file extension, since some do not start with it.
     * Every period that is not already preceded by a backslash is escaped so that it matches a literal
     * '.' rather than any character. Periods that are already escaped (such as those in the optional
     * suffix added by {@link #processTxtExtension(String)}) are left untouched.
     */
    private static String escapePeriods(final String preEscaped) {
        // Search regex  (?<!\\)\.  : a '.' not immediately preceded by a backslash.
        // Replacement   \\.        : in a replaceAll replacement string a backslash merely escapes the next
        //                            character, so a literal backslash has to be written as two backslashes.
        //                            (A replacement of just "\\." would emit a bare '.', i.e. change nothing.)
        return preEscaped.replaceAll("(?<!\\\\)\\.", "\\\\.");
    }

    /**
     * For filename patterns that end with .txt tack on an optional .gz or .bz2 extension.
     * The periods of the added suffix are already regex-escaped.
     */
    private static String processTxtExtension(final String fileNameEndPattern) {
        if (fileNameEndPattern.endsWith(".txt")) {
            return fileNameEndPattern + "(\\.gz|\\.bz2)?";
        } else {
            return fileNameEndPattern;
        }
    }

    /**
     * Return all non-empty files that match pattern of the given file type in the given base directory,
     * keyed by tile.
     * <p/>
     * NOTE(review): files are selected with {@code pattern} but the tile key is derived via
     * {@link #fileToTile(String)}, which uses {@code matchPattern}. If the two differ, a selected file may
     * yield a null key -- callers presumably always pass {@code matchPattern}; confirm.
     *
     * @param baseDirectory the directory to search; if it does not exist an empty map is returned
     * @param pattern       the pattern file names must match
     * @return a map from tile to file; zero-length files are skipped
     */
    protected IlluminaFileMap getTiledFiles(final File baseDirectory, final Pattern pattern) {
        final IlluminaFileMap fileMap = new IlluminaFileMap();
        if (baseDirectory.exists()) {
            IOUtil.assertDirectoryIsReadable(baseDirectory);
            final File[] files = IOUtil.getFilesMatchingRegexp(baseDirectory, pattern);
            for (final File file : files) {
                if (file.length() > 0) {
                    fileMap.put(fileToTile(file.getName()), file);
                }
            }
        }
        return fileMap;
    }
}