/*
* The MIT License
*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.illumina.parser;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import picard.PicardException;
import picard.illumina.parser.fakers.BarcodeFileFaker;
import picard.illumina.parser.fakers.BclFileFaker;
import picard.illumina.parser.fakers.ClocsFileFaker;
import picard.illumina.parser.fakers.FilterFileFaker;
import picard.illumina.parser.fakers.LocsFileFaker;
import picard.illumina.parser.fakers.PosFileFaker;
import picard.illumina.parser.readers.TileMetricsOutReader;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import java.util.regex.Pattern;
/**
* General utils for dealing with Illumina files as well as utils for specific, supported formats.
* This class contains utils that span across multiple Illumina files but its primary intent
* is to provide support for basic file types. Each supported file type can be accessed
* via {@link #getUtil(SupportedIlluminaFormat)}. When IlluminaFileUtil is created it is parameterized
* by basecallDir and lane, and all ParameterizedFileUtils created by IlluminaFileUtil will also be
* parameterized in this fashion.
*
* @author jburke@broadinstitute.org
*/
public class IlluminaFileUtil {
    /** Matches cycle sub-directory names of the form {@code C<digits>.1}; group 1 captures the digits. */
    public static final Pattern CYCLE_SUBDIRECTORY_PATTERN = Pattern.compile("^C(\\d+)\\.1$");

    /** The file formats for which {@link #getUtil(SupportedIlluminaFormat)} can produce a file util. */
    public enum SupportedIlluminaFormat {
        Bcl,
        Locs,
        Clocs,
        Pos,
        Filter,
        Barcode,
        MultiTileFilter,
        MultiTileLocs,
        MultiTileBcl
    }

    private final File basecallLaneDir;
    private final File intensityLaneDir;
    private final File basecallDir;
    private final File barcodeDir;
    private final File intensityDir;
    private final int lane;
    private final File tileMetricsOut;

    /** Lazily populated cache of the file utils handed out by {@link #getUtil(SupportedIlluminaFormat)}. */
    private final Map<SupportedIlluminaFormat, ParameterizedFileUtil> utils =
            new HashMap<SupportedIlluminaFormat, ParameterizedFileUtil>();

    /**
     * Create a file util with no separate barcode directory; barcode files are then looked up
     * in basecallDir.
     *
     * @param basecallDir The basecalls directory; see the three-argument constructor for layout requirements
     * @param lane        The lane to inspect
     */
    public IlluminaFileUtil(final File basecallDir, final int lane) {
        this(basecallDir, null, lane);
    }

    /**
     * Create a file util for one lane. All other directories are derived from basecallDir:
     * the intensity directory is its parent, and InterOp/TileMetricsOut.bin is resolved three
     * levels above basecallDir. basecallDir must therefore have both a parent and a grandparent
     * path component, otherwise a NullPointerException is thrown.
     *
     * @param basecallDir The basecalls directory
     * @param barcodeDir  Directory holding the barcode files, or null to use basecallDir
     * @param lane        The lane to inspect
     */
    public IlluminaFileUtil(final File basecallDir, final File barcodeDir, final int lane) {
        this.lane = lane;
        this.basecallDir = basecallDir;
        this.barcodeDir = barcodeDir;
        this.intensityDir = basecallDir.getParentFile();
        final File dataDir = intensityDir.getParentFile();
        this.basecallLaneDir = new File(basecallDir, longLaneStr(lane));
        this.intensityLaneDir = new File(intensityDir, longLaneStr(lane));
        final File interopDir = new File(dataDir.getParentFile(), "InterOp");
        this.tileMetricsOut = new File(interopDir, "TileMetricsOut.bin");
    }

    /**
     * Return the lane we're inspecting
     */
    public int getLane() {
        return lane;
    }

    /**
     * Given a file type, get the Parameterized File Util object associated with it. The util is
     * created on first request and the same instance is returned on subsequent requests.
     *
     * @param format The format whose file util is wanted
     * @return The file util for the given format
     * @throws PicardException if format is Bcl and both .bcl and .bcl.gz files are available
     */
    public ParameterizedFileUtil getUtil(final SupportedIlluminaFormat format) {
        ParameterizedFileUtil parameterizedFileUtil = utils.get(format);
        if (parameterizedFileUtil == null) {
            switch (format) {
                case Bcl:
                    parameterizedFileUtil = makeBclFileUtil();
                    break;
                case Locs:
                    parameterizedFileUtil = new PerTileFileUtil(".locs", intensityLaneDir, new LocsFileFaker(), lane);
                    break;
                case Clocs:
                    parameterizedFileUtil = new PerTileFileUtil(".clocs", intensityLaneDir, new ClocsFileFaker(), lane);
                    break;
                case Pos:
                    parameterizedFileUtil = new PerTileFileUtil("_pos.txt", intensityDir, new PosFileFaker(), lane);
                    break;
                case Filter:
                    parameterizedFileUtil = new PerTileFileUtil(".filter", basecallLaneDir, new FilterFileFaker(), lane);
                    break;
                case Barcode:
                    parameterizedFileUtil = new PerTileFileUtil("_barcode.txt",
                            barcodeDir != null ? barcodeDir : basecallDir, new BarcodeFileFaker(), lane);
                    break;
                case MultiTileFilter:
                    parameterizedFileUtil = new MultiTileFilterFileUtil(basecallLaneDir, lane);
                    break;
                case MultiTileLocs:
                    parameterizedFileUtil = new MultiTileLocsFileUtil(
                            new File(intensityDir, basecallLaneDir.getName()), basecallLaneDir, lane);
                    break;
                case MultiTileBcl:
                    parameterizedFileUtil = new MultiTileBclFileUtil(basecallLaneDir, lane);
                    break;
            }
            // Only cache a util that was actually built, so a miss is retried on the next call.
            if (parameterizedFileUtil != null) {
                utils.put(format, parameterizedFileUtil);
            }
        }
        return parameterizedFileUtil;
    }

    /**
     * Build the Bcl file util, choosing between the ".bcl" and ".bcl.gz" extensions based on
     * which files are available. When neither is available the ".bcl" util is returned.
     *
     * @throws PicardException if files with both extensions are available
     */
    private ParameterizedFileUtil makeBclFileUtil() {
        final ParameterizedFileUtil bclFileUtil =
                new PerTilePerCycleFileUtil(".bcl", basecallLaneDir, new BclFileFaker(), lane);
        final ParameterizedFileUtil gzBclFileUtil =
                new PerTilePerCycleFileUtil(".bcl.gz", basecallLaneDir, new BclFileFaker(), lane);

        // Query availability once per extension rather than once per branch.
        final boolean bclAvailable = bclFileUtil.filesAvailable();
        final boolean gzBclAvailable = gzBclFileUtil.filesAvailable();

        if (bclAvailable && gzBclAvailable) {
            throw new PicardException(
                    "Not all BCL files in " + basecallLaneDir.getAbsolutePath() + " have the same extension!");
        }
        return gzBclAvailable ? gzBclFileUtil : bclFileUtil;
    }

    /**
     * Return the list of tiles we would expect for this lane based on the metrics found in
     * InterOp/TileMetricsOut.bin. Each tile number appears once and the list is in ascending order.
     */
    public List<Integer> getExpectedTiles() {
        IOUtil.assertFileIsReadable(tileMetricsOut);

        // A TreeSet both de-duplicates the tile numbers and ensures predictable ordering.
        final TreeSet<Integer> expectedTiles = new TreeSet<Integer>();
        final Iterator<TileMetricsOutReader.IlluminaTileMetrics> tileMetrics = new TileMetricsOutReader(tileMetricsOut);
        try {
            while (tileMetrics.hasNext()) {
                final TileMetricsOutReader.IlluminaTileMetrics tileMetric = tileMetrics.next();
                if (tileMetric.getLaneNumber() == lane) {
                    expectedTiles.add(tileMetric.getTileNumber());
                }
            }
        } finally {
            // Release the reader even when iteration fails part-way through the file.
            CloserUtil.close(tileMetrics);
        }
        return new ArrayList<Integer>(expectedTiles);
    }

    /**
     * Get the available tiles for the given formats. If the formats have tile lists that differ
     * (in size or in content) then an exception is thrown.
     *
     * @param formats The formats to query; must be non-null and contain at least one format
     * @return The tiles reported by the first format's file util
     * @throws PicardException if formats is null or empty, or if the formats disagree on their tiles
     */
    public List<Integer> getActualTiles(final List<SupportedIlluminaFormat> formats) {
        if (formats == null) {
            throw new PicardException("Format list provided to getTiles was null!");
        }
        if (formats.size() == 0) {
            throw new PicardException(
                    "0 Formats were specified. You need to specify at least SupportedIlluminaFormat to use getTiles");
        }

        final List<Integer> tiles = getUtil(formats.get(0)).getTiles();
        for (int i = 1; i < formats.size(); i++) {
            final List<Integer> fmTiles = getUtil(formats.get(i)).getTiles();
            if (tiles.size() != fmTiles.size() || !tiles.containsAll(fmTiles)) {
                throw new PicardException(
                        "Formats do not have the same number of tiles! " + summarizeTileCounts(formats));
            }
        }
        return tiles;
    }

    /**
     * Return the InterOp/TileMetricsOut.bin file derived from the basecall directory.
     */
    public File tileMetricsOut() {
        return tileMetricsOut;
    }

    /**
     * Return a string representing the lane in the format "L00&lt;lane&gt;", i.e. "L" followed by
     * the lane number left-padded with zeros to at least three characters.
     *
     * @param lane The lane to transform
     * @return A long string representation of the lane
     */
    public static String longLaneStr(final int lane) {
        final String laneDigits = String.valueOf(lane);
        final StringBuilder padded = new StringBuilder("L");
        for (int i = laneDigits.length(); i < 3; i++) {
            padded.append('0');
        }
        return padded.append(laneDigits).toString();
    }

    /**
     * Join the given integers into a single string separated by ", "; an empty list yields "".
     */
    private static String liToStr(final List<Integer> intList) {
        final StringBuilder summary = new StringBuilder();
        for (int i = 0; i < intList.size(); i++) {
            if (i > 0) {
                summary.append(", ");
            }
            summary.append(String.valueOf(intList.get(i)));
        }
        return summary.toString();
    }

    /**
     * Describe the tiles found for each of the given formats as "extension(tile, tile, ...)"
     * entries separated by ", ". Each format is listed exactly once, in the order given.
     */
    private String summarizeTileCounts(final List<SupportedIlluminaFormat> formats) {
        final StringBuilder summary = new StringBuilder();
        boolean first = true;
        for (final SupportedIlluminaFormat format : formats) {
            final ParameterizedFileUtil pfu = getUtil(format);
            if (!first) {
                summary.append(", ");
            }
            first = false;
            summary.append(pfu.extension).append("(").append(liToStr(pfu.getTiles())).append(")");
        }
        return summary.toString();
    }
}