/*
* The MIT License
*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.illumina.parser.readers;
import htsjdk.samtools.Defaults;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeIOException;
import picard.PicardException;
import picard.illumina.parser.BclData;
import picard.illumina.parser.TileIndex;
import picard.util.UnsignedTypeUtil;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.List;
import java.util.zip.GZIPInputStream;
/**
* BCL Files are base call and quality score binary files containing a (base,quality) pair for successive clusters.
* The file is structured as followed:
* Bytes 1-4 : unsigned int numClusters
* Bytes 5-numClusters + 5 : 1 byte base/quality score
* <p/>
* The base/quality scores are organized as follows (with one exception, SEE BELOW):
* The right 2 most bits (these are the LEAST significant bits) indicate the base, where
* A=00(0x00), C=01(0x01), G=10(0x02), and T=11(0x03)
* <p/>
* The remaining bytes compose the quality score which is an unsigned int.
* <p/>
* EXCEPTION: If a byte is entirely 0 (e.g. byteRead == 0) then it is a no call, the base
* becomes '.' and the Quality becomes 2, the default illumina masking value
* <p/>
* (E.g. if we get a value in binary of 10001011 it gets transformed as follows:
* <p/>
* Value read: 10001011(0x8B)
* <p/>
* Quality Base
* <p/>
* 100010 11
* 00100010 0x03
* 0x22 T
* 34 T
* <p/>
* So the output base/quality will be a (T/34)
*/
public class BclReader implements CloseableIterator<BclData> {
private static final byte BASE_MASK = 0x0003;
private static final int HEADER_SIZE = 4;
private static final byte[] BASE_LOOKUP = new byte[]{'A', 'C', 'G', 'T'};
private final InputStream[] streams;
private final int[] outputLengths;
int[] numClustersPerCycle;
private final BclQualityEvaluationStrategy bclQualityEvaluationStrategy;
private BclData queue = null;
public BclReader(final List<File> bclsForOneTile, final int[] outputLengths,
final BclQualityEvaluationStrategy bclQualityEvaluationStrategy, final boolean seekable) {
try {
this.bclQualityEvaluationStrategy = bclQualityEvaluationStrategy;
this.outputLengths = outputLengths;
int cycles = 0;
for (final int outputLength : outputLengths) {
cycles += outputLength;
}
this.streams = new InputStream[cycles];
this.numClustersPerCycle = new int[cycles];
final ByteBuffer byteBuffer = ByteBuffer.allocate(HEADER_SIZE);
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
for (int i = 0; i < cycles; ++i) {
final File bclFile = bclsForOneTile.get(i);
if (bclFile == null) {
close();
throw new RuntimeIOException(String.format("Could not find BCL file for cycle %d", i));
}
final String filePath = bclFile.getName();
final boolean isGzip = filePath.endsWith(".gz");
final boolean isBgzf = filePath.endsWith(".bgzf");
final InputStream stream = open(bclFile, seekable, isGzip, isBgzf);
final int read = stream.read(byteBuffer.array());
if (read != HEADER_SIZE) {
close();
throw new RuntimeIOException(String.format("BCL %s has invalid header structure.", bclFile.getAbsoluteFile()));
}
numClustersPerCycle[i] = byteBuffer.getInt();
if (!isBgzf && !isGzip) {
assertProperFileStructure(bclFile, numClustersPerCycle[i], stream);
}
this.streams[i] = stream;
byteBuffer.clear();
}
} catch (final IOException ioe) {
throw new RuntimeIOException(ioe);
}
}
public static boolean isGzipped(final File file) {
return file.getAbsolutePath().endsWith(".gz");
}
public static boolean isBlockGzipped(final File file) {
return file.getAbsolutePath().endsWith(".bgzf");
}
public static long getNumberOfClusters(final File file) {
InputStream stream = null;
try {
if (isBlockGzipped(file)) stream = new BlockCompressedInputStream(IOUtil.maybeBufferedSeekableStream(file));
else if (isGzipped(file)) stream = new GZIPInputStream(IOUtil.maybeBufferInputStream(new FileInputStream(file)));
else stream = IOUtil.maybeBufferInputStream(new FileInputStream(file));
return getNumberOfClusters(file.getAbsolutePath(), stream);
} catch (final IOException ioe) {
throw new PicardException("Could not open file " + file.getAbsolutePath() + " to get its cluster count: " + ioe.getMessage(), ioe);
} finally {
CloserUtil.close(stream);
}
}
private static long getNumberOfClusters(final String filePath, final InputStream inputStream) {
final byte[] header = new byte[HEADER_SIZE];
try {
final int headerBytesRead = inputStream.read(header);
if (headerBytesRead != HEADER_SIZE) {
throw new PicardException("Malformed file, expected header of size " + HEADER_SIZE + " but received " + headerBytesRead);
}
} catch (final IOException ioe) {
throw new PicardException("Unable to read header for file (" + filePath + ")", ioe);
}
final ByteBuffer headerBuf = ByteBuffer.wrap(header);
headerBuf.order(ByteOrder.LITTLE_ENDIAN);
return UnsignedTypeUtil.uIntToLong(headerBuf.getInt());
}
public BclReader(final File bclFile, final BclQualityEvaluationStrategy bclQualityEvaluationStrategy, final boolean seekable) {
try {
this.outputLengths = new int[]{1};
this.streams = new InputStream[1];
this.numClustersPerCycle = new int[]{1};
this.bclQualityEvaluationStrategy = bclQualityEvaluationStrategy;
final ByteBuffer byteBuffer = ByteBuffer.allocate(HEADER_SIZE);
final String filePath = bclFile.getName();
final boolean isGzip = filePath.endsWith(".gz");
final boolean isBgzf = filePath.endsWith(".bgzf");
final InputStream stream = open(bclFile, seekable, isGzip, isBgzf);
final int read = stream.read(byteBuffer.array());
if (read != HEADER_SIZE) {
throw new RuntimeIOException(String.format("BCL %s has invalid header structure.", bclFile.getAbsoluteFile()));
}
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
this.numClustersPerCycle[0] = byteBuffer.getInt();
if (!isBgzf && !isGzip) {
assertProperFileStructure(bclFile, this.numClustersPerCycle[0], stream);
}
this.streams[0] = stream;
} catch (final IOException ioe) {
throw new PicardException("IOException opening file " + bclFile.getAbsoluteFile(), ioe);
}
}
void assertProperFileStructure(final File file, final int numClusters, final InputStream stream) {
final long elementsInFile = file.length() - HEADER_SIZE;
if (numClusters != elementsInFile) {
CloserUtil.close(stream);
throw new PicardException("Expected " + numClusters + " in file but found " + elementsInFile);
}
}
InputStream open(final File file, final boolean seekable, final boolean isGzip, final boolean isBgzf) throws IOException {
final String filePath = file.getAbsolutePath();
try {
// Open up a buffered stream to read from the file and optionally wrap it in a gzip stream
// if necessary
if (isBgzf) {
// Only BlockCompressedInputStreams can seek, and only if they are fed a SeekableStream.
return new BlockCompressedInputStream(IOUtil.maybeBufferedSeekableStream(file));
} else if (isGzip) {
if (seekable) {
throw new IllegalArgumentException(
String.format("Cannot create a seekable reader for gzip bcl: %s.", filePath)
);
}
return (IOUtil.maybeBufferInputStream(new GZIPInputStream(new FileInputStream(file), Defaults.BUFFER_SIZE / 2),
Defaults.BUFFER_SIZE / 2));
} else {
if (seekable) {
throw new IllegalArgumentException(
String.format("Cannot create a seekable reader for provided bcl: %s.", filePath)
);
}
return IOUtil.maybeBufferInputStream(new FileInputStream(file));
}
} catch (final FileNotFoundException fnfe) {
throw new PicardException("File not found: (" + filePath + ")", fnfe);
} catch (final IOException ioe) {
throw new PicardException("Error reading file: (" + filePath + ")", ioe);
}
}
public void close() {
for (final InputStream stream : this.streams) {
CloserUtil.close(stream);
}
}
@Override
public boolean hasNext() {
if (queue == null) {
advance();
}
return queue != null;
}
private long getNumClusters() {
return numClustersPerCycle[0];
}
protected void assertProperFileStructure(final File file) {
final long elementsInFile = file.length() - HEADER_SIZE;
if (numClustersPerCycle[0] != elementsInFile) {
throw new PicardException("Expected " + numClustersPerCycle[0] + " in file " + file.getAbsolutePath() + " but found " + elementsInFile);
}
}
public BclData next() {
if (queue == null) {
advance();
}
final BclData data = queue;
queue = null;
return data;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
void advance() {
int totalCycleCount = 0;
final BclData data = new BclData(outputLengths);
for (int read = 0; read < outputLengths.length; read++) {
for (int cycle = 0; cycle < outputLengths[read]; ++cycle) {
try {
final int readByte = this.streams[totalCycleCount].read();
if (readByte == -1) {
queue = null;
return;
}
if (readByte == 0) {
//NO CALL, don't confuse with an A call
data.bases[read][cycle] = (byte) '.';
data.qualities[read][cycle] = (byte) 2;
} else {
data.bases[read][cycle] = BASE_LOOKUP[readByte & BASE_MASK];
data.qualities[read][cycle] = bclQualityEvaluationStrategy.reviseAndConditionallyLogQuality((byte) (readByte >>> 2));
}
totalCycleCount++;
} catch (final IOException ioe) {
throw new RuntimeIOException(ioe);
}
}
}
this.queue = data;
}
public static BclReader makeSeekable(final List<File> files, final BclQualityEvaluationStrategy bclQualityEvaluationStrategy, final int[] outputLengths) {
return new BclReader(files, outputLengths, bclQualityEvaluationStrategy, true);
}
public int seek(final List<File> files, final TileIndex tileIndex, final int currentTile) {
int count = 0;
int numClustersInTile = 0;
for (final InputStream inputStream : streams) {
final TileIndex.TileIndexRecord tileIndexRecord = tileIndex.findTile(currentTile);
final BclIndexReader bclIndexReader = new BclIndexReader(files.get(count));
final long virtualFilePointer = bclIndexReader.get(tileIndexRecord.getZeroBasedTileNumber());
if (!(inputStream instanceof BlockCompressedInputStream)) {
throw new UnsupportedOperationException("Seeking only allowed on bzgf");
} else {
try {
if (tileIndex.getNumTiles() != bclIndexReader.getNumTiles()) {
throw new PicardException(String.format("%s.getNumTiles(%d) != %s.getNumTiles(%d)",
tileIndex.getFile().getAbsolutePath(), tileIndex.getNumTiles(), bclIndexReader.getBciFile().getAbsolutePath(), bclIndexReader.getNumTiles()));
}
((BlockCompressedInputStream) inputStream).seek(virtualFilePointer);
numClustersInTile = tileIndexRecord.getNumClustersInTile();
} catch (final IOException e) {
throw new PicardException("Problem seeking to " + virtualFilePointer, e);
}
}
count++;
}
return numClustersInTile;
}
}