// Package picard.illumina.parser.readers
//
// Source Code of picard.illumina.parser.readers.BclReader

/*
* The MIT License
*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.illumina.parser.readers;

import htsjdk.samtools.Defaults;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeIOException;
import picard.PicardException;
import picard.illumina.parser.BclData;
import picard.illumina.parser.TileIndex;
import picard.util.UnsignedTypeUtil;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.List;
import java.util.zip.GZIPInputStream;

/**
* BCL Files are base call and quality score binary files containing a (base,quality) pair for successive clusters.
* The file is structured as follows:
* Bytes 1-4 : unsigned int numClusters (little-endian)
* Bytes 5 to (numClusters + 4) : one base/quality byte per cluster
* <p/>
* The base/quality scores are organized as follows (with one exception, SEE BELOW):
* The right 2 most bits (these are the LEAST significant bits) indicate the base, where
* A=00(0x00), C=01(0x01), G=10(0x02), and T=11(0x03)
* <p/>
* The remaining 6 bits (the MOST significant bits) compose the quality score, which is an unsigned int.
* <p/>
* EXCEPTION: If a byte is entirely 0 (e.g. byteRead == 0) then it is a no call, the base
* becomes '.' and the Quality becomes 2, the default illumina masking value
* <p/>
* (E.g. if we get a value in binary of 10001011 it gets transformed as follows:
* <p/>
* Value read: 10001011(0x8B)
* <p/>
* Quality     Base
* <p/>
* 100010      11
* 00100010    0x03
* 0x22        T
* 34          T
* <p/>
* So the output base/quality will be a (T/34)
*/
public class BclReader implements CloseableIterator<BclData> {
    private static final byte BASE_MASK = 0x0003;
    private static final int HEADER_SIZE = 4;
    private static final byte[] BASE_LOOKUP = new byte[]{'A', 'C', 'G', 'T'};

    private final InputStream[] streams;
    private final int[] outputLengths;
    int[] numClustersPerCycle;

    private final BclQualityEvaluationStrategy bclQualityEvaluationStrategy;
    private BclData queue = null;

    public BclReader(final List<File> bclsForOneTile, final int[] outputLengths,
                     final BclQualityEvaluationStrategy bclQualityEvaluationStrategy, final boolean seekable) {
        try {
            this.bclQualityEvaluationStrategy = bclQualityEvaluationStrategy;
            this.outputLengths = outputLengths;

            int cycles = 0;
            for (final int outputLength : outputLengths) {
                cycles += outputLength;
            }
            this.streams = new InputStream[cycles];
            this.numClustersPerCycle = new int[cycles];

            final ByteBuffer byteBuffer = ByteBuffer.allocate(HEADER_SIZE);
            byteBuffer.order(ByteOrder.LITTLE_ENDIAN);

            for (int i = 0; i < cycles; ++i) {
                final File bclFile = bclsForOneTile.get(i);
                if (bclFile == null) {
                    close();
                    throw new RuntimeIOException(String.format("Could not find BCL file for cycle %d", i));
                }
                final String filePath = bclFile.getName();
                final boolean isGzip = filePath.endsWith(".gz");
                final boolean isBgzf = filePath.endsWith(".bgzf");
                final InputStream stream = open(bclFile, seekable, isGzip, isBgzf);
                final int read = stream.read(byteBuffer.array());
                if (read != HEADER_SIZE) {
                    close();
                    throw new RuntimeIOException(String.format("BCL %s has invalid header structure.", bclFile.getAbsoluteFile()));
                }
                numClustersPerCycle[i] = byteBuffer.getInt();
                if (!isBgzf && !isGzip) {
                    assertProperFileStructure(bclFile, numClustersPerCycle[i], stream);
                }
                this.streams[i] = stream;
                byteBuffer.clear();
            }
        } catch (final IOException ioe) {
            throw new RuntimeIOException(ioe);
        }
    }

    public static boolean isGzipped(final File file) {
        return file.getAbsolutePath().endsWith(".gz");
    }

    public static boolean isBlockGzipped(final File file) {
        return file.getAbsolutePath().endsWith(".bgzf");
    }

    public static long getNumberOfClusters(final File file) {
        InputStream stream = null;
        try {
            if (isBlockGzipped(file)) stream = new BlockCompressedInputStream(IOUtil.maybeBufferedSeekableStream(file));
            else if (isGzipped(file)) stream = new GZIPInputStream(IOUtil.maybeBufferInputStream(new FileInputStream(file)));
            else stream = IOUtil.maybeBufferInputStream(new FileInputStream(file));

            return getNumberOfClusters(file.getAbsolutePath(), stream);

        } catch (final IOException ioe) {
            throw new PicardException("Could not open file " + file.getAbsolutePath() + " to get its cluster count: " + ioe.getMessage(), ioe);
        } finally {
            CloserUtil.close(stream);
        }
    }

    private static long getNumberOfClusters(final String filePath, final InputStream inputStream) {
        final byte[] header = new byte[HEADER_SIZE];

        try {
            final int headerBytesRead = inputStream.read(header);
            if (headerBytesRead != HEADER_SIZE) {
                throw new PicardException("Malformed file, expected header of size " + HEADER_SIZE + " but received " + headerBytesRead);
            }
        } catch (final IOException ioe) {
            throw new PicardException("Unable to read header for file (" + filePath + ")", ioe);
        }

        final ByteBuffer headerBuf = ByteBuffer.wrap(header);
        headerBuf.order(ByteOrder.LITTLE_ENDIAN);
        return UnsignedTypeUtil.uIntToLong(headerBuf.getInt());
    }


    public BclReader(final File bclFile, final BclQualityEvaluationStrategy bclQualityEvaluationStrategy, final boolean seekable) {
        try {

            this.outputLengths = new int[]{1};
            this.streams = new InputStream[1];
            this.numClustersPerCycle = new int[]{1};
            this.bclQualityEvaluationStrategy = bclQualityEvaluationStrategy;

            final ByteBuffer byteBuffer = ByteBuffer.allocate(HEADER_SIZE);
            final String filePath = bclFile.getName();
            final boolean isGzip = filePath.endsWith(".gz");
            final boolean isBgzf = filePath.endsWith(".bgzf");
            final InputStream stream = open(bclFile, seekable, isGzip, isBgzf);
            final int read = stream.read(byteBuffer.array());

            if (read != HEADER_SIZE) {
                throw new RuntimeIOException(String.format("BCL %s has invalid header structure.", bclFile.getAbsoluteFile()));
            }

            byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
            this.numClustersPerCycle[0] = byteBuffer.getInt();
            if (!isBgzf && !isGzip) {
                assertProperFileStructure(bclFile, this.numClustersPerCycle[0], stream);
            }
            this.streams[0] = stream;
        } catch (final IOException ioe) {
            throw new PicardException("IOException opening file " + bclFile.getAbsoluteFile(), ioe);
        }
    }

    void assertProperFileStructure(final File file, final int numClusters, final InputStream stream) {
        final long elementsInFile = file.length() - HEADER_SIZE;
        if (numClusters != elementsInFile) {
            CloserUtil.close(stream);
            throw new PicardException("Expected " + numClusters + " in file but found " + elementsInFile);
        }
    }

    InputStream open(final File file, final boolean seekable, final boolean isGzip, final boolean isBgzf) throws IOException {
        final String filePath = file.getAbsolutePath();

        try {
            // Open up a buffered stream to read from the file and optionally wrap it in a gzip stream
            // if necessary
            if (isBgzf) {
                // Only BlockCompressedInputStreams can seek, and only if they are fed a SeekableStream.
                return new BlockCompressedInputStream(IOUtil.maybeBufferedSeekableStream(file));
            } else if (isGzip) {
                if (seekable) {
                    throw new IllegalArgumentException(
                            String.format("Cannot create a seekable reader for gzip bcl: %s.", filePath)
                    );
                }
                return (IOUtil.maybeBufferInputStream(new GZIPInputStream(new FileInputStream(file), Defaults.BUFFER_SIZE / 2),
                        Defaults.BUFFER_SIZE / 2));
            } else {
                if (seekable) {
                    throw new IllegalArgumentException(
                            String.format("Cannot create a seekable reader for provided bcl: %s.", filePath)
                    );
                }
                return IOUtil.maybeBufferInputStream(new FileInputStream(file));
            }
        } catch (final FileNotFoundException fnfe) {
            throw new PicardException("File not found: (" + filePath + ")", fnfe);
        } catch (final IOException ioe) {
            throw new PicardException("Error reading file: (" + filePath + ")", ioe);
        }
    }

    public void close() {
        for (final InputStream stream : this.streams) {
            CloserUtil.close(stream);
        }
    }

    @Override
    public boolean hasNext() {
        if (queue == null) {
            advance();
        }
        return queue != null;
    }

    private long getNumClusters() {
        return numClustersPerCycle[0];
    }

    protected void assertProperFileStructure(final File file) {
        final long elementsInFile = file.length() - HEADER_SIZE;
        if (numClustersPerCycle[0] != elementsInFile) {
            throw new PicardException("Expected " + numClustersPerCycle[0+ " in file " + file.getAbsolutePath() + " but found " + elementsInFile);

        }
    }

    public BclData next() {
        if (queue == null) {
            advance();
        }

        final BclData data = queue;
        queue = null;
        return data;
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }

    void advance() {
        int totalCycleCount = 0;
        final BclData data = new BclData(outputLengths);
        for (int read = 0; read < outputLengths.length; read++) {
            for (int cycle = 0; cycle < outputLengths[read]; ++cycle) {
                try {
                    final int readByte = this.streams[totalCycleCount].read();
                    if (readByte == -1) {
                        queue = null;
                        return;
                    }

                    if (readByte == 0) {
                        //NO CALL, don't confuse with an A call
                        data.bases[read][cycle] = (byte) '.';
                        data.qualities[read][cycle] = (byte) 2;
                    } else {
                        data.bases[read][cycle] = BASE_LOOKUP[readByte & BASE_MASK];
                        data.qualities[read][cycle] = bclQualityEvaluationStrategy.reviseAndConditionallyLogQuality((byte) (readByte >>> 2));
                    }
                    totalCycleCount++;
                } catch (final IOException ioe) {
                    throw new RuntimeIOException(ioe);
                }

            }
        }
        this.queue = data;
    }

    public static BclReader makeSeekable(final List<File> files, final BclQualityEvaluationStrategy bclQualityEvaluationStrategy, final int[] outputLengths) {
        return new BclReader(files, outputLengths, bclQualityEvaluationStrategy, true);
    }

    public int seek(final List<File> files, final TileIndex tileIndex, final int currentTile) {
        int count = 0;
        int numClustersInTile = 0;
        for (final InputStream inputStream : streams) {
            final TileIndex.TileIndexRecord tileIndexRecord = tileIndex.findTile(currentTile);
            final BclIndexReader bclIndexReader = new BclIndexReader(files.get(count));
            final long virtualFilePointer = bclIndexReader.get(tileIndexRecord.getZeroBasedTileNumber());
            if (!(inputStream instanceof BlockCompressedInputStream)) {
                throw new UnsupportedOperationException("Seeking only allowed on bzgf");
            } else {
                try {
                    if (tileIndex.getNumTiles() != bclIndexReader.getNumTiles()) {
                        throw new PicardException(String.format("%s.getNumTiles(%d) != %s.getNumTiles(%d)",
                                tileIndex.getFile().getAbsolutePath(), tileIndex.getNumTiles(), bclIndexReader.getBciFile().getAbsolutePath(), bclIndexReader.getNumTiles()));
                    }
                    ((BlockCompressedInputStream) inputStream).seek(virtualFilePointer);
                    numClustersInTile = tileIndexRecord.getNumClustersInTile();
                } catch (final IOException e) {
                    throw new PicardException("Problem seeking to " + virtualFilePointer, e);
                }
            }
            count++;
        }
        return numClustersInTile;
    }
}
// TOP
//
// Related Classes of picard.illumina.parser.readers.BclReader
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.