Package picard.analysis

Source Code of picard.analysis.CollectBaseDistributionByCycle

package picard.analysis;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;

import java.io.File;
import java.util.Arrays;
import java.util.List;

import htsjdk.samtools.util.StringUtil;
import picard.PicardException;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.programgroups.Metrics;
import picard.util.RExecutor;

@CommandLineProgramProperties(
        usage = "Program to chart the nucleotide distribution per cycle in a SAM or BAM file.",
        usageShort = "Program to chart the nucleotide distribution per cycle in a SAM or BAM file.",
        programGroup = Metrics.class
)
public class CollectBaseDistributionByCycle extends SinglePassSamProgram {

    @Option(shortName = "CHART", doc = "A file (with .pdf extension) to write the chart to.")
    public File CHART_OUTPUT;

    @Option(doc = "If set to true, calculate the base distribution over aligned reads only.")
    public boolean ALIGNED_READS_ONLY = false;

    @Option(doc = "If set to true calculate the base distribution over PF reads only.")
    public boolean PF_READS_ONLY = false;

    private HistogramGenerator hist;
    private String plotSubtitle = "";
    private final Log log = Log.getInstance(CollectBaseDistributionByCycle.class);

    public static void main(String[] args) {
        System.exit(new CollectBaseDistributionByCycle().instanceMain(args));
    }

    @Override
    protected void setup(final SAMFileHeader header, final File samFile) {
        IOUtil.assertFileIsWritable(CHART_OUTPUT);
        final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
        if (readGroups.size() == 1) {
            plotSubtitle = StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary());
        }
        hist = new HistogramGenerator();
    }

    @Override
    protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {
        if ((PF_READS_ONLY) && (rec.getReadFailsVendorQualityCheckFlag())) {
            return;
        }
        if ((ALIGNED_READS_ONLY) && (rec.getReadUnmappedFlag())) {
            return;
        }
        if (rec.isSecondaryOrSupplementary()) {
            return;
        }
        hist.addRecord(rec);
    }

    @Override
    protected void finish() {
        final MetricsFile<BaseDistributionByCycleMetrics, ?> metrics = getMetricsFile();
        hist.addToMetricsFile(metrics);
        metrics.write(OUTPUT);
        if (hist.isEmpty()) {
            log.warn("No valid bases found in input file. No plot will be produced.");
        } else {
            final int rResult = RExecutor.executeFromClasspath("picard/analysis/baseDistributionByCycle.R",
                    OUTPUT.getAbsolutePath(),
                    CHART_OUTPUT.getAbsolutePath(),
                    INPUT.getName(),
                    plotSubtitle);
            if (rResult != 0) {
                throw new PicardException("R script nucleotideDistributionByCycle.R failed with return code " + rResult);
            }
        }
    }

    private class HistogramGenerator {
        private int maxLengthSoFar = 0;
        final private long[][] firstReadTotalsByCycle = new long[5][maxLengthSoFar];
        private long[] firstReadCountsByCycle = new long[maxLengthSoFar];
        final private long[][] secondReadTotalsByCycle = new long[5][maxLengthSoFar];
        private long[] secondReadCountsByCycle = new long[maxLengthSoFar];
        private boolean seenSecondEnd = false;

        private int baseToInt(final byte base) {
            switch (base) {
                case 'A':
                case 'a':
                    return 0;
                case 'C':
                case 'c':
                    return 1;
                case 'G':
                case 'g':
                    return 2;
                case 'T':
                case 't':
                    return 3;
            }
            return 4;
        }

        void addRecord(final SAMRecord rec) {
            final byte[] bases = rec.getReadBases();
            if (bases == null) {
                return;
            }
            final int length = bases.length;
            final boolean rc = rec.getReadNegativeStrandFlag();
            ensureArraysBigEnough(length + 1);
            if ((rec.getReadPairedFlag()) && (rec.getSecondOfPairFlag())) {
                seenSecondEnd = true;
                for (int i = 0; i < length; i++) {
                    final int cycle = rc ? length - i : i + 1;
                    secondReadTotalsByCycle[baseToInt(bases[i])][cycle] += 1;
                    secondReadCountsByCycle[cycle] += 1;
                }
            } else {
                for (int i = 0; i < length; i++) {
                    final int cycle = rc ? length - i : i + 1;
                    firstReadTotalsByCycle[baseToInt(bases[i])][cycle] += 1;
                    firstReadCountsByCycle[cycle] += 1;
                }
            }
        }

        private void ensureArraysBigEnough(final int length) {
            if (length > maxLengthSoFar) {
                for (int i = 0; i < 5; i++) {
                    firstReadTotalsByCycle[i] = Arrays.copyOf(firstReadTotalsByCycle[i], length);
                    secondReadTotalsByCycle[i] = Arrays.copyOf(secondReadTotalsByCycle[i], length);
                }
                firstReadCountsByCycle = Arrays.copyOf(firstReadCountsByCycle, length);
                secondReadCountsByCycle = Arrays.copyOf(secondReadCountsByCycle, length);
                maxLengthSoFar = length;
            }
        }

        boolean isEmpty() {
            return maxLengthSoFar == 0;
        }

        public void addToMetricsFile(final MetricsFile<BaseDistributionByCycleMetrics, ?> metrics) {
            int firstReadLength = 0;
            for (int i = 0; i < maxLengthSoFar; i++) {
                if (0 != firstReadCountsByCycle[i]) {
                    final BaseDistributionByCycleMetrics metric = new BaseDistributionByCycleMetrics();
                    metric.READ_END = 1;
                    metric.CYCLE = i;
                    metric.PCT_A = (100.0 * firstReadTotalsByCycle[0][i] / firstReadCountsByCycle[i]);
                    metric.PCT_C = (100.0 * firstReadTotalsByCycle[1][i] / firstReadCountsByCycle[i]);
                    metric.PCT_G = (100.0 * firstReadTotalsByCycle[2][i] / firstReadCountsByCycle[i]);
                    metric.PCT_T = (100.0 * firstReadTotalsByCycle[3][i] / firstReadCountsByCycle[i]);
                    metric.PCT_N = (100.0 * firstReadTotalsByCycle[4][i] / firstReadCountsByCycle[i]);
                    metrics.addMetric(metric);
                    firstReadLength = i;
                }
            }
            if (seenSecondEnd) {
                for (int i = 0; i < maxLengthSoFar; i++) {
                    if (0 != secondReadCountsByCycle[i]) {
                        final BaseDistributionByCycleMetrics metric = new BaseDistributionByCycleMetrics();
                        metric.READ_END = 2;
                        metric.CYCLE = (i + firstReadLength);
                        metric.PCT_A = (100.0 * secondReadTotalsByCycle[0][i] / secondReadCountsByCycle[i]);
                        metric.PCT_C = (100.0 * secondReadTotalsByCycle[1][i] / secondReadCountsByCycle[i]);
                        metric.PCT_G = (100.0 * secondReadTotalsByCycle[2][i] / secondReadCountsByCycle[i]);
                        metric.PCT_T = (100.0 * secondReadTotalsByCycle[3][i] / secondReadCountsByCycle[i]);
                        metric.PCT_N = (100.0 * secondReadTotalsByCycle[4][i] / secondReadCountsByCycle[i]);
                        metrics.addMetric(metric);
                    }
                }
            }
        }
    }
}
TOP

Related Classes of picard.analysis.CollectBaseDistributionByCycle

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.