Package org.broadinstitute.gatk.engine.datasources.reads

Source Code of org.broadinstitute.gatk.engine.datasources.reads.ReadShardBalancerUnitTest$ReadShardBalancerTest

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package org.broadinstitute.gatk.engine.datasources.reads;

import htsjdk.samtools.*;
import org.broadinstitute.gatk.engine.arguments.ValidationExclusion;
import org.broadinstitute.gatk.engine.downsampling.DownsampleType;
import org.broadinstitute.gatk.engine.downsampling.DownsamplingMethod;
import org.broadinstitute.gatk.engine.filters.ReadFilter;
import org.broadinstitute.gatk.engine.resourcemanagement.ThreadAllocation;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.commandline.Tags;
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.gatk.utils.sam.ArtificialSingleSampleReadStream;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;

public class ReadShardBalancerUnitTest extends BaseTest {

    /**
     * Tests to ensure that ReadShardBalancer works as expected and does not place shard boundaries
     * at inappropriate places, such as within an alignment start position
     */
    private static class ReadShardBalancerTest extends TestDataProvider {
        private int numContigs;
        private int numStacksPerContig;
        private int stackSize;
        private int numUnmappedReads;
        private DownsamplingMethod downsamplingMethod;
        private int expectedReadCount;

        private SAMFileHeader header;
        private SAMReaderID testBAM;

        public ReadShardBalancerTest( int numContigs,
                                      int numStacksPerContig,
                                      int stackSize,
                                      int numUnmappedReads,
                                      int downsamplingTargetCoverage ) {
            super(ReadShardBalancerTest.class);

            this.numContigs = numContigs;
            this.numStacksPerContig = numStacksPerContig;
            this.stackSize = stackSize;
            this.numUnmappedReads = numUnmappedReads;

            this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null);
            this.expectedReadCount = Math.min(stackSize, downsamplingTargetCoverage) * numStacksPerContig * numContigs + numUnmappedReads;

            setName(String.format("%s: numContigs=%d numStacksPerContig=%d stackSize=%d numUnmappedReads=%d downsamplingTargetCoverage=%d",
                                  getClass().getSimpleName(), numContigs, numStacksPerContig, stackSize, numUnmappedReads, downsamplingTargetCoverage));
        }

        public void run() {
            createTestBAM();

            SAMDataSource dataSource = new SAMDataSource(Arrays.asList(testBAM),
                                                         new ThreadAllocation(),
                                                         null,
                                                         new GenomeLocParser(header.getSequenceDictionary()),
                                                         false,
                                                         ValidationStringency.SILENT,
                                                         ReadShard.DEFAULT_MAX_READS,  // reset ReadShard.MAX_READS to ReadShard.DEFAULT_MAX_READS for each test
                                                         downsamplingMethod,
                                                         new ValidationExclusion(),
                                                         new ArrayList<ReadFilter>(),
                                                         false);

            Iterable<Shard> shardIterator = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());

            SAMRecord readAtEndOfLastShard = null;
            int totalReadsSeen = 0;

            for ( Shard shard : shardIterator ) {
                int numContigsThisShard = 0;
                SAMRecord lastRead = null;

                for ( SAMRecord read : shard.iterator() ) {
                    totalReadsSeen++;

                    if ( lastRead == null ) {
                        numContigsThisShard = 1;
                    }
                    else if ( ! read.getReadUnmappedFlag() && ! lastRead.getReferenceIndex().equals(read.getReferenceIndex()) ) {
                        numContigsThisShard++;
                    }

                    // If the last read from the previous shard is not unmapped, we have to make sure
                    // that no reads in this shard start at the same position
                    if ( readAtEndOfLastShard != null && ! readAtEndOfLastShard.getReadUnmappedFlag() ) {
                        Assert.assertFalse(readAtEndOfLastShard.getReferenceIndex().equals(read.getReferenceIndex()) &&
                                           readAtEndOfLastShard.getAlignmentStart() == read.getAlignmentStart(),
                                           String.format("Reads from alignment start position %d:%d are split across multiple shards",
                                                         read.getReferenceIndex(), read.getAlignmentStart()));
                    }

                    lastRead = read;
                }

                // There should never be reads from more than 1 contig in a shard (ignoring unmapped reads)
                Assert.assertTrue(numContigsThisShard == 1, "found a shard with reads from multiple contigs");

                readAtEndOfLastShard = lastRead;
            }

            Assert.assertEquals(totalReadsSeen, expectedReadCount, "did not encounter the expected number of reads");
        }

        private void createTestBAM() {
            header = ArtificialSAMUtils.createArtificialSamHeader(numContigs, 1, 100000);
            SAMReadGroupRecord readGroup = new SAMReadGroupRecord("foo");
            readGroup.setSample("testSample");
            header.addReadGroup(readGroup);
            ArtificialSingleSampleReadStream artificialReads = new ArtificialSingleSampleReadStream(header,
                                                                                                    "foo",
                                                                                                    numContigs,
                                                                                                    numStacksPerContig,
                                                                                                    stackSize,
                                                                                                    stackSize,
                                                                                                    1,
                                                                                                    100,
                                                                                                    50,
                                                                                                    150,
                                                                                                    numUnmappedReads);

            final File testBAMFile = createTempFile("SAMDataSourceFillShardBoundaryTest", ".bam");

            SAMFileWriter bamWriter = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(header, true, testBAMFile);
            for ( SAMRecord read : artificialReads ) {
                bamWriter.addAlignment(read);
            }
            bamWriter.close();

            testBAM =  new SAMReaderID(testBAMFile, new Tags());

            new File(testBAM.getSamFilePath().replace(".bam", ".bai")).deleteOnExit();
            new File(testBAM.getSamFilePath() + ".bai").deleteOnExit();
        }
    }

    @DataProvider(name = "ReadShardBalancerTestDataProvider")
    public Object[][] createReadShardBalancerTests() {
        for ( int numContigs = 1; numContigs <= 3; numContigs++ ) {
            for ( int numStacksPerContig : Arrays.asList(1, 2, 4) ) {
                // Use crucial read shard boundary values as the stack sizes
                for ( int stackSize : Arrays.asList(ReadShard.DEFAULT_MAX_READS / 2, ReadShard.DEFAULT_MAX_READS / 2 + 10, ReadShard.DEFAULT_MAX_READS, ReadShard.DEFAULT_MAX_READS - 1, ReadShard.DEFAULT_MAX_READS + 1, ReadShard.DEFAULT_MAX_READS * 2) ) {
                    for ( int numUnmappedReads : Arrays.asList(0, ReadShard.DEFAULT_MAX_READS / 2, ReadShard.DEFAULT_MAX_READS * 2) ) {
                        // The first value will result in no downsampling at all, the others in some downsampling
                        for ( int downsamplingTargetCoverage : Arrays.asList(ReadShard.DEFAULT_MAX_READS * 10, ReadShard.DEFAULT_MAX_READS, ReadShard.DEFAULT_MAX_READS / 2) ) {
                            new ReadShardBalancerTest(numContigs, numStacksPerContig, stackSize, numUnmappedReads, downsamplingTargetCoverage);
                        }
                    }
                }
            }
        }

        return ReadShardBalancerTest.getTests(ReadShardBalancerTest.class);
    }

    @Test(dataProvider = "ReadShardBalancerTestDataProvider")
    public void runReadShardBalancerTest( ReadShardBalancerTest test ) {
        logger.warn("Running test: " + test);

        test.run();
    }
}
TOP

Related Classes of org.broadinstitute.gatk.engine.datasources.reads.ReadShardBalancerUnitTest$ReadShardBalancerTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.