Package org.broadinstitute.gatk.engine

Source Code of org.broadinstitute.gatk.engine.ReadMetricsUnitTest$EveryTenthReadFilter

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package org.broadinstitute.gatk.engine;

import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.*;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.commandline.Tags;
import org.broadinstitute.gatk.engine.arguments.ValidationExclusion;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.gatk.engine.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.gatk.engine.datasources.reads.*;
import org.broadinstitute.gatk.engine.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.gatk.engine.executive.WindowMaker;
import org.broadinstitute.gatk.engine.filters.ReadFilter;
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.resourcemanagement.ThreadAllocation;
import org.broadinstitute.gatk.engine.traversals.*;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.GenomeLocSortedSet;
import org.broadinstitute.gatk.utils.SampleUtils;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
import org.broadinstitute.gatk.utils.activeregion.ActivityProfileState;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.gatk.utils.interval.IntervalMergingRule;
import org.broadinstitute.gatk.utils.sam.*;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import java.io.File;
import java.io.IOException;
import java.util.*;

public class ReadMetricsUnitTest extends BaseTest {

    @Test
    public void testReadsSeenDoNotOverflowInt() {

        final ReadMetrics metrics = new ReadMetrics();

        final long moreThanMaxInt = ((long)Integer.MAX_VALUE) + 1L;

        for ( long i = 0L; i < moreThanMaxInt; i++ ) {
            metrics.incrementNumReadsSeen();
        }

        Assert.assertEquals(metrics.getNumReadsSeen(), moreThanMaxInt);
        Assert.assertTrue(metrics.getNumReadsSeen() > (long) Integer.MAX_VALUE);

        logger.warn(String.format("%d %d %d", Integer.MAX_VALUE, moreThanMaxInt, Long.MAX_VALUE));
    }


    // Test the accuracy of the read metrics

    private IndexedFastaSequenceFile reference;
    private SAMSequenceDictionary dictionary;
    private SAMFileHeader header;
    private GATKSAMReadGroupRecord readGroup;
    private GenomeLocParser genomeLocParser;
    private File testBAM;

    private static final int numReadsPerContig = 250000;
    private static final List<String> contigs = Arrays.asList("1", "2", "3");

    @BeforeClass
    private void init() throws IOException {
        reference = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
        dictionary = reference.getSequenceDictionary();
        genomeLocParser = new GenomeLocParser(dictionary);
        header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test");
        header.setSequenceDictionary(dictionary);
        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        readGroup = new GATKSAMReadGroupRecord(header.getReadGroup("test"));

        final List<GATKSAMRecord> reads = new ArrayList<>();
        for ( final String contig : contigs ) {
            for ( int i = 1; i <= numReadsPerContig; i++ ) {
                reads.add(buildSAMRecord("read" + contig + "_" + i, contig, i));
            }
        }

        createBAM(reads);
    }

    private void createBAM(final List<GATKSAMRecord> reads) throws IOException {
        testBAM = createTempFile("TraverseActiveRegionsUnitTest", ".bam");

        SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reads.get(0).getHeader(), true, testBAM);
        for (GATKSAMRecord read : reads ) {
            out.addAlignment(read);
        }
        out.close();

        new File(testBAM.getAbsolutePath().replace(".bam", ".bai")).deleteOnExit();
        new File(testBAM.getAbsolutePath() + ".bai").deleteOnExit();
    }

    // copied from LocusViewTemplate
    protected GATKSAMRecord buildSAMRecord(final String readName, final String contig, final int alignmentStart) {
        GATKSAMRecord record = new GATKSAMRecord(header);

        record.setReadName(readName);
        record.setReferenceIndex(dictionary.getSequenceIndex(contig));
        record.setAlignmentStart(alignmentStart);

        record.setCigarString("1M");
        record.setReadString("A");
        record.setBaseQualityString("A");
        record.setReadGroup(readGroup);

        return record;
    }

    @Test
    public void testCountsFromReadTraversal() {
        final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
        engine.setGenomeLocParser(genomeLocParser);

        final Collection<SAMReaderID> samFiles = new ArrayList<>();
        final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
        samFiles.add(readerID);

        final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
                false,
                ValidationStringency.STRICT,
                null,
                null,
                new ValidationExclusion(),
                new ArrayList<ReadFilter>(),
                new ArrayList<ReadTransformer>(),
                false, (byte)30, false, true, null, IntervalMergingRule.ALL);

        engine.setReadsDataSource(dataSource);

        final TraverseReadsNano traverseReadsNano = new TraverseReadsNano(1);
        final DummyReadWalker walker = new DummyReadWalker();
        traverseReadsNano.initialize(engine, walker, null);

        for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new ReadShardBalancer()) ) {
            final ReadShardDataProvider dataProvider = new ReadShardDataProvider(shard, engine.getGenomeLocParser(), dataSource.seek(shard), reference, new ArrayList<ReferenceOrderedDataSource>());
            traverseReadsNano.traverse(walker, dataProvider, 0);
            dataProvider.close();
        }

        Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
        Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
    }

    @Test
    public void testCountsFromLocusTraversal() {
        final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
        engine.setGenomeLocParser(genomeLocParser);

        final Collection<SAMReaderID> samFiles = new ArrayList<>();
        final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
        samFiles.add(readerID);

        final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
                false,
                ValidationStringency.STRICT,
                null,
                null,
                new ValidationExclusion(),
                new ArrayList<ReadFilter>(),
                new ArrayList<ReadTransformer>(),
                false, (byte)30, false, true, null, IntervalMergingRule.ALL);

        engine.setReadsDataSource(dataSource);
        final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());

        final TraverseLociNano traverseLociNano = new TraverseLociNano(1);
        final DummyLocusWalker walker = new DummyLocusWalker();
        traverseLociNano.initialize(engine, walker, null);

        for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new LocusShardBalancer()) ) {
            final WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples);
            for ( WindowMaker.WindowMakerIterator window : windowMaker ) {
                final LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
                traverseLociNano.traverse(walker, dataProvider, 0);
                dataProvider.close();
            }
            windowMaker.close();
        }

        //dataSource.close();
        Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
        Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
    }

    @Test
    public void testCountsFromActiveRegionTraversal() {
        final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
        engine.setGenomeLocParser(genomeLocParser);

        final Collection<SAMReaderID> samFiles = new ArrayList<>();
        final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
        samFiles.add(readerID);

        final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
                false,
                ValidationStringency.STRICT,
                null,
                null,
                new ValidationExclusion(),
                new ArrayList<ReadFilter>(),
                new ArrayList<ReadTransformer>(),
                false, (byte)30, false, true, null, IntervalMergingRule.ALL);

        engine.setReadsDataSource(dataSource);
        final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());

        final List<GenomeLoc> intervals = new ArrayList<>(contigs.size());
        for ( final String contig : contigs )
            intervals.add(genomeLocParser.createGenomeLoc(contig, 1, numReadsPerContig));

        final TraverseActiveRegions traverseActiveRegions = new TraverseActiveRegions();
        final DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
        traverseActiveRegions.initialize(engine, walker, null);

        for ( final Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer()) ) {
            final WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples);
            for ( WindowMaker.WindowMakerIterator window : windowMaker ) {
                final LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
                traverseActiveRegions.traverse(walker, dataProvider, 0);
                dataProvider.close();
            }
            windowMaker.close();
        }

        Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
        Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
    }

    @Test
    public void testFilteredCounts() {
        final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
        engine.setGenomeLocParser(genomeLocParser);

        final Collection<SAMReaderID> samFiles = new ArrayList<>();
        final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
        samFiles.add(readerID);

        final List<ReadFilter> filters = new ArrayList<>();
        filters.add(new EveryTenthReadFilter());

        final SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser,
                false,
                ValidationStringency.STRICT,
                null,
                null,
                new ValidationExclusion(),
                filters,
                new ArrayList<ReadTransformer>(),
                false, (byte)30, false, true, null, IntervalMergingRule.ALL);

        engine.setReadsDataSource(dataSource);

        final TraverseReadsNano traverseReadsNano = new TraverseReadsNano(1);
        final DummyReadWalker walker = new DummyReadWalker();
        traverseReadsNano.initialize(engine, walker, null);

        for ( final Shard shard : dataSource.createShardIteratorOverAllReads(new ReadShardBalancer()) ) {
            final ReadShardDataProvider dataProvider = new ReadShardDataProvider(shard, engine.getGenomeLocParser(), dataSource.seek(shard), reference, new ArrayList<ReferenceOrderedDataSource>());
            traverseReadsNano.traverse(walker, dataProvider, 0);
            dataProvider.close();
        }

        Assert.assertEquals((long)engine.getCumulativeMetrics().getCountsByFilter().get(EveryTenthReadFilter.class.getSimpleName()), contigs.size() * numReadsPerContig / 10);
    }

    class DummyLocusWalker extends LocusWalker<Integer, Integer> {
        @Override
        public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
            return 0;
        }

        @Override
        public Integer reduceInit() {
            return 0;
        }

        @Override
        public Integer reduce(Integer value, Integer sum) {
            return 0;
        }
    }

    class DummyReadWalker extends ReadWalker<Integer, Integer> {
        @Override
        public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
            return 0;
        }

        @Override
        public Integer reduceInit() {
            return 0;
        }

        @Override
        public Integer reduce(Integer value, Integer sum) {
            return 0;
        }
    }

    class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
        @Override
        public ActivityProfileState isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
            return new ActivityProfileState(ref.getLocus(), 0.0);
        }

        @Override
        public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
            return 0;
        }

        @Override
        public Integer reduceInit() {
            return 0;
        }

        @Override
        public Integer reduce(Integer value, Integer sum) {
            return 0;
        }
    }

    private final class EveryTenthReadFilter extends ReadFilter {

        private int myCounter = 0;

        @Override
        public boolean filterOut(final SAMRecord record) {
            if ( ++myCounter == 10 ) {
                myCounter = 0;
                return true;
            }

            return false;
        }
    }
}
TOP

Related Classes of org.broadinstitute.gatk.engine.ReadMetricsUnitTest$EveryTenthReadFilter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.