Package picard.sam

Source Code of picard.sam.SamToFastqTest

/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.sam;

import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMFormatException;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.fastq.FastqReader;
import htsjdk.samtools.fastq.FastqRecord;
import htsjdk.samtools.util.IOUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.cmdline.CommandLineProgramTest;
import picard.PicardException;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

/**
* Tests for SamToFastq
*/
public class SamToFastqTest extends CommandLineProgramTest {
    private static final File TEST_DATA_DIR = new File("testdata/picard/sam/bam2fastq/paired");
    private static final String CLIPPING_TEST_DATA = "ok/clipping_test.sam";

    public String getCommandLineProgramName() {
        return SamToFastq.class.getSimpleName();
    }

    @DataProvider(name = "okFiles")
    public Object[][] okFiles() {
        return new Object[][] {
                {"ok/sorted-pair.sam"}, // 5 sorted pairs (10 records) - mate1, mate2
                {"ok/sorted-pair-no-rg.sam"}, // 5 sorted pairs (10 records) - mate1, mate2, no read group
                {"ok/last-pair-mates-flipped.sam" }, // 4 pairs, :05 mate2, mate1
                {"ok/first-mate-bof-last-mate-eof.sam"}, // :01 mate1, 4 pairs, :01 mate2
        };
    }


    @DataProvider(name = "badFiles")
    public Object[][] badFiles() {
        return new Object[][] {
            {"bad/unpaired-mate.sam"} // mate1 without its mate2
        };
    }

    private void convertFile(final String [] args) {
        Assert.assertEquals(runPicardCommandLine(args), 0);
    }

    @Test(dataProvider = "clippingTests")
    public void testClipping(final String clippingAction, final String bases1_1, final String quals1_1, final String bases1_2, final String quals1_2,
                             final String bases2_1, final String quals2_1, final String bases2_2, final String quals2_2, final String testName) throws IOException {
        final File samFile = new File(TEST_DATA_DIR, CLIPPING_TEST_DATA) ;
        final File f1 = File.createTempFile("clippingtest1", "fastq");
        final File f2 = File.createTempFile("clippingtest2", "fastq");
        f1.deleteOnExit();
        f2.deleteOnExit();

        if (clippingAction != null) {
            convertFile(new String[]{
                "INPUT="            + samFile.getAbsolutePath(),
                "FASTQ="            + f1.getAbsolutePath(),
                "SECOND_END_FASTQ=" + f2.getAbsolutePath(),
                "CLIPPING_ACTION="  + clippingAction,
                "CLIPPING_ATTRIBUTE=" + "XT"
            });
        } else {
            convertFile(new String[]{
                "INPUT="            + samFile.getAbsolutePath(),
                "FASTQ="            + f1.getAbsolutePath(),
                "SECOND_END_FASTQ=" + f2.getAbsolutePath(),
            });
        }

        Iterator<FastqRecord> it = new FastqReader(f1).iterator();
        FastqRecord first = it.next();
        Assert.assertEquals(first.getReadString(), bases1_1, testName);
        Assert.assertEquals(first.getBaseQualityString(), quals1_1, testName);
        FastqRecord second = it.next();
        Assert.assertEquals(second.getReadString(), bases1_2, testName);
        Assert.assertEquals(second.getBaseQualityString(), quals1_2, testName);
        it = new FastqReader(f2).iterator();
        first = it.next();
        Assert.assertEquals(first.getReadString(), bases2_1, testName);
        Assert.assertEquals(first.getBaseQualityString(), quals2_1, testName);
        second = it.next();
        Assert.assertEquals(second.getReadString(), bases2_2, testName);
        Assert.assertEquals(second.getBaseQualityString(), quals2_2, testName);
    }

    @DataProvider(name = "clippingTests")
    public Object[][] clippingTests() {
        return new Object[][] {
            {null, "AAAAAAAAAA", "1111111111", "AAAAAAAAAA", "1111111111", "CCCCCCCCCC", "2222222222", "GGGGGGGGGG", "2222222222", "No clipping test"},
            {"X""AAAAAAA",    "1111111",    "AAAAAA",     "111111",     "CCCCCCCC",   "22222222",   "GGGGGG",     "222222",     "Cut clipped bases test"},
            {"N""AAAAAAANNN", "1111111111", "AAAAAANNNN", "1111111111", "CCCCCCCCNN", "2222222222", "GGGGGGNNNN", "2222222222", "Mask clipped bases test"},
            {"2""AAAAAAAAAA", "1111111###", "AAAAAAAAAA", "111111####", "CCCCCCCCCC", "22222222##", "GGGGGGGGGG", "222222####", "Change clipped qualities test"}
        };
    }

    @Test(dataProvider = "okFiles")
    public void testOkFile(final String samFilename) throws IOException {
        final File samFile = new File(TEST_DATA_DIR,samFilename);
        final File pair1File = newTempFastqFile("pair1");
        final File pair2File = newTempFastqFile("pair2");

        convertFile(new String[]{
              "INPUT=" + samFile.getAbsolutePath(),
              "FASTQ=" + pair1File.getAbsolutePath(),
              "SECOND_END_FASTQ=" + pair2File.getAbsolutePath()
        });

        // Check that paired fastq files are same size
        final Set<String> outputHeaderSet1 = createFastqReadHeaderSet(pair1File);
        final Set<String> outputHeaderSet2 = createFastqReadHeaderSet(pair2File);
        Assert.assertEquals(outputHeaderSet1.size(), outputHeaderSet2.size());

        // Create map of mate pairs from SAM records
        final Map<String,MatePair> map = createSamMatePairsMap(samFile) ;
        Assert.assertEquals(map.size(), outputHeaderSet2.size());

        // Ensure that each mate of each pair in SAM file is in the correct fastq pair file
        for (final Map.Entry<String,MatePair> entry : map.entrySet() ) {
            final MatePair mpair = entry.getValue();
            Assert.assertNotNull(mpair.mate1); // ensure we have two mates
            Assert.assertNotNull(mpair.mate2);
            Assert.assertEquals(mpair.mate1.getReadName(),mpair.mate2.getReadName());
            final String readName = mpair.mate1.getReadName() ;
            Assert.assertTrue(outputHeaderSet1.contains(readName+"/1")); // ensure mate is in correct file
            Assert.assertTrue(outputHeaderSet2.contains(readName+"/2"));
        }
    }

    @Test(dataProvider =  "okFiles")
    public void testOkInterleavedFile(final String samFilename) throws IOException {
        final File samFile = new File(TEST_DATA_DIR,samFilename);
        final File pairFile = newTempFastqFile("pair");

        convertFile(new String[]{
                "INPUT=" + samFile.getAbsolutePath(),
                "FASTQ=" + pairFile.getAbsolutePath(),
                "INTERLEAVE=true"
        });

        final Set<String> outputHeaderSet = createFastqReadHeaderSet(pairFile);
        // Create map of mate pairs from SAM records
        final Map<String,MatePair> map = createSamMatePairsMap(samFile) ;
        Assert.assertEquals(map.size() * 2, outputHeaderSet.size());

        // Ensure that each mate of each pair in SAM file is in the correct fastq pair file
        for (final Map.Entry<String,MatePair> entry : map.entrySet() ) {
            final MatePair mpair = entry.getValue();
            Assert.assertNotNull(mpair.mate1); // ensure we have two mates
            Assert.assertNotNull(mpair.mate2);
            Assert.assertEquals(mpair.mate1.getReadName(),mpair.mate2.getReadName());
            final String readName = mpair.mate1.getReadName() ;
            Assert.assertTrue(outputHeaderSet.contains(readName+"/1")); // ensure mate is in correct file
            Assert.assertTrue(outputHeaderSet.contains(readName+"/2"));
        }
    }


    @Test (dataProvider = "badFiles", expectedExceptions= SAMFormatException.class)
    public void testBadFile(final String samFilename) throws IOException {
        final File samFile = new File(TEST_DATA_DIR,samFilename);
        final File pair1 = File.createTempFile("tt-pair1.", ".fastq");
        final File pair2 = File.createTempFile("tt-pair2.", ".fastq");
        pair1.deleteOnExit();
        pair2.deleteOnExit();
        convertFile(new String[]{
              "INPUT=" + samFile.getAbsolutePath(),
              "FASTQ=" + pair1.getAbsolutePath(),
              "SECOND_END_FASTQ=" + pair2.getAbsolutePath()
        });
    }

    @DataProvider(name = "okGroupedFiles")
    public Object[][] okGroupedFiles() {
        return new Object[][] {
            {"ok/grouped-last-pair-mates-flipped.sam", null,  null,  new String[]{"rg1","rg2"}},
        };
    }


    @DataProvider(name = "badGroupedFiles")
    public Object[][] badGroupedFiles() {
        return new Object[][] {
            {"bad/grouped-unpaired-mate.sam", null,  null,  new String[]{"rg1.fastq","rg2.fastq"}}
        };
    }

    @Test(dataProvider = "okGroupedFiles")
    public void testOkGroupedFiles(final String samFilename, final String fastq, final String secondEndFastq,
                                   final String [] groupFiles) throws IOException {
        final File samFile = new File(TEST_DATA_DIR,samFilename);
        final Map<String, Set<String>> outputSets = new HashMap<String, Set<String>>(groupFiles.length);

        final String tmpDir = IOUtil.getDefaultTmpDir().getAbsolutePath() + "/";
        final String [] args = new String[]{
              "INPUT=" + samFile.getAbsolutePath(),
              "OUTPUT_PER_RG=true",
              "OUTPUT_DIR=" + tmpDir,
        };
        runPicardCommandLine(args);

        File f1;
        File f2;
        String fname1;
        String fname2;
        String keyName1;
        String keyName2;
        Set<String> outputHeaderSet1;
        Set<String> outputHeaderSet2;
        for(final String groupPUName : groupFiles)
        {
            keyName1 = groupPUName + "_1";
            keyName2 = groupPUName + "_2";
            fname1 = tmpDir + "/" + keyName1 + ".fastq";
            fname2 = tmpDir + "/" + keyName2 + ".fastq";
            f1 = new File(fname1);
            f2 = new File(fname2);
            f1.deleteOnExit();
            f2.deleteOnExit();
            IOUtil.assertFileIsReadable(f1);
            IOUtil.assertFileIsReadable(f2);

            // Check that paired fastq files are same size and store them for later comparison
            outputHeaderSet1 = createFastqReadHeaderSet(f1);
            outputHeaderSet2 = createFastqReadHeaderSet(f2);
            outputSets.put(keyName1 , outputHeaderSet1);
            outputSets.put(keyName2, outputHeaderSet2);
            Assert.assertEquals(outputHeaderSet1.size(), outputHeaderSet2.size());
        }

        // Create map of read groups and mate pairs from SAM records
        final Map<String, Map<String,MatePair>> map = createPUPairsMap(samFile);

        for(final Map.Entry<String, Map<String, MatePair>> groupEntry : map.entrySet()) {
            // Ensure that for each group, each mate of each pair in the SAM file is in the correct fastq pair file
            for (final Map.Entry<String,MatePair> entry : groupEntry.getValue().entrySet() ) {
                final MatePair mpair = entry.getValue();
                outputHeaderSet1 = outputSets.get(groupEntry.getKey() + "_1");
                outputHeaderSet2 = outputSets.get(groupEntry.getKey() + "_2");

                Assert.assertNotNull(mpair.mate1); // ensure we have two mates
                Assert.assertNotNull(mpair.mate2);
                Assert.assertEquals(mpair.mate1.getReadName(),mpair.mate2.getReadName());
                final String readName = mpair.mate1.getReadName() ;
                Assert.assertTrue(outputHeaderSet1.contains(readName+"/1")); // ensure mate is in correct file
                Assert.assertTrue(outputHeaderSet2.contains(readName+"/2"));
            }
        }
    }


    @Test (dataProvider = "badGroupedFiles", expectedExceptions= SAMException.class)
    public void testBadGroupedFile(final String samFilename, final String fastq, final String secondEndFastq,
                                   final String [] groupFiles) throws IOException {
        final File samFile = new File(TEST_DATA_DIR,samFilename);
        final String tmpDir = IOUtil.getDefaultTmpDir().getAbsolutePath() + "/";
        final String [] args = new String[]{
              "INPUT=" + samFile.getAbsolutePath(),
              "OUTPUT_PER_RG=true",
              "OUTPUT_DIR=" + tmpDir,
        };
        runPicardCommandLine(args);

        File f1;
        File f2;
        String fname1;
        String fname2;
        for(final String groupPUName : groupFiles)
        {
            fname1 = tmpDir + groupPUName + "_1.fastq";
            fname2 = tmpDir + groupPUName + "_2.fastq";
            f1 = new File(fname1);
            f2 = new File(fname2);
            f1.deleteOnExit();
            f1.deleteOnExit();
        }
    }

    @Test(dataProvider = "trimmedData")
    public void testTrimming(final String samFilename, final int read1Trim,
                             final int read1MaxBases, final int expectedRead1Length, final int read2Trim,
                             final int read2MaxBases, final int expectedRead2Length) throws IOException {

        final File samFile = new File(TEST_DATA_DIR, samFilename);
        final File pair1File = newTempFastqFile("pair1");
        final File pair2File = newTempFastqFile("pair2");
        pair1File.deleteOnExit();
        pair2File.deleteOnExit();

        convertFile(new String[]{
              "INPUT=" + samFile.getAbsolutePath(),
              "FASTQ=" + pair1File.getAbsolutePath(),
              "SECOND_END_FASTQ=" + pair2File.getAbsolutePath(),
              "READ1_TRIM=" + read1Trim,
              "READ1_MAX_BASES_TO_WRITE=" + read1MaxBases,
              "READ2_TRIM=" + read2Trim,
              "READ2_MAX_BASES_TO_WRITE=" + read2MaxBases
        });

        for (final FastqRecord first : new FastqReader(pair1File)) {
            Assert.assertEquals(first.getReadString().length(), expectedRead1Length, "Incorrect read length");
            Assert.assertEquals(first.getBaseQualityString().length(), expectedRead1Length, "Incorrect quality string length");
        }
        for (final FastqRecord second : new FastqReader(pair2File)) {
            Assert.assertEquals(second.getReadString().length(), expectedRead2Length, "Incorrect read length");
            Assert.assertEquals(second.getBaseQualityString().length(), expectedRead2Length, "Incorrect quality string length");
        }
    }

    @DataProvider(name = "trimmedData")
    public Object[][] trimmedData() {
        return new Object[][] {
            // There are 13 bases in each of these reads
            {"ok/sorted-pair.sam", 6, 7, 7, 5, 8, 8}, // exact matches for everything
            {"ok/sorted-pair.sam", 7, 7, 6, 3, 8, 8// fewer or more bases
        };
    }

    private Set<String> createFastqReadHeaderSet(final File file) {
        final Set<String> set = new HashSet<String>();
        final FastqReader freader = new FastqReader(file);
        while (freader.hasNext()) {
            final FastqRecord frec = freader.next();
            set.add(frec.getReadHeader());
        }
        return set ;
    }

    private Map<String,MatePair> createSamMatePairsMap(final File samFile) throws IOException {
        IOUtil.assertFileIsReadable(samFile);
        final SAMFileReader reader = new SAMFileReader(IOUtil.openFileForReading(samFile));

        final Map<String,MatePair> map = new LinkedHashMap<String,MatePair>();
        for (final SAMRecord record : reader ) {
            MatePair mpair = map.get(record.getReadName());
            if (mpair == null) {
                 mpair = new MatePair();
                 map.put(record.getReadName(), mpair);
            }
            mpair.add(record);
        }
        reader.close();
        return map;
    }


    private Map<String, Map<String, MatePair>> createPUPairsMap(final File samFile) throws IOException {
        IOUtil.assertFileIsReadable(samFile);
        final SAMFileReader reader = new SAMFileReader(IOUtil.openFileForReading(samFile));
        final Map<String, Map<String, MatePair>> map = new LinkedHashMap<String, Map<String,MatePair>>();

        Map<String,MatePair> curFileMap;
        for (final SAMRecord record : reader ) {
            final String platformUnit = record.getReadGroup().getPlatformUnit();
            curFileMap = map.get(platformUnit);
            if(curFileMap == null)
            {
                curFileMap = new LinkedHashMap<String, MatePair>();
                map.put(platformUnit, curFileMap);
            }

            MatePair mpair = curFileMap.get(record.getReadName());
            if (mpair == null) {
                 mpair = new MatePair();
                 curFileMap.put(record.getReadName(), mpair);
            }
            mpair.add(record);
        }
        reader.close();
        return map;
    }

    class MatePair {
        SAMRecord mate1 ;
        SAMRecord mate2 ;
        void add(final SAMRecord record) {
            if (!record.getReadPairedFlag()) throw new PicardException("Record "+record.getReadName()+" is not paired");
            if (record.getFirstOfPairFlag()) {
                if (mate1 != null) throw new PicardException("Mate 1 already set for record: "+record.getReadName());
                mate1 = record ;
            }
            else if (record.getSecondOfPairFlag()) {
                if (mate2 != null) throw new PicardException("Mate 2 already set for record: "+record.getReadName());
                mate2 = record ;
            }
            else throw new PicardException("Neither FirstOfPairFlag or SecondOfPairFlag is set for a paired record");
        }
    }

    private File newTempFastqFile(final String filename) throws IOException {
        if(filename == null) return null;
        final File file = File.createTempFile(filename,".fastq");
        file.deleteOnExit();
        return file;
    }
}
TOP

Related Classes of picard.sam.SamToFastqTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.