Package picard.sam

Source Code of picard.sam.ReorderSam

/*
* The MIT License
*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.sam;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.SamOrBam;

import java.io.File;
import java.util.HashMap;
import java.util.Map;

/**
* Reorders a SAM/BAM input file according to the order of contigs in a second reference sequence
*
* @author mdepristo
*/
@CommandLineProgramProperties(
        usage = "Not to be confused with SortSam which sorts a SAM or BAM file with a valid sequence dictionary, " +
                "ReorderSam reorders reads in a SAM/BAM file to match the contig ordering in a provided reference file, " +
                "as determined by exact name matching of contigs.  Reads mapped to contigs absent in the new " +
                "reference are dropped. Runs substantially faster if the input is an indexed BAM file.",
        usageShort = "Reorders reads in a SAM or BAM file to match ordering in reference",
        programGroup = SamOrBam.class
)
public class ReorderSam extends CommandLineProgram {

    @Option(shortName=StandardOptionDefinitions.INPUT_SHORT_NAME, doc="Input file (bam or sam) to extract reads from.")
    public File INPUT;

    @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="Output file (bam or sam) to write extracted reads to.")
    public File OUTPUT;

    @Option(shortName=StandardOptionDefinitions.REFERENCE_SHORT_NAME, doc="Reference sequence to reorder reads to match.  " +
            "A sequence dictionary corresponding to the reference fasta is required.  Create one with CreateSequenceDictionary.jar.")
    public File REFERENCE;

    @Option(shortName="S", doc="If true, then allows only a partial overlap of the BAM contigs with the new reference " +
                               "sequence contigs.  By default, this tool requires a corresponding contig in the new " +
                               "reference for each read contig")
    public boolean ALLOW_INCOMPLETE_DICT_CONCORDANCE = false;

    @Option(shortName="U", doc="If true, then permits mapping from a read contig to a new reference contig with the " +
                               "same name but a different length.  Highly dangerous, only use if you know what you " +
                               "are doing.")
    public boolean ALLOW_CONTIG_LENGTH_DISCORDANCE = false;

    private final Log log = Log.getInstance(ReorderSam.class);

    /** Required main method implementation. */
    public static void main(final String[] argv) {
        new ReorderSam().instanceMainWithExit(argv);
    }

    protected int doWork() {
        IOUtil.assertFileIsReadable(INPUT);
        IOUtil.assertFileIsReadable(REFERENCE);
        IOUtil.assertFileIsWritable(OUTPUT);

        final SAMFileReader in = new SAMFileReader(INPUT);

        ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE);
        SAMSequenceDictionary refDict = reference.getSequenceDictionary();

        if (refDict == null) {
          log.error("No reference sequence dictionary found. Aborting.  You can create a sequence dictionary for the reference fasta using CreateSequenceDictionary.jar.");
          in.close();
          return 1;
        }

        printDictionary("SAM/BAM file", in.getFileHeader().getSequenceDictionary());
        printDictionary("Reference", refDict);
        Map<Integer, Integer> newOrder = buildSequenceDictionaryMap(refDict, in.getFileHeader().getSequenceDictionary());

        // has to be after we create the newOrder
        SAMFileHeader outHeader = in.getFileHeader().clone();
        outHeader.setSequenceDictionary(refDict);

        log.info("Writing reads...");
        if (in.hasIndex()) {
            final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader, true, OUTPUT);

            // write the reads in contig order
            for (final SAMSequenceRecord contig : refDict.getSequences() ) {
                final SAMRecordIterator it = in.query(contig.getSequenceName(), 0, 0, false);
                writeReads(out, it, newOrder, contig.getSequenceName());
            }
            // don't forget the unmapped reads
            writeReads( out, in.queryUnmapped(), newOrder, "unmapped" );
            out.close();
        }
        else {
            SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader, false, OUTPUT);
            writeReads(out, in.iterator(), newOrder, "All reads");
            out.close();
        }

        // cleanup
        in.close();
        return 0;
    }

    /**
     * Low-level helper function that returns the new reference index for oldIndex according to the
     * ordering map newOrder.  Read is provided in case an error occurs, so that an informative message
     * can be made.
     */
    private int newOrderIndex(SAMRecord read, int oldIndex, Map<Integer, Integer> newOrder) {
        if ( oldIndex == -1 )
            return -1; // unmapped read
        else {
            final Integer n = newOrder.get(oldIndex);

            if (n == null) throw new PicardException("BUG: no mapping found for read " + read.format());
            else return n;
        }
    }

    /**
     * Helper function that writes reads from iterator it into writer out, updating each SAMRecord along the way
     * according to the newOrder mapping from dictionary index -> index.  Name is used for printing only.
     */
    private void writeReads(final SAMFileWriter out,
                                   final SAMRecordIterator it,
                                   final Map<Integer, Integer> newOrder,
                                   final String name) {
        long counter = 0;
        log.info("  Processing " + name);

        while ( it.hasNext() ) {
            counter++;
            final SAMRecord read = it.next();
            final int oldRefIndex = read.getReferenceIndex();
            final int oldMateIndex = read.getMateReferenceIndex();
            final int newRefIndex = newOrderIndex(read, oldRefIndex, newOrder);

            read.setHeader(out.getFileHeader());
            read.setReferenceIndex(newRefIndex);

            final int newMateIndex = newOrderIndex(read, oldMateIndex, newOrder);
            if ( oldMateIndex != -1 && newMateIndex == -1 ) { // becoming unmapped
                read.setMateAlignmentStart(0);
                read.setMateUnmappedFlag(true);
                read.setAttribute(SAMTag.MC.name(), null);      // Set the Mate Cigar String to null
            }
            read.setMateReferenceIndex(newMateIndex);

            out.addAlignment(read);
        }

        it.close();
        log.info("Wrote " + counter + " reads");
    }

    /**
     * Constructs a mapping from read sequence records index -> new sequence dictionary index for use in
     * reordering the reference index and mate reference index in each read.  -1 means unmapped.
     */
    private Map<Integer, Integer> buildSequenceDictionaryMap(final SAMSequenceDictionary refDict,
                                                             final SAMSequenceDictionary readsDict) {
        Map<Integer, Integer> newOrder = new HashMap<Integer, Integer>();

        log.info("Reordering SAM/BAM file:");
        for (final SAMSequenceRecord refRec : refDict.getSequences() ) {
            final SAMSequenceRecord readsRec = readsDict.getSequence(refRec.getSequenceName());

            if (readsRec != null) {
                if ( refRec.getSequenceLength() != readsRec.getSequenceLength() ) {
                    String msg = String.format("Discordant contig lengths: read %s LN=%d, ref %s LN=%d",
                            readsRec.getSequenceName(), readsRec.getSequenceLength(),
                            refRec.getSequenceName(), refRec.getSequenceLength());
                    if ( ALLOW_CONTIG_LENGTH_DISCORDANCE ) {
                        log.warn(msg);
                    }
                    else {
                        throw new PicardException(msg);
                    }
                }

                log.info(String.format("  Reordering read contig %s [index=%d] to => ref contig %s [index=%d]%n",
                                       readsRec.getSequenceName(), readsRec.getSequenceIndex(),
                                       refRec.getSequenceName(), refRec.getSequenceIndex()  ));
                newOrder.put(readsRec.getSequenceIndex(), refRec.getSequenceIndex());
            }
        }

        for ( SAMSequenceRecord readsRec : readsDict.getSequences() ) {
            if ( ! newOrder.containsKey(readsRec.getSequenceIndex()) ) {
                if ( ALLOW_INCOMPLETE_DICT_CONCORDANCE )
                    newOrder.put(readsRec.getSequenceIndex(), -1);
                else
                    throw new PicardException("New reference sequence does not contain a matching contig for " + readsRec.getSequenceName());
            }
        }

        return newOrder;
    }

    /**
     * Helper function to print out a sequence dictionary
     */
    private void printDictionary(String name, SAMSequenceDictionary dict) {
        log.info(name);
        for (final SAMSequenceRecord contig : dict.getSequences()) {
            log.info( "  SN=%s LN=%d%n", contig.getSequenceName(), contig.getSequenceLength());
        }
    }
}
TOP

Related Classes of picard.sam.ReorderSam

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.