Package picard.sam

Source Code of picard.sam.FixMateInformation

/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package picard.sam;

import htsjdk.samtools.BAMRecordCodec;
import htsjdk.samtools.BamFileIoUtils;
import htsjdk.samtools.DuplicateScoringStrategy;
import htsjdk.samtools.DuplicateScoringStrategy.ScoringStrategy;
import htsjdk.samtools.MergingSamRecordIterator;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordQueryNameComparator;
import htsjdk.samtools.SamFileHeaderMerger;
import htsjdk.samtools.SamPairUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.PeekableIterator;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.SortingCollection;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.SamOrBam;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

/**
* Class to fix mate pair information for all reads in a SAM file.  Will run in fairly limited
* memory unless there are lots of mate pairs that are far apart from each other in the file.
*
* @author Tim Fennell
*/
@CommandLineProgramProperties(
        usage = "Ensure that all mate-pair information is in sync between each read " +
                "and its mate pair.  If no OUTPUT file is supplied then the output is written to a temporary file " +
                "and then copied over the INPUT file.  Reads marked with the secondary alignment flag are written " +
                "to the output file unchanged.",
        usageShort = "Ensure that all mate-pair information is in sync between each read and its mate pair",
        programGroup = SamOrBam.class
)
public class FixMateInformation extends CommandLineProgram {

    @Option(shortName=StandardOptionDefinitions.INPUT_SHORT_NAME, doc="The input file to fix.")
    public List<File> INPUT;

    @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, optional=true,
            doc="The output file to write to. If no output file is supplied, the input file is overwritten.")
    public File OUTPUT;

    @Option(shortName=StandardOptionDefinitions.SORT_ORDER_SHORT_NAME, optional=true,
    doc="Optional sort order if the OUTPUT file should be sorted differently than the INPUT file.")
    public SortOrder SORT_ORDER;

    @Option(doc="If true, assume that the input file is queryname sorted, even if the header says otherwise.",
    shortName=StandardOptionDefinitions.ASSUME_SORTED_SHORT_NAME)
    public boolean ASSUME_SORTED = false;

    @Option(shortName="MC", optional=true, doc="Adds the mate CIGAR tag (MC) if true, does not if false.")
    public Boolean ADD_MATE_CIGAR = true;

    private static final Log log = Log.getInstance(FixMateInformation.class);

    protected SAMFileWriter out;

    public static void main(final String[] args) {
        new FixMateInformation().instanceMainWithExit(args);
    }

    protected int doWork() {
        // Open up the input
        boolean allQueryNameSorted = true;
        final List<SAMFileReader> readers = new ArrayList<SAMFileReader>();
        for (final File f : INPUT) {
            IOUtil.assertFileIsReadable(f);
            final SAMFileReader reader = new SAMFileReader(f);
            readers.add(reader);
            if (reader.getFileHeader().getSortOrder() != SortOrder.queryname) allQueryNameSorted = false;
        }

        // Decide where to write the fixed file - into the specified output file
        // or into a temporary file that will overwrite the INPUT file eventually
        if (OUTPUT != null) OUTPUT = OUTPUT.getAbsoluteFile();
        final boolean differentOutputSpecified = OUTPUT != null;

        if (differentOutputSpecified) {
            IOUtil.assertFileIsWritable(OUTPUT);
        }
        else if (INPUT.size() != 1) {
            throw new PicardException("Must specify either an explicit OUTPUT file or a single INPUT file to be overridden.");
        }
        else {
            final File soleInput = INPUT.get(0).getAbsoluteFile();
            final File dir       = soleInput.getParentFile().getAbsoluteFile();
            try {
                IOUtil.assertFileIsWritable(soleInput);
                IOUtil.assertDirectoryIsWritable(dir);
                OUTPUT = File.createTempFile(soleInput.getName() + ".being_fixed.", BamFileIoUtils.BAM_FILE_EXTENSION, dir);
            }
            catch (final IOException ioe) {
                throw new RuntimeIOException("Could not create tmp file in " + dir.getAbsolutePath());
            }
        }

        // Get the input records merged and sorted by query name as needed
        final PeekableIterator<SAMRecord> iterator;
        final SAMFileHeader header;

        {
            // Deal with merging if necessary
            final Iterator<SAMRecord> tmp;
            if (INPUT.size() > 1) {
                final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(readers.size());
                for (final SAMFileReader reader : readers) {
                    headers.add(reader.getFileHeader());
                }
                final SortOrder sortOrder = (allQueryNameSorted? SortOrder.queryname: SortOrder.unsorted);
                final SamFileHeaderMerger merger = new SamFileHeaderMerger(sortOrder, headers, false);
                tmp = new MergingSamRecordIterator(merger, readers, false);
                header = merger.getMergedHeader();
            }
            else {
                tmp = readers.get(0).iterator();
                header = readers.get(0).getFileHeader();
            }

            // And now deal with re-sorting if necessary
            if (ASSUME_SORTED || allQueryNameSorted) {
                iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<SAMRecord>(tmp), ADD_MATE_CIGAR);
            }
            else {
                log.info("Sorting input into queryname order.");
                final SortingCollection<SAMRecord> sorter = SortingCollection.newInstance(SAMRecord.class,
                                                                                          new BAMRecordCodec(header),
                                                                                          new SAMRecordQueryNameComparator(),
                                                                                          MAX_RECORDS_IN_RAM,
                                                                                          TMP_DIR);
                while (tmp.hasNext()) {
                    sorter.add(tmp.next());

                }

                iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<SAMRecord>(sorter.iterator()) {
                    @Override
                    public void close() {
                        super.close();
                        sorter.cleanup();
                    }
                }, ADD_MATE_CIGAR);
                log.info("Sorting by queryname complete.");
            }

            // Deal with the various sorting complications
            final SortOrder outputSortOrder = SORT_ORDER == null ? readers.get(0).getFileHeader().getSortOrder() : SORT_ORDER;
            log.info("Output will be sorted by " + outputSortOrder);
            header.setSortOrder(outputSortOrder);
        }

        if (CREATE_INDEX && header.getSortOrder() != SortOrder.coordinate){
            throw new PicardException("Can't CREATE_INDEX unless sort order is coordinate");
        }

        createSamFileWriter(header);

        log.info("Traversing query name sorted records and fixing up mate pair information.");
        final ProgressLogger progress = new ProgressLogger(log);
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            out.addAlignment(record);
            progress.record(record);
        }
        iterator.close();

        if (header.getSortOrder() == SortOrder.queryname) {
            log.info("Closing output file.");
        }
        else {
            log.info("Finished processing reads; re-sorting output file.");
        }
        closeWriter();

        // Lastly if we're fixing in place, swap the files
        if (!differentOutputSpecified) {
            log.info("Replacing input file with fixed file.");

            final File soleInput = INPUT.get(0).getAbsoluteFile();
            final File old = new File(soleInput.getParentFile(), soleInput.getName() + ".old");
            if (!old.exists() && soleInput.renameTo(old)) {
                if (OUTPUT.renameTo(soleInput)) {

                    if (!old.delete()) {
                        log.warn("Could not delete old file: " + old.getAbsolutePath());
                        return 1;
                    }

                    if (CREATE_INDEX) {
                        final File newIndex = new File(OUTPUT.getParent(),
                                                       OUTPUT.getName().substring(0, OUTPUT.getName().length()-4) + ".bai");
                        final File oldIndex = new File(soleInput.getParent(),
                                                       soleInput.getName().substring(0, soleInput.getName().length()-4) + ".bai");

                        if (!newIndex.renameTo(oldIndex)) {
                            log.warn("Could not overwrite index file: " + oldIndex.getAbsolutePath());
                        }
                    }

                }
                else {
                    log.error("Could not move new file to " + soleInput.getAbsolutePath());
                    log.error("Input file preserved as: " + old.getAbsolutePath());
                    log.error("New file preserved as: " + OUTPUT.getAbsolutePath());
                    return 1;
                }
            }
            else {
                log.error("Could not move input file out of the way: " + soleInput.getAbsolutePath());

                if (!OUTPUT.delete()) {
                    log.error("Could not delete temporary file: " + OUTPUT.getAbsolutePath());
                }

                return 1;
            }

        }

        return 0;
    }

    protected void createSamFileWriter(final SAMFileHeader header) {
        out = new SAMFileWriterFactory().makeSAMOrBAMWriter(header,
                  header.getSortOrder() == SortOrder.queryname, OUTPUT);

    }

    protected void writeAlignment(final SAMRecord sam) {
        out.addAlignment(sam);
    }

    protected void closeWriter() {
        out.close();
    }

}
TOP

Related Classes of picard.sam.FixMateInformation

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.