package picard.sam;
import htsjdk.samtools.BAMRecordCodec;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.MergingSamRecordIterator;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMProgramRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordQueryNameComparator;
import htsjdk.samtools.SamFileHeaderMerger;
import htsjdk.samtools.SamPairUtil;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.DelegatingIterator;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.PeekableIterator;
import htsjdk.samtools.util.SortingCollection;
import picard.PicardException;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
/**
* Class that takes in a set of alignment information in SAM format and merges it with the set
* of all reads for which alignment was attempted, stored in an unmapped SAM file. This
* class overrides mergeAlignment in AbstractAlignmentNMerger and proceeds on the assumption that
* the underlying alignment records are aleady in query-name sorted order (true for bwa). If
* they are not, the mergeAlignment method catches the IllegalStateException, forces a sort
* of the underlying alignment records, and tries again.
*
* @author ktibbett@broadinstitute.org
*/
public class SamAlignmentMerger extends AbstractAlignmentMerger {
private final Log log = Log.getInstance(SamAlignmentMerger.class);
private final List<File> alignedSamFile;
private final List<File> read1AlignedSamFile;
private final List<File> read2AlignedSamFile;
private final int maxGaps;
private boolean forceSort = false;
/**
* Constructor
*
* @param unmappedBamFile The BAM file that was used as the input to the aligner, which will
* include info on all the reads that did not map. Required.
* @param targetBamFile The file to which to write the merged SAM records. Required.
* @param referenceFasta The reference sequence for the map files. Required.
* @param programRecord Program record for taget file SAMRecords created.
* @param clipAdapters Whether adapters marked in unmapped BAM file should be marked as
* soft clipped in the merged bam. Required.
* @param bisulfiteSequence Whether the reads are bisulfite sequence (used when calculating the
* NM and UQ tags). Required.
* @param alignedReadsOnly Whether to output only those reads that have alignment data
* @param alignedSamFile The SAM file(s) with alignment information. Optional. If this is
* not provided, then read1AlignedSamFile and read2AlignedSamFile must be.
* @param maxGaps The maximum number of insertions or deletions permitted in an
* alignment. Alignments with more than this many gaps will be ignored.
* -1 means to allow any number of gaps.
* @param attributesToRetain attributes from the alignment record that should be
* removed when merging. This overrides attributesToRetain if they share
* common tags.
* @param read1BasesTrimmed The number of bases trimmed from start of read 1 prior to alignment. Optional.
* @param read2BasesTrimmed The number of bases trimmed from start of read 2 prior to alignment. Optional.
* @param read1AlignedSamFile The alignment records for read1. Used when the two ends of a read are
* aligned separately. This is optional, but must be specified if
* alignedSamFile is not.
* @param read2AlignedSamFile The alignment records for read1. Used when the two ends of a read are
* aligned separately. This is optional, but must be specified if
* alignedSamFile is not.
* @param expectedOrientations A List of SamPairUtil.PairOrientations that are expected for
* aligned pairs. Used to determine the properPair flag.
* @param sortOrder The order in which the merged records should be output. If null,
* output will be coordinate-sorted
* @param primaryAlignmentSelectionStrategy How to handle multiple alignments for a fragment or read pair,
* in which none are primary, or more than one is marked primary
* @param addMateCigar True if we are to add or maintain the mate CIGAR (MC) tag, false if we are to remove or not include.
*/
public SamAlignmentMerger(final File unmappedBamFile, final File targetBamFile, final File referenceFasta,
final SAMProgramRecord programRecord, final boolean clipAdapters, final boolean bisulfiteSequence,
final boolean alignedReadsOnly,
final List<File> alignedSamFile, final int maxGaps, final List<String> attributesToRetain,
final List<String> attributesToRemove,
final Integer read1BasesTrimmed, final Integer read2BasesTrimmed,
final List<File> read1AlignedSamFile, final List<File> read2AlignedSamFile,
final List<SamPairUtil.PairOrientation> expectedOrientations,
final SortOrder sortOrder,
final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy,
final boolean addMateCigar) {
super(unmappedBamFile, targetBamFile, referenceFasta, clipAdapters, bisulfiteSequence,
alignedReadsOnly, programRecord, attributesToRetain, attributesToRemove, read1BasesTrimmed,
read2BasesTrimmed, expectedOrientations, sortOrder, primaryAlignmentSelectionStrategy, addMateCigar);
if ((alignedSamFile == null || alignedSamFile.size() == 0) &&
(read1AlignedSamFile == null || read1AlignedSamFile.size() == 0 ||
read2AlignedSamFile == null || read2AlignedSamFile.size() == 0)) {
throw new IllegalArgumentException("Either alignedSamFile or BOTH of read1AlignedSamFile and " +
"read2AlignedSamFile must be specified.");
}
if (alignedSamFile != null) {
for (final File f : alignedSamFile) {
IOUtil.assertFileIsReadable(f);
}
} else {
for (final File f : read1AlignedSamFile) {
IOUtil.assertFileIsReadable(f);
}
for (final File f : read2AlignedSamFile) {
IOUtil.assertFileIsReadable(f);
}
}
this.alignedSamFile = alignedSamFile;
this.read1AlignedSamFile = read1AlignedSamFile;
this.read2AlignedSamFile = read2AlignedSamFile;
this.maxGaps = maxGaps;
if (programRecord == null) {
final File tmpFile = this.alignedSamFile != null && this.alignedSamFile.size() > 0
? this.alignedSamFile.get(0)
: this.read1AlignedSamFile.get(0);
final SAMFileReader tmpReader = new SAMFileReader(tmpFile);
tmpReader.setValidationStringency(ValidationStringency.SILENT);
if (tmpReader.getFileHeader().getProgramRecords().size() == 1) {
setProgramRecord(tmpReader.getFileHeader().getProgramRecords().get(0));
}
tmpReader.close();
}
log.info("Processing SAM file(s): " + alignedSamFile != null ? alignedSamFile : read1AlignedSamFile + "," + read2AlignedSamFile);
}
/**
* Merges the alignment from the map file with the non-aligned records from the source BAM file.
* Overrides mergeAlignment in AbstractAlignmentMerger. Tries first to proceed on the assumption
* that the alignment records are pre-sorted. If not, catches the exception, forces a sort, and
* tries again.
*/
public void mergeAlignment() {
try {
super.mergeAlignment();
}
catch(final IllegalStateException ise) {
log.warn("Exception merging bam alignment - attempting to sort aligned reads and try again: ", ise.getMessage());
forceSort = true;
resetRefSeqFileWalker();
super.mergeAlignment();
}
}
/**
* Reads the aligned SAM records into a SortingCollection and returns an iterator over that collection
*/
protected CloseableIterator<SAMRecord> getQuerynameSortedAlignedRecords() {
final CloseableIterator<SAMRecord> mergingIterator;
final SAMFileHeader header;
// When the alignment records, including both ends of a pair, are in SAM files
if (alignedSamFile != null && alignedSamFile.size() > 0) {
final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(alignedSamFile.size());
final List<SAMFileReader> readers = new ArrayList<SAMFileReader>(alignedSamFile.size());
for (final File f : this.alignedSamFile) {
final SAMFileReader r = new SAMFileReader(f);
headers.add(r.getFileHeader());
readers.add(r);
}
final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SortOrder.queryname, headers, false);
mergingIterator = new MergingSamRecordIterator(headerMerger, readers, true);
header = headerMerger.getMergedHeader();
}
// When the ends are aligned separately and don't have firstOfPair information correctly
// set we use this branch.
else {
mergingIterator = new SeparateEndAlignmentIterator(this.read1AlignedSamFile, this.read2AlignedSamFile);
header = ((SeparateEndAlignmentIterator)mergingIterator).getHeader();
}
if (!forceSort) {
return mergingIterator;
}
final SortingCollection<SAMRecord> alignmentSorter = SortingCollection.newInstance(SAMRecord.class,
new BAMRecordCodec(header), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM);
int count = 0;
while (mergingIterator.hasNext()) {
alignmentSorter.add(mergingIterator.next());
count++;
if (count > 0 && count % 1000000 == 0) {
log.info("Read " + count + " records from alignment SAM/BAM.");
}
}
log.info("Finished reading " + count + " total records from alignment SAM/BAM.");
mergingIterator.close();
return new DelegatingIterator<SAMRecord>(alignmentSorter.iterator()) {
@Override
public void close() {
super.close();
alignmentSorter.cleanup();
}
};
}
private class SuffixTrimingSamRecordIterator implements CloseableIterator<SAMRecord> {
private final CloseableIterator<SAMRecord> underlyingIterator;
private final String suffixToTrim;
private SuffixTrimingSamRecordIterator(final CloseableIterator<SAMRecord> underlyingIterator, final String suffixToTrim) {
this.underlyingIterator = underlyingIterator;
this.suffixToTrim = suffixToTrim;
}
@Override
public void close() {
underlyingIterator.close();
}
@Override
public boolean hasNext() {
return underlyingIterator.hasNext();
}
@Override
public SAMRecord next() {
final SAMRecord rec = underlyingIterator.next();
final String readName = rec.getReadName();
if (readName.endsWith(suffixToTrim)) {
rec.setReadName(readName.substring(0, readName.length() - suffixToTrim.length()));
}
return rec;
}
@Override
public void remove() {
underlyingIterator.remove();
}
}
private class SeparateEndAlignmentIterator implements CloseableIterator<SAMRecord> {
private final PeekableIterator<SAMRecord> read1Iterator;
private final PeekableIterator<SAMRecord> read2Iterator;
private final SAMFileHeader header;
public SeparateEndAlignmentIterator(final List<File> read1Alignments, final List<File> read2Alignments) {
final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
final List<SAMFileReader> read1 = new ArrayList<SAMFileReader>(read1Alignments.size());
final List<SAMFileReader> read2 = new ArrayList<SAMFileReader>(read2Alignments.size());
for (final File f : read1Alignments) {
final SAMFileReader r = new SAMFileReader(f);
headers.add(r.getFileHeader());
read1.add(r);
}
for (final File f : read2Alignments) {
final SAMFileReader r = new SAMFileReader(f);
headers.add(r.getFileHeader());
read2.add(r);
}
final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headers, false);
read1Iterator = new PeekableIterator<SAMRecord>(
new SuffixTrimingSamRecordIterator(new MergingSamRecordIterator(headerMerger, read1, true), "/1"));
read2Iterator = new PeekableIterator<SAMRecord>(
new SuffixTrimingSamRecordIterator(new MergingSamRecordIterator(headerMerger, read2, true), "/2"));
header = headerMerger.getMergedHeader();
}
public void close() {
read1Iterator.close();
read2Iterator.close();
}
public boolean hasNext() {
return read1Iterator.hasNext() || read2Iterator.hasNext();
}
public SAMRecord next() {
if (read1Iterator.hasNext()) {
if (read2Iterator.hasNext()) {
return (read1Iterator.peek().getReadName().compareTo(read2Iterator.peek().getReadName()) <= 0)
? setPairFlags(read1Iterator.next(), true)
: setPairFlags(read2Iterator.next(), false);
}
else {
return setPairFlags(read1Iterator.next(), true);
}
}
else {
return setPairFlags(read2Iterator.next(), false);
}
}
public void remove() {
throw new UnsupportedOperationException("remove() not supported");
}
public SAMFileHeader getHeader() { return this.header; }
private SAMRecord setPairFlags(final SAMRecord sam, final boolean firstOfPair) {
sam.setReadPairedFlag(true);
sam.setFirstOfPairFlag(firstOfPair);
sam.setSecondOfPairFlag(!firstOfPair);
return sam;
}
}
/**
* For now, we only ignore those alignments that have more than <code>maxGaps</code> insertions
* or deletions.
*/
protected boolean ignoreAlignment(final SAMRecord sam) {
if (maxGaps == -1) return false;
int gaps = 0;
for (final CigarElement el : sam.getCigar().getCigarElements()) {
if (el.getOperator() == CigarOperator.I || el.getOperator() == CigarOperator.D ) {
gaps++;
}
}
return gaps > maxGaps;
}
// Accessor for testing
public boolean getForceSort() {return this.forceSort; }
}