/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.sam;
import htsjdk.samtools.BAMRecordCodec;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.ReservedTagConstants;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMProgramRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordCoordinateComparator;
import htsjdk.samtools.SAMRecordQueryNameComparator;
import htsjdk.samtools.SAMRecordUtil;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.SamPairUtil;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.filter.FilteringIterator;
import htsjdk.samtools.filter.SamRecordFilter;
import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
import htsjdk.samtools.util.CigarUtil;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.SortingCollection;
import picard.PicardException;
import java.io.File;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.List;
/**
* Abstract class that coordinates the general task of taking in a set of alignment information,
* possibly in SAM format, possibly in other formats, and merging that with the set of all reads
* for which alignment was attempted, stored in an unmapped SAM file.
*
* The order of processing is as follows:
*
* 1. Get records from the unmapped bam and the alignment data
* 2. Merge the alignment information and public tags ONLY from the aligned SAMRecords
* 3. Do additional modifications -- handle clipping, trimming, etc.
* 4. Fix up mate information on paired reads
* 5. Do a final calculation of the NM and UQ tags.
* 6. Write the records to the output file.
*
* Concrete subclasses which extend AbstractAlignmentMerger should implement getQueryNameSortedAlignedRecords.
* If these records are not in queryname order, mergeAlignment will throw an IllegalStateException.
*
* Subclasses may optionally implement ignoreAlignment(), which can be used to skip over certain alignments.
*
*
* @author ktibbett@broadinstitute.org
*/
public abstract class AbstractAlignmentMerger {
public static final int MAX_RECORDS_IN_RAM = 500000;
private static final char[] RESERVED_ATTRIBUTE_STARTS = {'X','Y', 'Z'};
private final Log log = Log.getInstance(AbstractAlignmentMerger.class);
private final ProgressLogger progress = new ProgressLogger(this.log, 1000000, "Written to sorting collection in queryname order", "records");
private final File unmappedBamFile;
private final File targetBamFile;
private final SAMSequenceDictionary sequenceDictionary;
private ReferenceSequenceFileWalker refSeq = null;
private final boolean clipAdapters;
private final boolean bisulfiteSequence;
private SAMProgramRecord programRecord;
private final boolean alignedReadsOnly;
private final SAMFileHeader header;
private final List<String> attributesToRetain = new ArrayList<String>();
private final List<String> attributesToRemove = new ArrayList<String>();
private final File referenceFasta;
private final Integer read1BasesTrimmed;
private final Integer read2BasesTrimmed;
private final List<SamPairUtil.PairOrientation> expectedOrientations;
private final SortOrder sortOrder;
private MultiHitAlignedReadIterator alignedIterator = null;
private boolean clipOverlappingReads = true;
private int maxRecordsInRam = MAX_RECORDS_IN_RAM;
private final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy;
private boolean keepAlignerProperPairFlags = false;
private boolean addMateCigar = false;
private final SamRecordFilter alignmentFilter = new SamRecordFilter() {
public boolean filterOut(final SAMRecord record) {
return ignoreAlignment(record);
}
public boolean filterOut(final SAMRecord first, final SAMRecord second) {
throw new UnsupportedOperationException("Paired SamRecordFilter not implemented!");
}
};
private boolean includeSecondaryAlignments = true;
protected abstract CloseableIterator<SAMRecord> getQuerynameSortedAlignedRecords();
protected boolean ignoreAlignment(final SAMRecord sam) { return false; } // default implementation
/**
* Constructor
*
* @param unmappedBamFile The BAM file that was used as the input to the aligner, which will
* include info on all the reads that did not map. Required.
* @param targetBamFile The file to which to write the merged SAM records. Required.
* @param referenceFasta The reference sequence for the map files. Required.
* @param clipAdapters Whether adapters marked in unmapped BAM file should be marked as
* soft clipped in the merged bam. Required.
* @param bisulfiteSequence Whether the reads are bisulfite sequence (used when calculating the
* NM and UQ tags). Required.
* @param alignedReadsOnly Whether to output only those reads that have alignment data
* @param programRecord Program record for target file SAMRecords created.
* @param attributesToRetain private attributes from the alignment record that should be
* included when merging. This overrides the exclusion of
* attributes whose tags start with the reserved characters
* of X, Y, and Z
* @param attributesToRemove attributes from the alignment record that should be
* removed when merging. This overrides attributesToRetain if they share
* common tags.
* @param read1BasesTrimmed The number of bases trimmed from start of read 1 prior to alignment. Optional.
* @param read2BasesTrimmed The number of bases trimmed from start of read 2 prior to alignment. Optional.
* @param expectedOrientations A List of SamPairUtil.PairOrientations that are expected for
* aligned pairs. Used to determine the properPair flag.
* @param sortOrder The order in which the merged records should be output. If null,
* output will be coordinate-sorted
* @param primaryAlignmentSelectionStrategy What to do when there are multiple primary alignments, or multiple
* alignments but none primary, for a read or read pair.
* @param addMateCigar True if we are to add or maintain the mate CIGAR (MC) tag, false if we are to remove or not include.
*/
public AbstractAlignmentMerger(final File unmappedBamFile, final File targetBamFile,
final File referenceFasta, final boolean clipAdapters,
final boolean bisulfiteSequence, final boolean alignedReadsOnly,
final SAMProgramRecord programRecord, final List<String> attributesToRetain,
final List<String> attributesToRemove,
final Integer read1BasesTrimmed, final Integer read2BasesTrimmed,
final List<SamPairUtil.PairOrientation> expectedOrientations,
final SAMFileHeader.SortOrder sortOrder,
final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy,
final boolean addMateCigar) {
IOUtil.assertFileIsReadable(unmappedBamFile);
IOUtil.assertFileIsWritable(targetBamFile);
IOUtil.assertFileIsReadable(referenceFasta);
this.unmappedBamFile = unmappedBamFile;
this.targetBamFile = targetBamFile;
this.referenceFasta = referenceFasta;
this.refSeq = new ReferenceSequenceFileWalker(referenceFasta);
this.sequenceDictionary = refSeq.getSequenceDictionary();
if (this.sequenceDictionary == null) {
throw new PicardException("No sequence dictionary found for " + referenceFasta.getAbsolutePath() +
". Use CreateSequenceDictionary.jar to create a sequence dictionary.");
}
this.clipAdapters = clipAdapters;
this.bisulfiteSequence = bisulfiteSequence;
this.alignedReadsOnly = alignedReadsOnly;
this.header = new SAMFileHeader();
this.sortOrder = sortOrder != null ? sortOrder : SortOrder.coordinate;
header.setSortOrder(SortOrder.coordinate);
if (programRecord != null) {
setProgramRecord(programRecord);
}
header.setSequenceDictionary(this.sequenceDictionary);
if (attributesToRetain != null) {
this.attributesToRetain.addAll(attributesToRetain);
}
if (attributesToRemove != null) {
this.attributesToRemove.addAll(attributesToRemove);
// attributesToRemove overrides attributesToRetain
if (!this.attributesToRetain.isEmpty()) {
for (String attribute : this.attributesToRemove) {
if (this.attributesToRetain.contains(attribute)) {
log.info("Overriding retaining the " + attribute + " tag since remove overrides retain.");
this.attributesToRetain.remove(attribute);
}
}
}
}
this.read1BasesTrimmed = read1BasesTrimmed;
this.read2BasesTrimmed = read2BasesTrimmed;
this.expectedOrientations = expectedOrientations;
this.primaryAlignmentSelectionStrategy = primaryAlignmentSelectionStrategy;
this.addMateCigar = addMateCigar;
}
/** Allows the caller to override the maximum records in RAM. */
public void setMaxRecordsInRam(final int maxRecordsInRam) {
this.maxRecordsInRam = maxRecordsInRam;
}
/**
* Do this unconditionally, not just for aligned records, for two reasons:
* - An unaligned read has been processed by the aligner, so it is more truthful.
* - When chaining additional PG records, having all the records in the output file refer to the same PG
* record means that only one chain will need to be created, rather than a chain for the mapped reads
* and a separate chain for the unmapped reads.
*/
private void maybeSetPgTag(final SAMRecord rec) {
if (this.programRecord != null) {
rec.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID, this.programRecord.getProgramGroupId());
}
}
/**
/**
* Merges the alignment data with the non-aligned records from the source BAM file.
*/
public void mergeAlignment() {
// Open the file of unmapped records and write the read groups to the the header for the merged file
final SamReader unmappedSam = SamReaderFactory.makeDefault().open(this.unmappedBamFile);
// Check that the program record we are going to insert is not already used in the unmapped SAM
if (getProgramRecord() != null) {
for (final SAMProgramRecord pg : unmappedSam.getFileHeader().getProgramRecords()) {
if (pg.getId().equals(getProgramRecord().getId())) {
throw new PicardException("Program Record ID already in use in unmapped BAM file.");
}
}
}
final CloseableIterator<SAMRecord> unmappedIterator = unmappedSam.iterator();
this.header.setReadGroups(unmappedSam.getFileHeader().getReadGroups());
int aligned = 0;
int unmapped = 0;
// Get the aligned records and set up the first one
alignedIterator = new MultiHitAlignedReadIterator(new FilteringIterator(getQuerynameSortedAlignedRecords(), alignmentFilter), primaryAlignmentSelectionStrategy);
HitsForInsert nextAligned = nextAligned();
// Create the sorting collection that will write the records in the coordinate order
// to the final bam file
final SortingCollection<SAMRecord> sorted = SortingCollection.newInstance(
SAMRecord.class, new BAMRecordCodec(header), new SAMRecordCoordinateComparator(),
MAX_RECORDS_IN_RAM);
while (unmappedIterator.hasNext()) {
// Load next unaligned read or read pair.
final SAMRecord rec = unmappedIterator.next();
rec.setHeader(this.header);
maybeSetPgTag(rec);
final SAMRecord secondOfPair;
if (rec.getReadPairedFlag()) {
secondOfPair = unmappedIterator.next();
secondOfPair.setHeader(this.header);
maybeSetPgTag(secondOfPair);
// Validate that paired reads arrive as first of pair followed by second of pair
if (!rec.getReadName().equals(secondOfPair.getReadName()))
throw new PicardException("Second read from pair not found in unmapped bam: " + rec.getReadName() + ", " + secondOfPair.getReadName());
if (!rec.getFirstOfPairFlag()) throw new PicardException("First record in unmapped bam is not first of pair: " + rec.getReadName());
if (!secondOfPair.getReadPairedFlag()) throw new PicardException("Second record in unmapped bam is not marked as paired: " + secondOfPair.getReadName());
if (!secondOfPair.getSecondOfPairFlag()) throw new PicardException("Second record in unmapped bam is not second of pair: " + secondOfPair.getReadName());
}
else {
secondOfPair = null;
}
// See if there are alignments for current unaligned read or read pair.
if (nextAligned != null && rec.getReadName().equals(nextAligned.getReadName())) {
// If there are multiple alignments for a read (pair), then the unaligned SAMRecord must be cloned
// before copying info from the aligned record to the unaligned.
final boolean clone = nextAligned.numHits() > 1 || nextAligned.hasSupplementalHits();
SAMRecord r1Primary = null, r2Primary = null;
if (rec.getReadPairedFlag()) {
for (int i = 0; i < nextAligned.numHits(); ++i) {
// firstAligned or secondAligned may be null, if there wasn't an alignment for the end,
// or if the alignment was rejected by ignoreAlignment.
final SAMRecord firstAligned = nextAligned.getFirstOfPair(i);
final SAMRecord secondAligned = nextAligned.getSecondOfPair(i);
final boolean isPrimaryAlignment = (firstAligned != null && !firstAligned.isSecondaryOrSupplementary()) ||
(secondAligned != null && !secondAligned.isSecondaryOrSupplementary());
final SAMRecord firstToWrite;
final SAMRecord secondToWrite;
if (clone) {
firstToWrite = clone(rec);
secondToWrite = clone(secondOfPair);
} else {
firstToWrite = rec;
secondToWrite = secondOfPair;
}
// If these are the primary alignments then stash them for use on any supplemental alignments
if (isPrimaryAlignment) {
r1Primary = firstToWrite;
r2Primary = secondToWrite;
}
transferAlignmentInfoToPairedRead(firstToWrite, secondToWrite, firstAligned, secondAligned);
// Only write unmapped read when it has the mate info from the primary alignment.
if (!firstToWrite.getReadUnmappedFlag() || isPrimaryAlignment) {
addIfNotFiltered(sorted, firstToWrite);
if (firstToWrite.getReadUnmappedFlag()) ++unmapped;
else ++aligned;
}
if (!secondToWrite.getReadUnmappedFlag() || isPrimaryAlignment) {
addIfNotFiltered(sorted, secondToWrite);
if (!secondToWrite.getReadUnmappedFlag()) ++aligned;
else ++unmapped;
}
}
// Take all of the supplemental reads which had been stashed and add them (as appropriate) to sorted
for (final boolean isRead1 : new boolean[]{true,false}) {
final List<SAMRecord> supplementals = isRead1 ? nextAligned.getSupplementalFirstOfPairOrFragment() : nextAligned.getSupplementalSecondOfPair();
final SAMRecord sourceRec = isRead1 ? rec : secondOfPair;
final SAMRecord matePrimary = isRead1 ? r2Primary : r1Primary;
for (final SAMRecord supp : supplementals) {
final SAMRecord out = clone(sourceRec);
transferAlignmentInfoToFragment(out, supp);
if (matePrimary != null) SamPairUtil.setMateInformationOnSupplementalAlignment(out, matePrimary, addMateCigar);
++aligned;
addIfNotFiltered(sorted, out);
}
}
}
else {
for (int i = 0; i < nextAligned.numHits(); ++i) {
final SAMRecord recToWrite = clone ? clone(rec) : rec;
transferAlignmentInfoToFragment(recToWrite, nextAligned.getFragment(i));
addIfNotFiltered(sorted, recToWrite);
if (recToWrite.getReadUnmappedFlag()) ++unmapped;
else ++aligned;
}
// Take all of the supplemental reads which had been stashed and add them (as appropriate) to sorted
for (final SAMRecord supplementalRec : nextAligned.getSupplementalFirstOfPairOrFragment()) {
final SAMRecord recToWrite = clone(rec);
transferAlignmentInfoToFragment(recToWrite, supplementalRec);
addIfNotFiltered(sorted, recToWrite);
++aligned;
}
}
nextAligned = nextAligned();
} else {
// There was no alignment for this read or read pair.
if (nextAligned != null &&
SAMRecordQueryNameComparator.compareReadNames(rec.getReadName(), nextAligned.getReadName()) > 0) {
throw new IllegalStateException("Aligned record iterator (" + nextAligned.getReadName() +
") is behind the unmapped reads (" + rec.getReadName() + ")");
}
// No matching read from alignedIterator -- just output reads as is.
if (!alignedReadsOnly) {
sorted.add(rec);
++unmapped;
if (secondOfPair != null) {
sorted.add(secondOfPair);
++unmapped;
}
}
}
}
unmappedIterator.close();
if (alignedIterator.hasNext()) {
throw new IllegalStateException("Reads remaining on alignment iterator: " + alignedIterator.next().getReadName() + "!");
}
alignedIterator.close();
// Write the records to the output file in specified sorted order,
header.setSortOrder(this.sortOrder);
final boolean presorted = this.sortOrder == SortOrder.coordinate;
final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, presorted, this.targetBamFile);
writer.setProgressLogger(
new ProgressLogger(log, (int) 1e7, "Wrote", "records from a sorting collection"));
final ProgressLogger finalProgress = new ProgressLogger(log, 10000000, "Written in coordinate order to output", "records");
for (final SAMRecord rec : sorted) {
if (!rec.getReadUnmappedFlag()) {
if (refSeq != null) {
final byte[] referenceBases = refSeq.get(sequenceDictionary.getSequenceIndex(rec.getReferenceName())).getBases();
rec.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(rec, referenceBases, 0, bisulfiteSequence));
if (rec.getBaseQualities() != SAMRecord.NULL_QUALS) {
rec.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(rec, referenceBases, 0, bisulfiteSequence));
}
}
}
writer.addAlignment(rec);
finalProgress.record(rec);
}
writer.close();
sorted.cleanup();
log.info("Wrote " + aligned + " alignment records and " + (alignedReadsOnly ? 0 : unmapped) + " unmapped reads.");
}
/**
* Add record if it is primary or optionally secondary.
*/
private void addIfNotFiltered(final SortingCollection<SAMRecord> sorted, final SAMRecord rec) {
if (includeSecondaryAlignments || !rec.getNotPrimaryAlignmentFlag()) {
sorted.add(rec);
this.progress.record(rec);
}
}
private SAMRecord clone(final SAMRecord rec) {
try {
return (SAMRecord)rec.clone();
} catch (CloneNotSupportedException e) {
throw new PicardException("Should never happen.");
}
}
/**
* @return Next read's alignment(s) from aligned input or null, if there are no more.
* The alignments are run through ignoreAlignment() filter before being returned, which may result
* in an entire read being skipped if all alignments for that read should be ignored.
*/
private HitsForInsert nextAligned() {
if (alignedIterator.hasNext()) return alignedIterator.next();
return null;
}
/**
* Copies alignment info from aligned to unaligned read, clips as appropriate, and sets PG ID.
* @param unaligned Original SAMRecord, and object into which values are copied.
* @param aligned Holds alignment info that will be copied into unaligned.
*/
private void transferAlignmentInfoToFragment(final SAMRecord unaligned, final SAMRecord aligned) {
setValuesFromAlignment(unaligned, aligned);
updateCigarForTrimmedOrClippedBases(unaligned, aligned);
if (SAMUtils.cigarMapsNoBasesToRef(unaligned.getCigar())) {
SAMUtils.makeReadUnmapped(unaligned);
} else if (SAMUtils.recordMapsEntirelyBeyondEndOfReference(aligned)) {
log.warn("Record mapped off end of reference; making unmapped: " + aligned);
SAMUtils.makeReadUnmapped(unaligned);
}
}
/**
* Copies alignment info from aligned to unaligned read, if there is an alignment, and sets mate information.
* @param firstUnaligned Original first of pair, into which alignment and pair info will be written.
* @param secondUnaligned Original second of pair, into which alignment and pair info will be written.
* @param firstAligned Aligned first of pair, or null if no alignment.
* @param secondAligned Aligned second of pair, or null if no alignment.
*/
private void transferAlignmentInfoToPairedRead(final SAMRecord firstUnaligned, final SAMRecord secondUnaligned, final SAMRecord firstAligned, final SAMRecord secondAligned) {
if (firstAligned != null) transferAlignmentInfoToFragment(firstUnaligned, firstAligned);
if (secondAligned != null) transferAlignmentInfoToFragment(secondUnaligned, secondAligned);
if (isClipOverlappingReads()) clipForOverlappingReads(firstUnaligned, secondUnaligned);
SamPairUtil.setMateInfo(secondUnaligned, firstUnaligned, header, addMateCigar);
if (!keepAlignerProperPairFlags) {
SamPairUtil.setProperPairFlags(secondUnaligned, firstUnaligned, expectedOrientations);
}
}
/**
* Checks to see whether the ends of the reads overlap and soft clips reads
* them if necessary.
*/
protected void clipForOverlappingReads(final SAMRecord read1, final SAMRecord read2) {
// If both reads are mapped, see if we need to clip the ends due to small
// insert size
if (!(read1.getReadUnmappedFlag() || read2.getReadUnmappedFlag())) {
if (read1.getReadNegativeStrandFlag() != read2.getReadNegativeStrandFlag())
{
final SAMRecord pos = (read1.getReadNegativeStrandFlag()) ? read2 : read1;
final SAMRecord neg = (read1.getReadNegativeStrandFlag()) ? read1 : read2;
// Innies only -- do we need to do anything else about jumping libraries?
if (pos.getAlignmentStart() < neg.getAlignmentEnd()) {
final int posDiff = pos.getAlignmentEnd() - neg.getAlignmentEnd();
final int negDiff = pos.getAlignmentStart() - neg.getAlignmentStart();
if (posDiff > 0) {
CigarUtil.softClip3PrimeEndOfRead(pos, Math.min(pos.getReadLength(),
pos.getReadLength() - posDiff + 1));
}
if (negDiff > 0) {
CigarUtil.softClip3PrimeEndOfRead(neg, Math.min(neg.getReadLength(),
neg.getReadLength() - negDiff + 1));
}
}
}
else {
// TODO: What about RR/FF pairs?
}
}
}
/**
* Sets the values from the alignment record on the unaligned BAM record. This
* preserves all data from the unaligned record (ReadGroup, NoiseRead status, etc)
* and adds all the alignment info
*
* @param rec The unaligned read record
* @param alignment The alignment record
*/
protected void setValuesFromAlignment(final SAMRecord rec, final SAMRecord alignment) {
for (final SAMRecord.SAMTagAndValue attr : alignment.getAttributes()) {
// Copy over any non-reserved attributes. attributesToRemove overrides attributesToRetain.
if ((!isReservedTag(attr.tag) || this.attributesToRetain.contains(attr.tag)) && !this.attributesToRemove.contains(attr.tag)) {
rec.setAttribute(attr.tag, attr.value);
}
}
rec.setReadUnmappedFlag(alignment.getReadUnmappedFlag());
// Note that it is important to get reference names rather than indices in case the sequence dictionaries
// in the two files are in different orders.
rec.setReferenceName(alignment.getReferenceName());
rec.setAlignmentStart(alignment.getAlignmentStart());
rec.setReadNegativeStrandFlag(alignment.getReadNegativeStrandFlag());
rec.setNotPrimaryAlignmentFlag(alignment.getNotPrimaryAlignmentFlag());
rec.setSupplementaryAlignmentFlag(alignment.getSupplementaryAlignmentFlag());
if (!alignment.getReadUnmappedFlag()) {
// only aligned reads should have cigar and mapping quality set
rec.setCigar(alignment.getCigar()); // cigar may change when a
// clipCigar called below
rec.setMappingQuality(alignment.getMappingQuality());
}
if (rec.getReadPairedFlag()) {
rec.setProperPairFlag(alignment.getProperPairFlag());
// Mate info and alignment size will get set by the ClippedPairFixer.
}
// If it's on the negative strand, reverse complement the bases
// and reverse the order of the qualities
if (rec.getReadNegativeStrandFlag()) {
SAMRecordUtil.reverseComplement(rec);
}
}
private static Cigar createNewCigarIfMapsOffEndOfReference(SAMFileHeader header,
boolean isUnmapped,
int referenceIndex,
int alignmentEnd,
int readLength,
Cigar oldCigar) {
Cigar newCigar = null;
if (!isUnmapped) {
final SAMSequenceRecord refseq = header.getSequence(referenceIndex);
final int overhang = alignmentEnd - refseq.getSequenceLength();
if (overhang > 0) {
// 1-based index of first base in read to clip.
int clipFrom = readLength - overhang + 1;
// we have to check if the last element is soft-clipping, so we can subtract that from clipFrom
final CigarElement cigarElement = oldCigar.getCigarElement(oldCigar.getCigarElements().size()-1);
if (CigarOperator.SOFT_CLIP == cigarElement.getOperator()) clipFrom -= cigarElement.getLength();
final List<CigarElement> newCigarElements = CigarUtil.softClipEndOfRead(clipFrom, oldCigar.getCigarElements());
newCigar = new Cigar(newCigarElements);
}
}
return newCigar;
}
/**
* Soft-clip an alignment that hangs off the end of its reference sequence. Checks both the read and its mate,
* if available.
* @param rec
*/
public static void createNewCigarsIfMapsOffEndOfReference(final SAMRecord rec) {
// If the read maps off the end of the alignment, clip it
if (!rec.getReadUnmappedFlag()) {
final Cigar readCigar = createNewCigarIfMapsOffEndOfReference(rec.getHeader(),
rec.getReadUnmappedFlag(),
rec.getReferenceIndex(),
rec.getAlignmentEnd(),
rec.getReadLength(),
rec.getCigar());
if (null != readCigar) rec.setCigar(readCigar);
}
// If the read's mate maps off the end of the alignment, clip it
if (SAMUtils.hasMateCigar(rec)) {
Cigar mateCigar = SAMUtils.getMateCigar(rec);
mateCigar = createNewCigarIfMapsOffEndOfReference(rec.getHeader(),
rec.getMateUnmappedFlag(),
rec.getMateReferenceIndex(),
SAMUtils.getMateAlignmentEnd(rec), // NB: this could be computed without another call to getMateCigar
mateCigar.getReadLength(),
mateCigar);
if (null != mateCigar) rec.setAttribute(SAMTag.MC.name(), mateCigar.toString());
}
}
protected void updateCigarForTrimmedOrClippedBases(final SAMRecord rec, final SAMRecord alignment) {
// If the read was trimmed or not all the bases were sent for alignment, clip it
final int alignmentReadLength = alignment.getReadLength();
final int originalReadLength = rec.getReadLength();
final int trimmed = (!rec.getReadPairedFlag()) || rec.getFirstOfPairFlag()
? this.read1BasesTrimmed != null ? this.read1BasesTrimmed : 0
: this.read2BasesTrimmed != null ? this.read2BasesTrimmed : 0;
final int notWritten = originalReadLength - (alignmentReadLength + trimmed);
// Update cigar if the mate maps off the reference
createNewCigarsIfMapsOffEndOfReference(rec);
rec.setCigar(CigarUtil.addSoftClippedBasesToEndsOfCigar(
rec.getCigar(), rec.getReadNegativeStrandFlag(), notWritten, trimmed));
// If the adapter sequence is marked and clipAdapter is true, clip it
if (this.clipAdapters && rec.getAttribute(ReservedTagConstants.XT) != null){
CigarUtil.softClip3PrimeEndOfRead(rec, rec.getIntegerAttribute(ReservedTagConstants.XT));
}
}
protected SAMSequenceDictionary getSequenceDictionary() { return this.sequenceDictionary; }
protected SAMProgramRecord getProgramRecord() { return this.programRecord; }
protected void setProgramRecord(final SAMProgramRecord pg ) {
if (this.programRecord != null) {
throw new IllegalStateException("Cannot set program record more than once on alignment merger.");
}
this.programRecord = pg;
this.header.addProgramRecord(pg);
SAMUtils.chainSAMProgramRecord(header, pg);
}
protected boolean isReservedTag(final String tag) {
final char firstCharOfTag = tag.charAt(0);
// All tags that start with a lower-case letter are user defined and should not be overridden by aligner
// unless explicitly specified in attributesToRetain.
if (Character.isLowerCase(firstCharOfTag)) return true;
for (final char c : RESERVED_ATTRIBUTE_STARTS) {
if (firstCharOfTag == c) return true;
}
return false;
}
protected SAMFileHeader getHeader() { return this.header; }
protected void resetRefSeqFileWalker() {
this.refSeq = new ReferenceSequenceFileWalker(referenceFasta);
}
public boolean isClipOverlappingReads() {
return clipOverlappingReads;
}
public void setClipOverlappingReads(final boolean clipOverlappingReads) {
this.clipOverlappingReads = clipOverlappingReads;
}
public boolean isKeepAlignerProperPairFlags() {
return keepAlignerProperPairFlags;
}
/**
* If true, keep the aligner's idea of proper pairs rather than letting alignment merger decide.
*/
public void setKeepAlignerProperPairFlags(final boolean keepAlignerProperPairFlags) {
this.keepAlignerProperPairFlags = keepAlignerProperPairFlags;
}
public void setIncludeSecondaryAlignments(final boolean includeSecondaryAlignments) {
this.includeSecondaryAlignments = includeSecondaryAlignments;
}
public void close() {
CloserUtil.close(this.refSeq);
}
}