/* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. */
package abra;
import java.util.ArrayList;
import java.util.List;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
/**
* Utility methods for dealing with SAMRecord
*
* @author Lisle E. Mose (lmose at unc dot edu)
*/
public class SAMRecordUtils {
/**
* Replace hard clips with soft clips.
*/
public static void replaceHardClips(SAMRecord read) {
Cigar cigar = read.getCigar();
if (cigar.getCigarElements().size() > 0) {
CigarElement firstElement = cigar.getCigarElement(0);
CigarElement lastElement = cigar.getCigarElement(cigar.numCigarElements()-1);
if ((firstElement.getOperator() == CigarOperator.H) ||
(lastElement.getOperator() == CigarOperator.H)) {
Cigar newCigar = new Cigar();
boolean isFirst = true;
for (CigarElement element : cigar.getCigarElements()) {
if (element.getOperator() != CigarOperator.H) {
newCigar.add(element);
} else {
CigarElement softClip = new CigarElement(element.getLength(), CigarOperator.S);
newCigar.add(softClip);
if (isFirst) {
read.setReadString(padBases(element.getLength()) + read.getReadString());
} else {
read.setReadString(read.getReadString() + padBases(element.getLength()));
}
}
isFirst = false;
}
read.setCigar(newCigar);
}
}
}
private static String padBases(int length) {
StringBuffer buf = new StringBuffer(length);
for (int i=0; i<length; i++) {
buf.append('N');
}
return buf.toString();
}
/**
* Remove leading or trailing soft clips from the input read.
* Does not modify a read entirely comprised of soft clips.
*/
public static void removeSoftClips(SAMRecord read) {
Cigar cigar = read.getCigar();
CigarElement firstElement = cigar.getCigarElement(0);
CigarElement lastElement = cigar.getCigarElement(cigar.numCigarElements()-1);
if ((firstElement.getOperator() == CigarOperator.S) ||
(lastElement.getOperator() == CigarOperator.S)) {
Cigar newCigar = new Cigar();
String bases = read.getReadString();
//String qualities = read.getBaseQualityString();
if (firstElement.getOperator() == CigarOperator.S) {
bases = bases.substring(firstElement.getLength(), bases.length());
//qualities = qualities.substring(firstElement.getLength(), qualities.length()-1);
} else {
newCigar.add(firstElement);
}
for (int i=1; i<cigar.numCigarElements()-1; i++) {
newCigar.add(cigar.getCigarElement(i));
}
if (lastElement.getOperator() == CigarOperator.S) {
bases = bases.substring(0, bases.length() - lastElement.getLength());
//qualities = qualities.substring(0, qualities.length() - lastElement.getLength() - 1);
} else {
newCigar.add(lastElement);
}
read.setCigar(newCigar);
read.setReadString(bases);
//read.setBaseQualityString(qualities);
}
}
public static Cigar subset(Cigar cigar, int startIdx, int endIdx) {
List<CigarElement> subElements = new ArrayList<CigarElement>();
// Find first element and index into first element
int len = 0;
int elemIdx = -1;
for (CigarElement elem : cigar.getCigarElements()) {
// Treat deletions as zero length.
int elemLength = elem.getOperator() == CigarOperator.D ? 0 : elem.getLength();
if (elemIdx < 0) {
// Find first element (Should never be a deletion)
int elemStart = len;
int elemStop = len + elemLength;
if ((startIdx >= elemStart) && (startIdx < elemStop)) {
elemIdx = startIdx - elemStart;
int elemLen = Math.min(elem.getLength()-elemIdx, endIdx-startIdx+1);
CigarElement newElem = new CigarElement(elemLen, elem.getOperator());
subElements.add(newElem);
}
len += elemLength;
} else if ((len + elemLength) <= endIdx) {
// Add this entire element
subElements.add(elem);
len += elemLength;
} else if (len <= endIdx) {
// Add part of last sub element (should never be a deletion)
CigarElement newElem = new CigarElement(endIdx-len+1, elem.getOperator());
subElements.add(newElem);
break;
} else {
break;
}
}
return new Cigar(subElements);
}
/**
* Calculates edit distance for the input read.
* If the input c2r is not null, compare to the actual reference.
* If c2r is null, check the input NM tag.
*/
public static int getEditDistance(SAMRecord read, CompareToReference2 c2r) {
Integer distance = null;
if (read.getReadUnmappedFlag()) {
distance = read.getReadLength();
} else if (c2r != null) {
//distance = c2r.numMismatches(read) + getNumIndelBases(read);
distance = c2r.numMismatches(read) + getNumIndelBases(read);
} else {
distance = read.getIntegerAttribute("NM");
if (distance == null) {
distance = read.getReadLength();
}
}
return distance;
}
/**
* Returns total length of deletions and insertions for the input read.
*/
public static int getNumIndelBases(SAMRecord read) {
int numIndelBases = 0;
for (CigarElement element : read.getCigar().getCigarElements()) {
if ((element.getOperator() == CigarOperator.D) || (element.getOperator() == CigarOperator.I)) {
numIndelBases += element.getLength();
}
}
return numIndelBases;
}
/**
* Returns original edit distance as set in YX tag.
*/
public static int getOrigEditDistance(SAMRecord read) {
Integer distance = null;
if (read.getReadUnmappedFlag()) {
distance = read.getReadLength();
} else {
distance = read.getIntegerAttribute("YX");
if (distance == null) {
distance = read.getReadLength();
}
}
return distance;
}
/**
* Convenience method for retrieving int attribute
*/
public static int getIntAttribute(SAMRecord read, String attribute) {
Integer num = read.getIntegerAttribute(attribute);
if (num == null) {
return 0;
} else {
return num;
}
}
/**
* Return the number of insertions and deletions in a SAMRecord
*/
public static int getNumIndels(SAMRecord read) {
int numIndels = 0;
for (CigarElement element : read.getCigar().getCigarElements()) {
if ((element.getOperator() == CigarOperator.D) || (element.getOperator() == CigarOperator.I)) {
numIndels += 1;
}
}
return numIndels;
}
/**
* Returns true if the updatedRead is essentially the same as the origRead
* minus soft clipping.
*/
public static boolean isSoftClipEquivalent(SAMRecord origRead, SAMRecord updatedRead) {
boolean isEquivalent = false;
if ((origRead.getCigarString().contains("S")) &&
(origRead.getReferenceName().equals(updatedRead.getReferenceName())) &&
(origRead.getReadNegativeStrandFlag() == updatedRead.getReadNegativeStrandFlag()) &&
(origRead.getCigarLength() > 1)) {
// Compare start positions
int nonClippedOrigStart = origRead.getAlignmentStart();
CigarElement firstElem = origRead.getCigar().getCigarElement(0);
if (firstElem.getOperator() == CigarOperator.S) {
nonClippedOrigStart -= firstElem.getLength();
}
if (nonClippedOrigStart == updatedRead.getAlignmentStart()) {
// Compare cigars
List<CigarElement> elems = new ArrayList<CigarElement>(origRead.getCigar().getCigarElements());
CigarElement first = elems.get(0);
// If first element is soft clipped, lengthen the second element
if (first.getOperator() == CigarOperator.S) {
CigarElement second = elems.get(1);
CigarElement newElem = new CigarElement(first.getLength() + second.getLength(), second.getOperator());
elems.set(1, newElem);
}
CigarElement last = elems.get(elems.size()-1);
if (last.getOperator() == CigarOperator.S) {
CigarElement nextToLast = elems.get(elems.size()-2);
CigarElement newElem = new CigarElement(last.getLength() + nextToLast.getLength(), nextToLast.getOperator());
elems.set(elems.size()-2, newElem);
}
List<CigarElement> newElems = new ArrayList<CigarElement>();
for (CigarElement elem : elems) {
if (elem.getOperator() != CigarOperator.S) {
newElems.add(elem);
}
}
Cigar convertedCigar = new Cigar(newElems);
if (convertedCigar.equals(updatedRead.getCigar())) {
isEquivalent = true;
}
}
}
return isEquivalent;
}
/**
* Returns a clone of the input read
*/
public static SAMRecord cloneRead(SAMRecord read) {
try {
return (SAMRecord) read.clone();
} catch (CloneNotSupportedException e) {
// Infamous "this should never happen" comment here.
e.printStackTrace();
throw new RuntimeException(e);
}
}
/**
* Returns true if the input read should be filtered
*/
public static boolean isFiltered(boolean isPairedEnd, SAMRecord read) {
// Filter out single end reads when in paired end mode.
return ((isPairedEnd) && (!read.getReadPairedFlag()));
}
/**
* Returns true if the input read is primary.
* i.e. Bit flag not secondary 0x200 or supplemental 0x800
*/
public static boolean isPrimary(SAMRecord read) {
return ((read.getFlags() & 0x800) == 0) && (!read.getNotPrimaryAlignmentFlag());
}
}