/* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. */
package abra;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileReader.ValidationStringency;
public class CompareToReference {
private StringBuffer reference;
private String refFileName;
private String currSeqName = "";
private String cachedRefLine = null;
private BufferedReader refReader;
public void compare(String sam, String refFileName, int maxDiff) throws IOException, FileNotFoundException {
refReader = new BufferedReader(new FileReader(refFileName));
SAMFileReader reader = new SAMFileReader(new File(sam));
reader.setValidationStringency(ValidationStringency.SILENT);
int count = 0;
int totalMapped = 0;
for (SAMRecord read : reader) {
if (!read.getReadUnmappedFlag()) {
String seq = read.getReferenceName();
if (!seq.equals(currSeqName)) {
loadSeqRef(seq);
}
// String refStr = reference.substring(read.getAlignmentStart()-1, read.getAlignmentStart()+99);
// if (!refStr.equals(read.getReadString())) {
// StringBuffer diffStr = new StringBuffer();
if (numDifferences(read) > maxDiff) {
System.out.println("------------");
System.out.println("read: " + read.getSAMString());
// System.out.println("ref: " + refStr);
// System.out.println("dif: " + diffStr.toString());
count += 1;
}
totalMapped += 1;
}
}
System.out.println("count: " + count + " out of: " + totalMapped);
reader.close();
}
public void init(String reference) throws FileNotFoundException {
refReader = new BufferedReader(new FileReader(reference));
}
public void cleanup() throws IOException {
refReader.close();
}
public int numMismatches(SAMRecord read) throws IOException {
int mismatches = 0;
if (!read.getReadUnmappedFlag()) {
String seq = read.getReferenceName();
if (!seq.equals(currSeqName)) {
loadSeqRef(seq);
}
mismatches = numDifferences(read);
}
return mismatches;
}
private int numDifferences(SAMRecord read) {
int diffs = 0;
int readIdx = 0;
int refIdx = read.getAlignmentStart()-1;
for (CigarElement element : read.getCigar().getCigarElements()) {
if (element.getOperator() == CigarOperator.M) {
for (int i=0; i<element.getLength(); i++) {
char readBase = Character.toUpperCase(read.getReadString().charAt(readIdx));
char refBase = Character.toUpperCase(reference.charAt(refIdx));
if ((readBase != refBase) && (readBase != 'N') && (refBase != 'N')) {
diffs++;
}
readIdx++;
refIdx++;
}
} else if (element.getOperator() == CigarOperator.I) {
readIdx += element.getLength();
} else if (element.getOperator() == CigarOperator.D) {
refIdx += element.getLength();
} else if (element.getOperator() == CigarOperator.S) {
readIdx += element.getLength();
}
}
// int diffs = 0;
// int i = 0;
// while ((i < s1.length()) && (i < s2.length())) {
// int ch1 = Character.toUpperCase(s1.charAt(i));
// int ch2 = Character.toUpperCase(s2.charAt(i));
// if (ch1 != ch2 && ch1 != 'N' && ch2 != 'N') {
// diffs += 1;
// desc.append('*');
// } else {
// desc.append(' ');
// }
//
// i +=1;
// }
return diffs;
}
private void loadSeqRef(String seq) throws IOException {
System.out.println("Loading: " + seq);
this.currSeqName = seq;
String line = getRefLine();
while ((line != null) && (!line.equals(">" + seq))) {
line = getRefLine();
}
if (line != null) {
line = getRefLine();
}
reference = new StringBuffer();
while ((line != null) && (!line.startsWith(">"))) {
reference.append(line);
line = getRefLine();
if ((line != null) && (line.startsWith(">"))) {
cachedRefLine = line;
}
}
System.out.println("Loaded: " + seq);
}
private String getRefLine() throws IOException {
String line = null;
if (cachedRefLine != null) {
line = cachedRefLine;
cachedRefLine = null;
} else {
line = refReader.readLine();
}
return line;
}
public static void main(String[] args) throws Exception {
String ref = args[0];
String sam = args[1];
int max = Integer.parseInt(args[2]);
// String ref = "/home/lmose/reference/chr16/chr16.fa";
// String sam = "/home/lmose/dev/ayc/sim/sim80/sorted_rr4.bam";
// String sam = "/home/lmose/dev/ayc/sim/sim80/sorted_rr.bam";
//String sam = "/home/lmose/dev/ayc/sim/sim80/chr16.bam";
// String sam = "/home/lmose/dev/ayc/sim/sim80/rchr16.bam";
CompareToReference c2r = new CompareToReference();
c2r.compare(sam, ref, max);
}
}