package versusSNP.blast;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.swing.JOptionPane;
import versusSNP.Document;
import versusSNP.Parameter;
import versusSNP.AlignmentParser;
import versusSNP.blast.util.SNPFinder;
import versusSNP.gui.UICaption;
import versusSNP.gui.widgets.ProgressBar;
import versusSNP.util.Utils;
/**
 * Threshold filter applied to a parsed {@link BlastSet}.
 *
 * <p>A set is accepted when its score and overlap percentage are at least the
 * configured minimums and its expect value is at most the configured maximum.
 * The two identity thresholds are stored but are not consulted by
 * {@link #test(BlastSet)}.
 */
class BlastFilter {
    private final int minScore;
    private final int maxExpect;
    private final int minOverlap;
    // Kept for callers that supply them; currently not part of the acceptance test.
    private final int identity1;
    private final int identity2;

    /**
     * @param score     lowest score a set may have and still be accepted
     * @param expect    highest expect value a set may have and still be accepted
     * @param overlap   lowest overlap percentage a set may have and still be accepted
     * @param identity1 first identity threshold (stored only)
     * @param identity2 second identity threshold (stored only)
     */
    public BlastFilter(int score, int expect, int overlap, int identity1, int identity2) {
        this.minScore = score;
        this.maxExpect = expect;
        this.minOverlap = overlap;
        this.identity1 = identity1;
        this.identity2 = identity2;
    }

    /**
     * Checks the set against the score, expect and overlap thresholds, in that
     * order, stopping at the first one that fails.
     *
     * @param set the alignment to check
     * @return {@code true} if every checked threshold is satisfied
     */
    public boolean test(BlastSet set) {
        return set.getScore() >= minScore
                && set.getExpect() <= maxExpect
                && set.getOverlapPercent() >= minOverlap;
    }
}
/**
 * Line-oriented parser for textual BLAST reports.
 *
 * <p>Each alignment found in the report is collected into a {@link BlastSet}.
 * Every completed set is handed to an {@link SNPFinder}, and those that pass
 * the {@link BlastFilter} built from the {@link Parameter} thresholds are
 * appended to the {@link BlastList} returned by {@link #getBlastList()}.
 *
 * <p>When used as a {@link Runnable}, the parser reads the file given at
 * construction time and, on success, attaches the result to the document.
 */
public class BlastParser implements AlignmentParser, Runnable {
    private String path;
    private Document document;

    /** Program identifiers. Only {@link #BLASTN} is handled by {@link #parse(String)}. */
    public static final byte BLASTN = 1;
    public static final byte BLASTP = 2;
    public static final byte BLASTX = 3;
    public static final byte TBLASTN = 4;
    public static final byte TBLASTX = 5;

    /** Selects the report flavour to parse; defaults to {@link #BLASTN}. */
    public byte program = BLASTN;

    private BlastList blastList;

    // Line patterns, tried in the order used by parseBlastN.
    // NOTE(review): m_Query only matches when the name is the last token on the
    // line; a "Query= name description" line would not match - confirm intended.
    private static final Pattern m_Query = Pattern.compile("^Query= (\\S+)$");
    private static final Pattern m_Subject = Pattern.compile("^>(\\S*)");
    private static final Pattern m_Annotation = Pattern.compile("^>\\S*\\s+(\\S.*)$");
    private static final Pattern m_QuerySeq = Pattern.compile("^Query:\\s+(\\d+)\\s+(\\S+)\\s+(\\d+)");
    private static final Pattern m_SubjectSeq = Pattern.compile("^Sbjct:\\s+(\\d+)\\s+(\\S+)\\s+(\\d+)");
    private static final Pattern m_Letters = Pattern.compile("^\\s*\\((\\S+)\\s+letters\\)");
    private static final Pattern m_Length = Pattern.compile("^\\s*Length = (\\d+)");
    private static final Pattern m_Score = Pattern.compile("^\\s*Score = (.+) bits.+Expect =\\s+(\\S+)\\s*");
    private static final Pattern m_Score2 = Pattern.compile("Score = (.+) bits.+Expect\\(\\d+\\) =\\s+(\\S+)\\s*");
    private static final Pattern m_Identities = Pattern.compile("^\\s*Identities = (\\d+)/(\\d+)\\s+\\((.{0,4})%\\)");
    private static final Pattern m_Strand = Pattern.compile("^\\s*Strand = (\\S+)\\s*/\\s*(\\S+)");

    // Parser state, valid only while parseBlastN is running:
    //   isAnother    - the current set has received at least one "Sbjct:" line,
    //                  so the next header line starts a new set
    //   isAnnotation - between a ">" line and the following "Length =" line
    //   isQuery      - the current set already has its query begin position
    //   isSubject    - set when a "Sbjct:" line has been read for the current set
    private boolean isAnother, isAnnotation, isQuery, isSubject;

    /** Creates a parser with an empty result list and no file or document bound. */
    public BlastParser() {
        super();
        blastList = new BlastList();
    }

    /**
     * Creates a parser bound to a report file and a target document, for use
     * through {@link #run()}.
     *
     * @param path     location of the BLAST report to read
     * @param document document that receives the parsed alignments
     */
    public BlastParser(String path, Document document) {
        this();
        this.path = path;
        this.document = document;
    }

    /** @return the list that accepted alignments are appended to */
    public BlastList getBlastList() {
        return blastList;
    }

    /**
     * Parses the report at {@code path} according to {@link #program}.
     *
     * <p>The file is always closed before this method returns, including when
     * the program type is unsupported or parsing fails part-way.
     *
     * @param path location of the BLAST report to read
     * @return {@code true} if the report was read to the end; {@code false} if
     *         the file could not be opened (an error dialog is shown), the
     *         program type is not supported, or parsing failed
     */
    public boolean parse(String path) {
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(
                    new FileInputStream(new File(path))));
            switch (program) {
            case BLASTN:
                return parseBlastN(br);
            default:
                return false;
            }
        } catch (FileNotFoundException e) {
            JOptionPane.showMessageDialog(null, UICaption.dialog_exception_file_io, UICaption.dialog_caption_error, JOptionPane.ERROR_MESSAGE);
            return false;
        } finally {
            closeQuietly(br);
        }
    }

    /** Closes the reader if it was opened; a failure to close a read-only stream is ignored. */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ignored) {
            // Nothing was written through this stream, so there is nothing to recover.
        }
    }

    /**
     * Finishes the current set: adds it to the result list if it passes the
     * filter, clears the per-set flags, then runs SNP detection on it.
     * SNP detection runs for every finished set, whether or not it was added.
     */
    private void finishSet(BlastSet set, BlastFilter filter, SNPFinder snpFinder) {
        if (filter.test(set)) {
            blastList.add(set);
        }
        isAnother = isQuery = isSubject = false;
        snpFinder.find(set);
    }

    /**
     * Reads a BLASTN report line by line and fills {@link #blastList}.
     * The caller owns {@code br} and is responsible for closing it.
     *
     * @param br reader positioned at the start of the report
     * @return {@code true} if the whole input was consumed; {@code false} after
     *         an I/O error or a number-format error (an error dialog is shown)
     */
    private boolean parseBlastN(BufferedReader br) {
        String line;
        Matcher m;
        BlastFilter filter = new BlastFilter(Parameter.blast_filter_score,
                Parameter.blast_filter_expect,
                Parameter.blast_filter_overlap,
                Parameter.blast_filter_identity1,
                Parameter.blast_filter_identity2);
        SNPFinder snpFinder = new SNPFinder();
        BlastSet set = new BlastSet();
        // Start from a clean state so that an earlier parse that aborted, or that
        // ended inside an annotation, cannot influence this one.
        isAnother = isAnnotation = isQuery = isSubject = false;
        try {
            while ((line = br.readLine()) != null) {
                if ((m = m_Query.matcher(line)).find()) {
                    // New query: nothing is carried over from the previous set.
                    if (isAnother) {
                        finishSet(set, filter, snpFinder);
                        set = new BlastSet();
                    }
                    set.setQName(m.group(1));
                } else if ((m = m_Letters.matcher(line)).find()) {
                    set.setQLen(m.group(1));
                } else if ((m = m_Subject.matcher(line)).find()) {
                    // New subject for the same query.
                    // NOTE(review): only the query name is carried over; the value
                    // given to setQLen is not copied - confirm that is intended.
                    if (isAnother) {
                        finishSet(set, filter, snpFinder);
                        set = new BlastSet(set.getQName());
                    }
                    set.setSName(m.group(1));
                    if ((m = m_Annotation.matcher(line)).find()) {
                        set.setAnnotation(m.group(1));
                    }
                    isAnnotation = true;
                } else if ((m = m_Length.matcher(line)).find()) {
                    set.setSLen(m.group(1));
                    isAnnotation = false;
                } else if (isAnnotation) {
                    // Continuation of the subject description that began on the ">" line.
                    set.addAnnotation(Utils.removeSpaceFromBegin(line));
                } else if ((m = m_Score.matcher(line)).find()
                        || (m = m_Score2.matcher(line)).find()) {
                    // Another alignment for the same query/subject pair.
                    if (isAnother) {
                        finishSet(set, filter, snpFinder);
                        set = new BlastSet(set.getQName(), set.getSName());
                    }
                    set.setScore(m.group(1));
                    set.setExpect(m.group(2));
                } else if ((m = m_Identities.matcher(line)).find()) {
                    set.setIdentity1(m.group(1));
                    set.setIdentity2(m.group(2));
                    set.setOverlap(m.group(3));
                } else if ((m = m_Strand.matcher(line)).find()) {
                    set.setStrand1(m.group(1));
                    set.setStrand2(m.group(2));
                } else if ((m = m_QuerySeq.matcher(line)).find()) {
                    // The begin position comes from the first "Query:" line only;
                    // the end position is overwritten by every such line.
                    if (!isQuery) {
                        set.setQBegin(m.group(1));
                    }
                    set.addQSeq(m.group(2), m.group(1), m.group(3));
                    set.setQEnd(m.group(3));
                    isQuery = true;
                } else if ((m = m_SubjectSeq.matcher(line)).find()) {
                    if (!isSubject) {
                        set.setSBegin(m.group(1));
                    }
                    set.addSSeq(m.group(2), m.group(1), m.group(3));
                    set.setSEnd(m.group(3));
                    isSubject = isAnother = true;
                }
            }
            // The last set in the file has no following header line to trigger it.
            if (isAnother) {
                finishSet(set, filter, snpFinder);
            }
            System.gc();
            return true;
        } catch (IOException e) {
            JOptionPane.showMessageDialog(null, UICaption.dialog_exception_file_io, UICaption.dialog_caption_error, JOptionPane.ERROR_MESSAGE);
            return false;
        } catch (NumberFormatException e) {
            JOptionPane.showMessageDialog(null, UICaption.dialog_exception_parse_int_number_format, UICaption.dialog_caption_error, JOptionPane.ERROR_MESSAGE);
            return false;
        }
    }

    /**
     * Parses the file given at construction time while showing a progress bar.
     * On success the parsed sets are attached to the document's query genome
     * and all views are refreshed. The progress bar is disposed even if
     * parsing or attaching throws.
     */
    @Override
    public void run() {
        ProgressBar progressBar = new ProgressBar(UICaption.progress_caption_parse_blast, UICaption.progress_label_parse_blast, true);
        progressBar.setVisible(true);
        try {
            if (parse(path)) {
                document.getQueryGenome().attachBlastSets(blastList, document.getSubjectGenome());
                document.updateAllViews();
            }
        } finally {
            progressBar.dispose();
        }
    }
}