package bgu.bio.algorithms.alignment;
import gnu.trove.list.array.TCharArrayList;
import bgu.bio.util.AffineGapScoringMatrix;
import bgu.bio.util.IdentityAffineScoringMatrix;
import bgu.bio.util.MathOperations;
import bgu.bio.util.alphabet.AlphabetUtils;
import bgu.bio.util.alphabet.RnaAlphabet;
public class SemiAffineGapSemiLocalSequenceAlignment implements
SequenceAlignment {
/**
* The first input string
*/
private char[] str1;
/**
* The second input String
*/
private char[] str2;
/**
* The lengths of the input strings
*/
private int length1, length2;
/**
* The score matrix. The true scores should be divided by the normalization
* factor.
*/
private double[][] dpTable;
/** see {@link #dpTable} */
private double[][] dpTableE;
/** see {@link #dpTable} */
private double[][] dpTableF;
private AffineGapScoringMatrix scoringMatrix;
/**
* Constants of directions. Multiple directions are stored by bits. The zero
* direction is the starting point.
*/
static final byte DR_LEFT = 1; // 0001
static final byte DR_UP = 2; // 0010
static final byte DR_DIAG = 4; // 0100
static final byte DR_ZERO = 8; // 1000
/**
* The directions pointing to the cells that give the maximum score at the
* current cell. The first index is the column index. The second index is
* the row index.
*/
private byte[][] prevCells, prevCellsE, prevCellsF;
private AlphabetUtils alphabet;
private double normFactor = 1;
public SemiAffineGapSemiLocalSequenceAlignment(String str1, String str2,
AlphabetUtils alphabet, AffineGapScoringMatrix matrix) {
this.scoringMatrix = matrix;
this.alphabet = alphabet;
this.dpTable = new double[0][0];
this.dpTableE = new double[0][0];
this.dpTableF = new double[0][0];
this.setSequences(str1, str2);
}
public SemiAffineGapSemiLocalSequenceAlignment(int size1, int size2,
AlphabetUtils alphabet, AffineGapScoringMatrix matrix) {
this.scoringMatrix = matrix;
this.alphabet = alphabet;
length1 = size1;
length2 = size2;
dpTable = new double[length1 + 1][length2 + 1];
dpTableE = new double[length1 + 1][length2 + 1];
dpTableF = new double[length1 + 1][length2 + 1];
prevCells = new byte[length1 + 1][length2 + 1];
prevCellsE = new byte[length1 + 1][length2 + 1];
prevCellsF = new byte[length1 + 1][length2 + 1];
}
@Override
public void setSequences(String str1, String str2) {
this.setSequences(str1.toCharArray(), str2.toCharArray());
}
@Override
public void setSequences(TCharArrayList str1, TCharArrayList str2) {
if (this.str1 == null || this.str1.length < str1.size()) {
this.str1 = str1.toArray();
} else {
str1.toArray(this.str1);
}
if (this.str2 == null || this.str2.length < str2.size()) {
this.str2 = str2.toArray();
} else {
str2.toArray(this.str2);
}
boolean init = str1.size() > dpTable.length
|| str2.size() > dpTable[0].length;
length1 = str1.size();
length2 = str2.size();
if (init) {
dpTable = new double[length1 + 1][length2 + 1];
dpTableE = new double[length1 + 1][length2 + 1];
dpTableF = new double[length1 + 1][length2 + 1];
prevCells = new byte[length1 + 1][length2 + 1];
prevCellsE = new byte[length1 + 1][length2 + 1];
prevCellsF = new byte[length1 + 1][length2 + 1];
}
}
@Override
public void setSequences(char[] str1, char[] str2) {
setSequences(str1, str1.length, str2, str2.length);
}
@Override
public void setSequences(char[] str1, int size1, char[] str2, int size2) {
this.str1 = str1;
this.str2 = str2;
boolean init = size1 > dpTable.length || size2 > dpTable[0].length;
length1 = size1;
length2 = size2;
if (init) {
dpTable = new double[length1 + 1][length2 + 1];
dpTableE = new double[length1 + 1][length2 + 1];
dpTableF = new double[length1 + 1][length2 + 1];
prevCells = new byte[length1 + 1][length2 + 1];
prevCellsE = new byte[length1 + 1][length2 + 1];
prevCellsF = new byte[length1 + 1][length2 + 1];
}
}
/**
* Compute the similarity score of substitution: use a substitution matrix
* if the cost model The position of the first character is 1. A position of
* 0 represents a gap.
*
* @param i
* Position of the character in str1
* @param j
* Position of the character in str2
* @return Cost of substitution of the character in str1 by the one in str2
*/
private final double similarity(int i, int j) {
final char c1 = i == 0 ? this.alphabet.emptyLetter() : str1[i - 1];
final char c2 = j == 0 ? this.alphabet.emptyLetter() : str2[j - 1];
return this.scoringMatrix.score(c1, c2);
}
/**
* Build the score matrix using dynamic programming. Note: The indel scores
* must be negative. Otherwise, the part handling the first row and column
* has to be modified.
*/
@Override
public void buildMatrix() {
int i; // length of prefix substring of str1
int j; // length of prefix substring of str2
// base case
dpTable[0][0] = 0.0;
dpTableE[0][0] = Float.NEGATIVE_INFINITY;// dpTable[0][0];
dpTableF[0][0] = Float.NEGATIVE_INFINITY;// dpTable[0][0];
prevCells[0][0] = DR_ZERO; // starting point
prevCellsE[0][0] = DR_ZERO;
prevCellsF[0][0] = DR_ZERO;
// the first column
for (i = 1; i <= length1; i++) {
dpTable[i][0] = this.scoringMatrix.score(str1[i - 1],
alphabet.emptyLetter())
* i + this.scoringMatrix.openGapCost();
dpTableE[i][0] = Float.NEGATIVE_INFINITY;// dpTable[i][0];
prevCells[i][0] = DR_UP;
prevCellsF[i][0] = DR_UP;
}
// the first row
for (j = 1; j <= length2; j++) {
dpTable[0][j] = 0;
dpTableF[0][j] = Float.NEGATIVE_INFINITY;// dpTable[0][j];
prevCells[0][j] = DR_ZERO;
prevCellsE[0][j] = DR_ZERO;
}
// the rest of the matrix
for (i = 1; i <= length1; i++) {
for (j = 1; j <= length2; j++) {
final double diagScore = dpTable[i - 1][j - 1]
+ similarity(i, j);
dpTableE[i][j] = max(dpTableE[i][j - 1], dpTable[i][j - 1]
+ this.scoringMatrix.openGapCost())
+ similarity(0, j);
if (MathOperations.equals(dpTableE[i][j], dpTableE[i][j - 1]
+ similarity(0, j))) {
prevCellsE[i][j] = DR_LEFT;
} else {
prevCellsE[i][j] = DR_UP;
}
dpTableF[i][j] = dpTable[i - 1][j]
+ this.scoringMatrix.openGapCost() + similarity(i, 0);
if (MathOperations.equals(dpTableF[i][j], dpTableF[i - 1][j]
+ similarity(i, 0))) {
prevCellsF[i][j] = DR_UP;
} else {
prevCellsF[i][j] = DR_LEFT;
}
dpTable[i][j] = max(diagScore,
max(dpTableF[i][j], dpTableE[i][j]));
// find the directions that give the maximum scores.
// the bitwise OR operator is used to record multiple
// directions.
prevCells[i][j] = 0;
if (MathOperations.equals(diagScore, dpTable[i][j])) {
prevCells[i][j] |= DR_DIAG;
} else {
if (MathOperations.equals(dpTableE[i][j], dpTable[i][j])) {
prevCells[i][j] |= DR_LEFT;
}
if (MathOperations.equals(dpTableF[i][j], dpTable[i][j])) {
prevCells[i][j] |= DR_UP;
}
}
}
}
}
private final double max(double a, double b) {
return a > b ? a : b;
}
/**
* Get the maximum value in the score matrix.
*/
private double getMaxScore() {
double max = this.dpTable[length1][length2];
for (int i = length2 - 1; i >= 0; i--) {
if (max < this.dpTable[length1][i]) {
max = this.dpTable[length1][i];
}
}
return max;
}
/**
* Get the maximum value in the score matrix.
*/
private int getMaxIndex() {
double max = this.dpTable[length1][length2];
int index = length2;
for (int i = length2 - 1; i >= 0; i--) {
if (max < this.dpTable[length1][i]) {
max = this.dpTable[length1][i];
index = i;
}
}
return index;
}
/**
* Get the alignment score between the two input strings.
*/
@Override
public double getAlignmentScore() {
return getMaxScore() / normFactor;
}
/**
* Output the local alignments ending in the (i, j) cell. aligned1 and
* aligned2 are suffixes of final aligned strings found in backtracking
* before calling this function. Note: the strings are replicated at each
* recursive call. Use buffers or stacks to improve efficiency.
*/
private String[] printAlignments(int i, int j, String aligned1,
String aligned2) {
// we've reached the starting point, so print the alignments
if ((prevCells[i][j] & DR_ZERO) > 0) {
return new String[] { aligned1, aligned2 };
}
// find out which directions to backtrack
if ((prevCells[i][j] & DR_DIAG) > 0) {
return printAlignments(i - 1, j - 1, str1[i - 1] + aligned1,
str2[j - 1] + aligned2);
}
if ((prevCells[i][j] & DR_UP) > 0) {
while ((prevCellsF[i][j] & DR_UP) > 0) {
aligned1 = str1[i - 1] + aligned1;
aligned2 = "_" + aligned2;
i = i - 1;
}
if ((prevCellsF[i][j] & DR_ZERO) > 0) {
return new String[] { aligned1, aligned2 };
}
return printAlignments(i - 1, j, str1[i - 1] + aligned1, "_"
+ aligned2);
}
if ((prevCells[i][j] & DR_LEFT) > 0) {
while ((prevCellsE[i][j] & DR_LEFT) > 0) {
aligned1 = "_" + aligned1;
aligned2 = str2[j - 1] + aligned2;
j = j - 1;
}
if ((prevCellsE[i][j] & DR_ZERO) > 0) {
return new String[] { aligned1, aligned2 };
}
return printAlignments(i, j - 1, "_" + aligned1, str2[j - 1]
+ aligned2);
}
return null;
}
@Override
public String[] getAlignment() {
return printAlignments(length1, getMaxIndex(), "", "");
}
/**
* print the dynamic programming matrix
*/
public void printDPMatrix() {
System.out.print(" ");
for (int j = 1; j <= length2; j++)
System.out.print(" " + str2[j - 1]);
System.out.println();
for (int i = 0; i <= length1; i++) {
if (i > 0)
System.out.print(str1[i - 1] + " ");
else
System.out.print(" ");
for (int j = 0; j <= length2; j++) {
System.out.print(dpTable[i][j] / normFactor + " ");
}
System.out.println();
}
}
/**
* print the dynamic programming matrix
*/
public void printDPMatrixInLatexMatrix() {
System.out
.println("\\begin{tikzpicture}[auto,transform shape,scale=1]\n"
+ "\\tikzset{node style ge/.style={circle,draw,font=\\scriptsize,minimum size=4mm,inner sep=0pt}}\n"
+ "\\tikzset{node style ne/.style={draw=none,font=\\scriptsize,minimum size=4mm,inner sep=0pt}}");
System.out
.println("\\matrix (A) [matrix of nodes,nodes = {node style ge},ampersand replacement=\\&,column sep = 5mm,row sep = 5mm] {");
// System.out.print("\\node[node style ne,opacity=0.0]{-};");
// for (int j=1; j<=length2;j++)
// System.out.print
// ("\\& \\node[node style ne]{"+str2.charAt(j-1)+"};");
// System.out.println("\\\\");
for (int i = 0; i <= length1; i++) {
// if (i>0)
// System.out.print
// ("\\node[node style ne]{"+str2.charAt(i-1)+"};");
System.out.print("\\node[fill=black]{" + 0 + "};");
for (int j = 1; j <= length2; j++) {
System.out.print("\\& \\node[fill=black]{" + 0 + "};"); // dpTable[i][j]
}
System.out.println("\\\\");
}
// end of matrix
System.out.println("};");
for (int i = 0; i <= length1; i++) {
for (int j = 0; j <= length2; j++) {
// right arrow
if (j != length2) {
String print = Math.round(scoringMatrix.score(
alphabet.emptyLetter(), str2[j])) == 0 ? "" : ""
+ Math.round(scoringMatrix.score(
alphabet.emptyLetter(), str2[j]));
System.out.println("\\draw [draw,->,midway] (A-" + (i + 1)
+ "-" + (j + 1)
+ ".east) -- node[above,font=\\scriptsize]{"
+ print + "} (A-" + (i + 1) + "-" + (j + 2)
+ ".west);");
}
// down arrow
if (i != length1) {
String print = Math.round(scoringMatrix.score(str1[i],
alphabet.emptyLetter())) == 0 ? "" : ""
+ Math.round(scoringMatrix.score(str1[i],
alphabet.emptyLetter()));
System.out.println("\\draw [draw,->,midway] (A-" + (i + 1)
+ "-" + (j + 1)
+ ".south) -- node[right=0,font=\\scriptsize]{"
+ print + "} (A-" + (i + 2) + "-" + (j + 1)
+ ".north);");
}
if (i != length1 && j != length2) {
String print = Math.round(scoringMatrix.score(str1[i],
str2[j])) == 0 ? "" : ""
+ Math.round(scoringMatrix.score(str1[i], str2[j]));
System.out
.println("\\draw [draw,->,midway] (A-"
+ (i + 1)
+ "-"
+ (j + 1)
+ ".south east) -- node[right=0.4,pos=0.25,font=\\scriptsize]{"
+ print + "} (A-" + (i + 2) + "-" + (j + 2)
+ ".north west);");
}
}
}
for (int i = 0; i < length2; i++) {
System.out.println("\\draw [midway] (A-1-" + (i + 1)
+ ".east) -- node[above=10,pos=0.5,font=\\scriptsize]{"
+ str2[i] + "} (A-1-" + (i + 2) + ".west);");
}
for (int i = 0; i < length1; i++) {
System.out.println("\\draw [midway] (A-" + (i + 1)
+ "-1.south) -- node[left=20,pos=0.5,font=\\scriptsize]{"
+ str1[i] + "} (A-" + (i + 2) + "-1.north);");
}
System.out.println("\\end{tikzpicture}");
}
public static void main(String[] args) {
String s1 = "GAAUAAGCAUCGGCCCACAUCGGUGGUG";
String s2 = "GAAUAAGCAUCGGCUAAAUAAUUGGCCCCCACAUCGAGUGAUCGAGUGGUGCAAAC";
SemiAffineGapSemiLocalSequenceAlignment alignment = new SemiAffineGapSemiLocalSequenceAlignment(
s1, s2, RnaAlphabet.getInstance(),
new IdentityAffineScoringMatrix(RnaAlphabet.getInstance(), 1,
-10, -1, -5));
alignment.buildMatrix();
System.out.println(alignment.getMaxScore());
String[] ans = alignment.getAlignment();
// alignment.printDPMatrix();
System.out.println(ans[0]);
System.out.println(ans[1]);
}
@Override
public void setNormFactor(double factor) {
this.normFactor = factor;
}
@Override
public SemiAffineGapSemiLocalSequenceAlignment cloneAligner() {
SemiAffineGapSemiLocalSequenceAlignment other = new SemiAffineGapSemiLocalSequenceAlignment(
length1, length2, alphabet, scoringMatrix.cloneMatrix());
return other;
}
}