Package bgu.bio.algorithms.alignment

Source Code of bgu.bio.algorithms.alignment.SemiAffineGapSemiLocalSequenceAlignment

package bgu.bio.algorithms.alignment;

import gnu.trove.list.array.TCharArrayList;
import bgu.bio.util.AffineGapScoringMatrix;
import bgu.bio.util.IdentityAffineScoringMatrix;
import bgu.bio.util.MathOperations;
import bgu.bio.util.alphabet.AlphabetUtils;
import bgu.bio.util.alphabet.RnaAlphabet;

public class SemiAffineGapSemiLocalSequenceAlignment implements
    SequenceAlignment {

  /**
   * The first input string
   */
  private char[] str1;

  /**
   * The second input String
   */
  private char[] str2;

  /**
   * The lengths of the input strings
   */
  private int length1, length2;

  /**
   * The score matrix. The true scores should be divided by the normalization
   * factor.
   */
  private double[][] dpTable;

  /** see {@link #dpTable} */
  private double[][] dpTableE;

  /** see {@link #dpTable} */
  private double[][] dpTableF;

  private AffineGapScoringMatrix scoringMatrix;

  /**
   * Constants of directions. Multiple directions are stored by bits. The zero
   * direction is the starting point.
   */
  static final byte DR_LEFT = 1; // 0001
  static final byte DR_UP = 2; // 0010
  static final byte DR_DIAG = 4; // 0100
  static final byte DR_ZERO = 8; // 1000

  /**
   * The directions pointing to the cells that give the maximum score at the
   * current cell. The first index is the column index. The second index is
   * the row index.
   */
  private byte[][] prevCells, prevCellsE, prevCellsF;

  private AlphabetUtils alphabet;

  private double normFactor = 1;

  public SemiAffineGapSemiLocalSequenceAlignment(String str1, String str2,
      AlphabetUtils alphabet, AffineGapScoringMatrix matrix) {
    this.scoringMatrix = matrix;
    this.alphabet = alphabet;
    this.dpTable = new double[0][0];
    this.dpTableE = new double[0][0];
    this.dpTableF = new double[0][0];
    this.setSequences(str1, str2);
  }

  public SemiAffineGapSemiLocalSequenceAlignment(int size1, int size2,
      AlphabetUtils alphabet, AffineGapScoringMatrix matrix) {
    this.scoringMatrix = matrix;
    this.alphabet = alphabet;
    length1 = size1;
    length2 = size2;

    dpTable = new double[length1 + 1][length2 + 1];
    dpTableE = new double[length1 + 1][length2 + 1];
    dpTableF = new double[length1 + 1][length2 + 1];

    prevCells = new byte[length1 + 1][length2 + 1];
    prevCellsE = new byte[length1 + 1][length2 + 1];
    prevCellsF = new byte[length1 + 1][length2 + 1];
  }

  @Override
  public void setSequences(String str1, String str2) {
    this.setSequences(str1.toCharArray(), str2.toCharArray());
  }

  @Override
  public void setSequences(TCharArrayList str1, TCharArrayList str2) {
    if (this.str1 == null || this.str1.length < str1.size()) {
      this.str1 = str1.toArray();
    } else {
      str1.toArray(this.str1);
    }
    if (this.str2 == null || this.str2.length < str2.size()) {
      this.str2 = str2.toArray();
    } else {
      str2.toArray(this.str2);
    }

    boolean init = str1.size() > dpTable.length
        || str2.size() > dpTable[0].length;
    length1 = str1.size();
    length2 = str2.size();

    if (init) {
      dpTable = new double[length1 + 1][length2 + 1];
      dpTableE = new double[length1 + 1][length2 + 1];
      dpTableF = new double[length1 + 1][length2 + 1];
      prevCells = new byte[length1 + 1][length2 + 1];
      prevCellsE = new byte[length1 + 1][length2 + 1];
      prevCellsF = new byte[length1 + 1][length2 + 1];
    }
  }

  @Override
  public void setSequences(char[] str1, char[] str2) {
    setSequences(str1, str1.length, str2, str2.length);
  }

  @Override
  public void setSequences(char[] str1, int size1, char[] str2, int size2) {
    this.str1 = str1;
    this.str2 = str2;

    boolean init = size1 > dpTable.length || size2 > dpTable[0].length;
    length1 = size1;
    length2 = size2;

    if (init) {
      dpTable = new double[length1 + 1][length2 + 1];
      dpTableE = new double[length1 + 1][length2 + 1];
      dpTableF = new double[length1 + 1][length2 + 1];
      prevCells = new byte[length1 + 1][length2 + 1];
      prevCellsE = new byte[length1 + 1][length2 + 1];
      prevCellsF = new byte[length1 + 1][length2 + 1];
    }
  }

  /**
   * Compute the similarity score of substitution: use a substitution matrix
   * if the cost model The position of the first character is 1. A position of
   * 0 represents a gap.
   *
   * @param i
   *            Position of the character in str1
   * @param j
   *            Position of the character in str2
   * @return Cost of substitution of the character in str1 by the one in str2
   */
  private final double similarity(int i, int j) {
    final char c1 = i == 0 ? this.alphabet.emptyLetter() : str1[i - 1];
    final char c2 = j == 0 ? this.alphabet.emptyLetter() : str2[j - 1];
    return this.scoringMatrix.score(c1, c2);
  }

  /**
   * Build the score matrix using dynamic programming. Note: The indel scores
   * must be negative. Otherwise, the part handling the first row and column
   * has to be modified.
   */
  @Override
  public void buildMatrix() {

    int i; // length of prefix substring of str1
    int j; // length of prefix substring of str2

    // base case
    dpTable[0][0] = 0.0;
    dpTableE[0][0] = Float.NEGATIVE_INFINITY;// dpTable[0][0];
    dpTableF[0][0] = Float.NEGATIVE_INFINITY;// dpTable[0][0];

    prevCells[0][0] = DR_ZERO; // starting point
    prevCellsE[0][0] = DR_ZERO;
    prevCellsF[0][0] = DR_ZERO;

    // the first column
    for (i = 1; i <= length1; i++) {
      dpTable[i][0] = this.scoringMatrix.score(str1[i - 1],
          alphabet.emptyLetter())
          * i + this.scoringMatrix.openGapCost();
      dpTableE[i][0] = Float.NEGATIVE_INFINITY;// dpTable[i][0];

      prevCells[i][0] = DR_UP;
      prevCellsF[i][0] = DR_UP;
    }

    // the first row
    for (j = 1; j <= length2; j++) {
      dpTable[0][j] = 0;
      dpTableF[0][j] = Float.NEGATIVE_INFINITY;// dpTable[0][j];

      prevCells[0][j] = DR_ZERO;
      prevCellsE[0][j] = DR_ZERO;
    }

    // the rest of the matrix
    for (i = 1; i <= length1; i++) {
      for (j = 1; j <= length2; j++) {

        final double diagScore = dpTable[i - 1][j - 1]
            + similarity(i, j);

        dpTableE[i][j] = max(dpTableE[i][j - 1], dpTable[i][j - 1]
            + this.scoringMatrix.openGapCost())
            + similarity(0, j);
        if (MathOperations.equals(dpTableE[i][j], dpTableE[i][j - 1]
            + similarity(0, j))) {
          prevCellsE[i][j] = DR_LEFT;
        } else {
          prevCellsE[i][j] = DR_UP;
        }
        dpTableF[i][j] = dpTable[i - 1][j]
            + this.scoringMatrix.openGapCost() + similarity(i, 0);
        if (MathOperations.equals(dpTableF[i][j], dpTableF[i - 1][j]
            + similarity(i, 0))) {
          prevCellsF[i][j] = DR_UP;
        } else {
          prevCellsF[i][j] = DR_LEFT;
        }

        dpTable[i][j] = max(diagScore,
            max(dpTableF[i][j], dpTableE[i][j]));

        // find the directions that give the maximum scores.
        // the bitwise OR operator is used to record multiple
        // directions.
        prevCells[i][j] = 0;
        if (MathOperations.equals(diagScore, dpTable[i][j])) {
          prevCells[i][j] |= DR_DIAG;
        } else {
          if (MathOperations.equals(dpTableE[i][j], dpTable[i][j])) {
            prevCells[i][j] |= DR_LEFT;
          }
          if (MathOperations.equals(dpTableF[i][j], dpTable[i][j])) {
            prevCells[i][j] |= DR_UP;
          }
        }
      }
    }
  }

  private final double max(double a, double b) {
    return a > b ? a : b;
  }

  /**
   * Get the maximum value in the score matrix.
   */
  private double getMaxScore() {
    double max = this.dpTable[length1][length2];
    for (int i = length2 - 1; i >= 0; i--) {
      if (max < this.dpTable[length1][i]) {
        max = this.dpTable[length1][i];
      }
    }
    return max;
  }

  /**
   * Get the maximum value in the score matrix.
   */
  private int getMaxIndex() {
    double max = this.dpTable[length1][length2];
    int index = length2;
    for (int i = length2 - 1; i >= 0; i--) {
      if (max < this.dpTable[length1][i]) {
        max = this.dpTable[length1][i];
        index = i;
      }
    }
    return index;
  }

  /**
   * Get the alignment score between the two input strings.
   */
  @Override
  public double getAlignmentScore() {
    return getMaxScore() / normFactor;
  }

  /**
   * Output the local alignments ending in the (i, j) cell. aligned1 and
   * aligned2 are suffixes of final aligned strings found in backtracking
   * before calling this function. Note: the strings are replicated at each
   * recursive call. Use buffers or stacks to improve efficiency.
   */
  private String[] printAlignments(int i, int j, String aligned1,
      String aligned2) {
    // we've reached the starting point, so print the alignments

    if ((prevCells[i][j] & DR_ZERO) > 0) {
      return new String[] { aligned1, aligned2 };
    }

    // find out which directions to backtrack
    if ((prevCells[i][j] & DR_DIAG) > 0) {
      return printAlignments(i - 1, j - 1, str1[i - 1] + aligned1,
          str2[j - 1] + aligned2);
    }
    if ((prevCells[i][j] & DR_UP) > 0) {
      while ((prevCellsF[i][j] & DR_UP) > 0) {
        aligned1 = str1[i - 1] + aligned1;
        aligned2 = "_" + aligned2;
        i = i - 1;
      }
      if ((prevCellsF[i][j] & DR_ZERO) > 0) {
        return new String[] { aligned1, aligned2 };
      }
      return printAlignments(i - 1, j, str1[i - 1] + aligned1, "_"
          + aligned2);
    }
    if ((prevCells[i][j] & DR_LEFT) > 0) {
      while ((prevCellsE[i][j] & DR_LEFT) > 0) {
        aligned1 = "_" + aligned1;
        aligned2 = str2[j - 1] + aligned2;
        j = j - 1;
      }
      if ((prevCellsE[i][j] & DR_ZERO) > 0) {
        return new String[] { aligned1, aligned2 };
      }
      return printAlignments(i, j - 1, "_" + aligned1, str2[j - 1]
          + aligned2);
    }

    return null;
  }

  @Override
  public String[] getAlignment() {
    return printAlignments(length1, getMaxIndex(), "", "");
  }

  /**
   * print the dynamic programming matrix
   */
  public void printDPMatrix() {
    System.out.print(" ");
    for (int j = 1; j <= length2; j++)
      System.out.print(" " + str2[j - 1]);
    System.out.println();
    for (int i = 0; i <= length1; i++) {
      if (i > 0)
        System.out.print(str1[i - 1] + " ");
      else
        System.out.print(" ");
      for (int j = 0; j <= length2; j++) {
        System.out.print(dpTable[i][j] / normFactor + " ");
      }
      System.out.println();
    }
  }

  /**
   * print the dynamic programming matrix
   */
  public void printDPMatrixInLatexMatrix() {

    System.out
        .println("\\begin{tikzpicture}[auto,transform shape,scale=1]\n"
            + "\\tikzset{node style ge/.style={circle,draw,font=\\scriptsize,minimum size=4mm,inner sep=0pt}}\n"
            + "\\tikzset{node style ne/.style={draw=none,font=\\scriptsize,minimum size=4mm,inner sep=0pt}}");
    System.out
        .println("\\matrix (A) [matrix of nodes,nodes = {node style ge},ampersand replacement=\\&,column sep = 5mm,row sep = 5mm] {");
    // System.out.print("\\node[node style ne,opacity=0.0]{-};");
    // for (int j=1; j<=length2;j++)
    // System.out.print
    // ("\\&  \\node[node style ne]{"+str2.charAt(j-1)+"};");
    // System.out.println("\\\\");
    for (int i = 0; i <= length1; i++) {
      // if (i>0)
      // System.out.print
      // ("\\node[node style ne]{"+str2.charAt(i-1)+"};");
      System.out.print("\\node[fill=black]{" + 0 + "};");
      for (int j = 1; j <= length2; j++) {
        System.out.print("\\&  \\node[fill=black]{" + 0 + "};"); // dpTable[i][j]
      }
      System.out.println("\\\\");
    }
    // end of matrix
    System.out.println("};");

    for (int i = 0; i <= length1; i++) {
      for (int j = 0; j <= length2; j++) {
        // right arrow
        if (j != length2) {
          String print = Math.round(scoringMatrix.score(
              alphabet.emptyLetter(), str2[j])) == 0 ? "" : ""
              + Math.round(scoringMatrix.score(
                  alphabet.emptyLetter(), str2[j]));

          System.out.println("\\draw [draw,->,midway] (A-" + (i + 1)
              + "-" + (j + 1)
              + ".east) -- node[above,font=\\scriptsize]{"
              + print + "} (A-" + (i + 1) + "-" + (j + 2)
              + ".west);");
        }
        // down arrow
        if (i != length1) {
          String print = Math.round(scoringMatrix.score(str1[i],
              alphabet.emptyLetter())) == 0 ? "" : ""
              + Math.round(scoringMatrix.score(str1[i],
                  alphabet.emptyLetter()));
          System.out.println("\\draw [draw,->,midway] (A-" + (i + 1)
              + "-" + (j + 1)
              + ".south) -- node[right=0,font=\\scriptsize]{"
              + print + "} (A-" + (i + 2) + "-" + (j + 1)
              + ".north);");
        }
        if (i != length1 && j != length2) {
          String print = Math.round(scoringMatrix.score(str1[i],
              str2[j])) == 0 ? "" : ""
              + Math.round(scoringMatrix.score(str1[i], str2[j]));
          System.out
              .println("\\draw [draw,->,midway] (A-"
                  + (i + 1)
                  + "-"
                  + (j + 1)
                  + ".south east) -- node[right=0.4,pos=0.25,font=\\scriptsize]{"
                  + print + "} (A-" + (i + 2) + "-" + (j + 2)
                  + ".north west);");
        }
      }
    }
    for (int i = 0; i < length2; i++) {
      System.out.println("\\draw [midway] (A-1-" + (i + 1)
          + ".east) -- node[above=10,pos=0.5,font=\\scriptsize]{"
          + str2[i] + "} (A-1-" + (i + 2) + ".west);");
    }
    for (int i = 0; i < length1; i++) {
      System.out.println("\\draw [midway] (A-" + (i + 1)
          + "-1.south) -- node[left=20,pos=0.5,font=\\scriptsize]{"
          + str1[i] + "} (A-" + (i + 2) + "-1.north);");
    }
    System.out.println("\\end{tikzpicture}");
  }

  public static void main(String[] args) {
    String s1 = "GAAUAAGCAUCGGCCCACAUCGGUGGUG";
    String s2 = "GAAUAAGCAUCGGCUAAAUAAUUGGCCCCCACAUCGAGUGAUCGAGUGGUGCAAAC";
    SemiAffineGapSemiLocalSequenceAlignment alignment = new SemiAffineGapSemiLocalSequenceAlignment(
        s1, s2, RnaAlphabet.getInstance(),
        new IdentityAffineScoringMatrix(RnaAlphabet.getInstance(), 1,
            -10, -1, -5));
    alignment.buildMatrix();
    System.out.println(alignment.getMaxScore());
    String[] ans = alignment.getAlignment();
    // alignment.printDPMatrix();
    System.out.println(ans[0]);
    System.out.println(ans[1]);

  }

  @Override
  public void setNormFactor(double factor) {
    this.normFactor = factor;
  }

  @Override
  public SemiAffineGapSemiLocalSequenceAlignment cloneAligner() {
    SemiAffineGapSemiLocalSequenceAlignment other = new SemiAffineGapSemiLocalSequenceAlignment(
        length1, length2, alphabet, scoringMatrix.cloneMatrix());
    return other;
  }
}
TOP

Related Classes of bgu.bio.algorithms.alignment.SemiAffineGapSemiLocalSequenceAlignment

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.