Package bgu.bio.ds.rna

Source Code of bgu.bio.ds.rna.RNA

package bgu.bio.ds.rna;

import gnu.trove.stack.array.TIntArrayStack;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;

import bgu.bio.adt.tuples.IntPair;
import bgu.bio.adt.tuples.IntPairComparator;
import bgu.bio.io.file.json.JSONException;
import bgu.bio.io.file.json.JSONObject;

/**
* @author milon
*
*/
public class RNA {
  private static String newline = System.getProperty("line.separator");
  private final int id;
  private String header;
  private String primary;
  private String secondary;
  private static final char[][] possibleBracketsInStructure = new char[][] {
      { '(', ')' }, { '[', ']' }, { '<', '>' }, { '{', '}' } };

  public RNA(int id, String header, String primary, String secondary) {
    super();
    this.id = id;
    this.primary = primary;
    this.secondary = secondary;
    this.header = header;
  }

  public RNA(int id, String header, String primary) {
    super();
    this.id = id;
    this.primary = primary;
    this.header = header;
  }

  public RNA(int id) {
    this.id = id;
  }

  public String getPrimary() {
    return primary;
  }

  public void setPrimary(String primary) {
    this.primary = primary;
  }

  public String getSecondary() {
    return secondary;
  }

  public void setSecondary(String secondary) {
    this.secondary = secondary;
  }

  public int getId() {
    return id;
  }

  public void fixEmptyHairpins() {
    if (secondary == null)
      return;
    int pos = secondary.indexOf("()");
    while (pos >= 0) {
      secondary = secondary.substring(0, pos + 1) + "..."
          + secondary.substring(pos + 1);
      primary = primary.substring(0, pos + 1) + "NNN"
          + primary.substring(pos + 1);
      pos = secondary.indexOf("()", pos);
    }
  }

  @Override
  public String toString() {
    return "RNA [header=" + header + ", id=" + id + ", primary=" + primary
        + ", secondary=" + secondary + "]";
  }

  public void getEMBLData() {
    try {
      // Construct data
      String data = URLEncoder.encode("db", "UTF-8") + "="
          + URLEncoder.encode("EMBL", "UTF-8");
      data += "&" + URLEncoder.encode("id", "UTF-8") + "="
          + URLEncoder.encode(header, "UTF-8");
      data += "&" + URLEncoder.encode("format", "UTF-8") + "="
          + URLEncoder.encode("embl", "UTF-8");
      data += "&" + URLEncoder.encode("style", "UTF-8") + "="
          + URLEncoder.encode("raw", "UTF-8");
      // Send data
      URL url = new URL("http://www.ebi.ac.uk/cgi-bin/dbfetch");
      URLConnection conn = url.openConnection();
      conn.setDoOutput(true);
      OutputStreamWriter wr = new OutputStreamWriter(
          conn.getOutputStream());
      wr.write(data);
      wr.flush();
      // Get the response
      BufferedReader rd = new BufferedReader(new InputStreamReader(
          conn.getInputStream()));
      StringBuilder sB = new StringBuilder();
      String line;
      while ((line = rd.readLine()) != null) {
        if (line.contains("OS"))
          sB.append(line + "\n");
      }
      System.out.println(sB.toString());
      wr.close();
      rd.close();
    } catch (Exception e) {
    }
  }

  public String toJSON() {
    StringBuilder builder = new StringBuilder();
    builder.append("{\"header\":\"");
    builder.append(this.header);
    builder.append('\"');
    builder.append(",\"sequence\":\"");
    builder.append(this.primary);
    builder.append('\"');
    builder.append(",\"structure\":\"");
    builder.append(this.secondary);
    builder.append("\"}");
    return builder.toString();
  }

  public String toFASTA() {
    StringBuilder builder = new StringBuilder();
    builder.append('>');
    builder.append(this.header);
    builder.append('\n');
    builder.append(this.primary);
    if (this.secondary != null) {
      builder.append(this.secondary);
    }
    return builder.toString();
  }

  public String getHeader() {
    return header;
  }

  public void setHeader(String header) {
    this.header = header;
  }

  public void removeSpecialChars() {
    StringBuilder builder = new StringBuilder(primary.length());
    for (int i = 0; i < this.primary.length(); i++) {
      final char c = Character.toUpperCase(primary.charAt(i));
      if (c == 'A' || c == 'U' || c == 'T' || c == 'G' || c == 'C') {
        builder.append(primary.charAt(i));
      } else {
        builder.append('N');
      }
    }

    this.primary = builder.toString();
  }

  public void removePseudoknotInformation() {
    if (secondary == null) {
      return;
    }
    StringBuilder builder = new StringBuilder(secondary.length());
    for (int i = 0; i < this.secondary.length(); i++) {
      final char c = secondary.charAt(i);
      if (c == '(' || c == ')') {
        builder.append(c);
      } else {
        builder.append('.');
      }
    }

    this.secondary = builder.toString();
  }

  public boolean validateStructure() {
    // check that structure is given
    if (this.secondary == null) {
      throw new UnsupportedOperationException(
          "Can't extract pairs if structure is not given");
    }

    ArrayList<IntPair> ans = new ArrayList<IntPair>();

    TIntArrayStack stack = new TIntArrayStack(secondary.length() / 2);
    // run on all types of brackets
    for (int b = 0; b < possibleBracketsInStructure.length; b++) {
      final char openBracket = possibleBracketsInStructure[b][0];
      final char closeBracket = possibleBracketsInStructure[b][1];
      stack.clear();
      for (int i = 0; i < secondary.length(); i++) {
        if (secondary.charAt(i) == openBracket) {
          stack.push(i);
        } else if (secondary.charAt(i) == closeBracket) {
          if (stack.size() == 0) {
            return false;
          }
          final int start = stack.pop();
          ans.add(new IntPair(start, i));
        }
      }
      if (stack.size() != 0) {
        return false;
      }
    }
    return true;
  }

  public ArrayList<IntPair> extractPairs() {
    // check that structure is given
    if (this.secondary == null) {
      throw new UnsupportedOperationException(
          "Can't extract pairs if structure is not given");
    }

    ArrayList<IntPair> ans = new ArrayList<IntPair>();

    TIntArrayStack stack = new TIntArrayStack(secondary.length() / 2);
    // run on all types of brackets
    for (int b = 0; b < possibleBracketsInStructure.length; b++) {
      final char openBracket = possibleBracketsInStructure[b][0];
      final char closeBracket = possibleBracketsInStructure[b][1];
      stack.clear();
      for (int i = 0; i < secondary.length(); i++) {
        if (secondary.charAt(i) == openBracket) {
          stack.push(i);
        } else if (secondary.charAt(i) == closeBracket) {
          final int start = stack.pop();
          ans.add(new IntPair(start, i));
        }
      }
    }
    return ans;
  }

  public ArrayList<RNA> splitToStems(int maxGapSize) {
    // get the pairs in the structure
    ArrayList<IntPair> pairs = this.extractPairs();
    Collections.sort(pairs, new IntPairComparator());

    ArrayList<ArrayList<IntPair>> stackings = new ArrayList<ArrayList<IntPair>>();

    for (int p = 0; p < pairs.size(); p++) {
      IntPair current = pairs.get(p);
      // check all stackings in data
      boolean found = false;
      for (int s = stackings.size() - 1; s >= 0 && !found; s--) {
        ArrayList<IntPair> currentStacking = stackings.get(s);
        if (isExtends(currentStacking, current, maxGapSize)) {
          currentStacking.add(current);
          found = true;
        }
      }

      // if didn't find in any stacking add new
      if (!found) {
        ArrayList<IntPair> stack = new ArrayList<IntPair>();
        stack.add(current);
        stackings.add(stack);
      }
    }
    return convertToRNA(stackings);
  }

  private ArrayList<RNA> convertToRNA(ArrayList<ArrayList<IntPair>> stackings) {
    ArrayList<RNA> ans = new ArrayList<RNA>();
    for (int s = 0; s < stackings.size(); s++) {
      ArrayList<IntPair> current = stackings.get(s);
      // size is the length of the most external bp distance minus the
      // most inner bp distance
      final int size = current.get(0).getSecond()
          - current.get(0).getFirst() + 1;
      StringBuilder structure = new StringBuilder(size);
      for (int i = 0; i < size; i++) {
        structure.append('.');
      }
      String sequence = this.primary.substring(current.get(0).getFirst(),
          current.get(0).getSecond() + 1);
      for (int i = 0; i < current.size(); i++) {
        IntPair pair = current.get(i);
        structure.setCharAt(
            pair.getFirst() - current.get(0).getFirst(), '(');
        structure.setCharAt(pair.getSecond()
            - current.get(0).getFirst(), ')');
      }
      String newHeader = this.header;
      try {
        JSONObject json = new JSONObject();
        json.put("header", this.header.substring(1).trim());
        json.put("start", current.get(0).getFirst());
        newHeader = json.toString();
      } catch (JSONException ex) {

      }

      ans.add(new RNA(ans.size(), newHeader, sequence, structure
          .toString()));
    }
    return ans;
  }

  private boolean isExtends(ArrayList<IntPair> currentStacking,
      IntPair current, int maxGapSize) {

    final IntPair edgeOfStack = currentStacking
        .get(currentStacking.size() - 1);
    return (current.getFirst() - edgeOfStack.getFirst() - 1 <= maxGapSize
        && current.getFirst() - edgeOfStack.getFirst() > 0
        && edgeOfStack.getSecond() - current.getSecond() - 1 <= maxGapSize && edgeOfStack
        .getSecond() - current.getSecond() > 0);
  }

  public void saveToFASTA(Writer writer) throws IOException {
    writer.write('>');
    writer.write(this.header);
    writer.write('\n');
    writer.write(this.primary);
    writer.write('\n');
    if (this.secondary != null && !this.secondary.trim().equals("")) {
      writer.write(this.secondary);
      writer.write('\n');
    }
  }

  public void saveToFASTA(String fileName) {
    try {
      FileWriter file = new FileWriter(new File(fileName));
      BufferedWriter writer = new BufferedWriter(file);
      this.saveToFASTA(writer);
      writer.close();
    } catch (IOException ex) {

    }
  }

  public static ArrayList<RNA> loadFromFile(BufferedReader reader,
      boolean withStructure) {
    ArrayList<RNA> list = new ArrayList<RNA>();
    try {
      String line = reader.readLine();
      String header = null;
      StringBuilder data = new StringBuilder();

      int id = 0;
      while (line != null) {
        line = line.trim();
        if (line.startsWith(">")) {// found new header
          if (header != null) {
            addRNA(withStructure, list, header, data, id);
            data.setLength(0);
            id++;

          }
          header = line;
        } else {
          data.append(line);
        }
        line = reader.readLine();
      }
      if (header != null && data.length() > 0) {
        addRNA(withStructure, list, header, data, id);
      }
      data.setLength(0);
      id++;
      reader.close();
    } catch (IOException e) {
    }
    return list;
  }

  public static ArrayList<RNA> loadFromFile(File f, boolean withStructure) {
    FileReader fileReader;
    try {
      fileReader = new FileReader(f);
    } catch (FileNotFoundException e) {
      return new ArrayList<RNA>();
    }
    BufferedReader reader = new BufferedReader(fileReader);
    return loadFromFile(reader, withStructure);
  }

  public static ArrayList<RNA> loadFromFile(String filename,
      boolean withStructure) {
    return loadFromFile(new File(filename), withStructure);
  }

  /**
   * @param withStructure
   * @param list
   * @param header
   * @param data
   * @param id
   */
  private static void addRNA(boolean withStructure, ArrayList<RNA> list,
      String header, StringBuilder data, int id) {
    if (withStructure) {
      list.add(new RNA(id, header.substring(1), data.substring(0,
          data.length() / 2).toUpperCase(), data.substring(data
          .length() / 2)));
    } else {
      list.add(new RNA(id, header.substring(1), data.toString()
          .toUpperCase(), null));
    }
  }

  public static void loadFixAndSave(String filename, String filterIn,
      String filterOut) {
    ArrayList<RNA> list = loadFromFile(filename, true);
    HashSet<String> mapIn = new HashSet<String>();
    HashSet<String> mapOut = new HashSet<String>();
    if (filterOut != null) {
      try {
        FileReader file = new FileReader(new File(filterOut));
        BufferedReader reader = new BufferedReader(file);

        String line = reader.readLine();
        while (line != null) {
          mapOut.add(line.trim().toLowerCase());
          line = reader.readLine();
        }
        reader.close();
      } catch (IOException ex) {
        ex.printStackTrace();
      }
    }
    if (filterIn != null) {
      try {
        FileReader file = new FileReader(new File(filterIn));
        BufferedReader reader = new BufferedReader(file);

        String line = reader.readLine();
        while (line != null) {
          mapIn.add(line.trim().toLowerCase());
          line = reader.readLine();
        }
        reader.close();
      } catch (IOException ex) {
        ex.printStackTrace();
      }
    }
    ArrayList<RNA> list2 = new ArrayList<RNA>();
    for (RNA rna : list) {
      if (mapIn.isEmpty()
          || mapIn.contains(rna.getHeader().toLowerCase())) {
        // change special chars to N's
        rna.removeSpecialChars();
        // remove pseudoknots from structures
        rna.removePseudoknotInformation();
        // fix empty hair pins
        rna.fixEmptyHairpins();
        // trim sides
        rna.removeDanglingNs();

        if (!mapOut.contains(rna.getHeader().toLowerCase())) {
          list2.add(rna);
        }
      }
    }
    // write the list back to the file
    saveToFile(list2, filename);
  }

  public void removeDanglingNs() {
    if (secondary == null) {
      return;
    }
    // left side
    int count = 0;
    int pos = 0;
    while (Character.toUpperCase(primary.charAt(pos)) == 'N'
        && secondary.charAt(pos) == '.') {
      pos++;
      count++;
    }

    if (pos == primary.length())
      return;
    if (count > 3) {
      count -= 3;
      primary = primary.substring(count);
      secondary = secondary.substring(count);
    }
    // right side
    count = 0;
    pos = primary.length() - 1;
    while (Character.toUpperCase(primary.charAt(pos)) == 'N'
        && secondary.charAt(pos) == '.') {
      pos--;
      count++;
    }
    if (count > 3) {
      count -= 3;
      primary = primary.substring(0, primary.length() - count);
      secondary = secondary.substring(0, secondary.length() - count);
    }
  }

  public static void saveToFile(ArrayList<RNA> list, String filename) {
    try {
      FileWriter file = new FileWriter(new File(filename));
      BufferedWriter writer = new BufferedWriter(file);
      for (RNA rna : list) {
        if (rna.getSecondary() != null
            && rna.getPrimary().length() != rna.getSecondary()
                .length()) {
          writer.close();
          throw new RuntimeException("Error in parsing");
        }

        writer.write('>');
        writer.write(rna.getHeader());
        writer.write(newline);
        writer.write(rna.getPrimary());
        if (rna.getSecondary() != null) {
          writer.write(newline);
          writer.write(rna.getSecondary());
        }
        writer.write(newline);
      }
      writer.close();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
TOP

Related Classes of bgu.bio.ds.rna.RNA

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.