package bgu.bio.ds.rna;
import gnu.trove.stack.array.TIntArrayStack;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import bgu.bio.adt.tuples.IntPair;
import bgu.bio.adt.tuples.IntPairComparator;
import bgu.bio.io.file.json.JSONException;
import bgu.bio.io.file.json.JSONObject;
/**
 * A single RNA record: a numeric id, a header line, the primary sequence and
 * an optional secondary structure in bracket notation.
 * <p>
 * Headers are stored without the leading FASTA marker character
 * (<code>&gt;</code>); the writers in this class add it back on output. When a
 * structure is present it is expected to have the same length as the sequence
 * (this is enforced by {@link #saveToFile(ArrayList, String)}).
 * <p>
 * Instances are mutable and no synchronization is performed.
 *
 * @author milon
 */
public class RNA {

    /** Platform line separator, used by {@link #saveToFile(ArrayList, String)}. */
    private static final String NEWLINE = System.getProperty("line.separator");

    /**
     * Open/close character pairs recognised in a structure string. Each pair
     * is matched independently of the others.
     */
    private static final char[][] BRACKET_PAIRS = new char[][] {
            { '(', ')' }, { '[', ']' }, { '<', '>' }, { '{', '}' } };

    private final int id;
    private String header;
    private String primary;
    private String secondary;

    /**
     * Creates a record with a sequence and a structure.
     *
     * @param id
     *            numeric identifier of the record
     * @param header
     *            header text, without the leading FASTA marker
     * @param primary
     *            the sequence
     * @param secondary
     *            the structure in bracket notation, may be <code>null</code>
     */
    public RNA(int id, String header, String primary, String secondary) {
        this.id = id;
        this.header = header;
        this.primary = primary;
        this.secondary = secondary;
    }

    /**
     * Creates a record with a sequence and no structure.
     *
     * @param id
     *            numeric identifier of the record
     * @param header
     *            header text, without the leading FASTA marker
     * @param primary
     *            the sequence
     */
    public RNA(int id, String header, String primary) {
        this(id, header, primary, null);
    }

    /**
     * Creates an empty record; header, sequence and structure stay
     * <code>null</code> until set.
     *
     * @param id
     *            numeric identifier of the record
     */
    public RNA(int id) {
        this.id = id;
    }

    /** @return the sequence, possibly <code>null</code> */
    public String getPrimary() {
        return primary;
    }

    /**
     * @param primary
     *            the new sequence
     */
    public void setPrimary(String primary) {
        this.primary = primary;
    }

    /** @return the structure, possibly <code>null</code> */
    public String getSecondary() {
        return secondary;
    }

    /**
     * @param secondary
     *            the new structure
     */
    public void setSecondary(String secondary) {
        this.secondary = secondary;
    }

    /** @return the numeric identifier given at construction */
    public int getId() {
        return id;
    }

    /** @return the header text, possibly <code>null</code> */
    public String getHeader() {
        return header;
    }

    /**
     * @param header
     *            the new header text
     */
    public void setHeader(String header) {
        this.header = header;
    }

    /**
     * Expands every empty hairpin <code>()</code> in the structure to
     * <code>(...)</code> and inserts <code>NNN</code> at the same position of
     * the sequence so that both strings stay aligned. Does nothing when no
     * structure is set. A missing sequence is tolerated: only the structure
     * is patched in that case.
     */
    public void fixEmptyHairpins() {
        if (secondary == null) {
            return;
        }
        int pos = secondary.indexOf("()");
        while (pos >= 0) {
            final int insertAt = pos + 1;
            secondary = secondary.substring(0, insertAt) + "..."
                    + secondary.substring(insertAt);
            if (primary != null) {
                primary = primary.substring(0, insertAt) + "NNN"
                        + primary.substring(insertAt);
            }
            // the character after the opening bracket is now '.', so the
            // search can safely resume just behind it
            pos = secondary.indexOf("()", insertAt);
        }
    }

    @Override
    public String toString() {
        return "RNA [header=" + header + ", id=" + id + ", primary=" + primary
                + ", secondary=" + secondary + "]";
    }

    /**
     * Posts a dbfetch query for this record's header to the EBI server and
     * prints every response line containing the text <code>OS</code> to
     * standard output (presumably the organism lines of the EMBL entry).
     * <p>
     * This is a best-effort call: any failure is reported on standard error
     * and otherwise ignored. The connection streams are always closed.
     */
    public void getEMBLData() {
        OutputStreamWriter request = null;
        BufferedReader response = null;
        try {
            // Construct data
            final String query = URLEncoder.encode("db", "UTF-8") + "="
                    + URLEncoder.encode("EMBL", "UTF-8")
                    + "&" + URLEncoder.encode("id", "UTF-8") + "="
                    + URLEncoder.encode(header, "UTF-8")
                    + "&" + URLEncoder.encode("format", "UTF-8") + "="
                    + URLEncoder.encode("embl", "UTF-8")
                    + "&" + URLEncoder.encode("style", "UTF-8") + "="
                    + URLEncoder.encode("raw", "UTF-8");

            // Send data
            final URL url = new URL("http://www.ebi.ac.uk/cgi-bin/dbfetch");
            final URLConnection conn = url.openConnection();
            conn.setDoOutput(true);
            request = new OutputStreamWriter(conn.getOutputStream());
            request.write(query);
            request.flush();

            // Get the response
            response = new BufferedReader(new InputStreamReader(
                    conn.getInputStream()));
            final StringBuilder matches = new StringBuilder();
            String line;
            while ((line = response.readLine()) != null) {
                if (line.contains("OS")) {
                    matches.append(line).append("\n");
                }
            }
            System.out.println(matches.toString());
        } catch (Exception e) {
            // deliberately broad: the lookup is optional and must never
            // abort the caller, but the failure should be visible
            System.err.println("Failed to fetch EMBL data for header '"
                    + header + "': " + e);
        } finally {
            closeResource(request);
            closeResource(response);
        }
    }

    /**
     * Renders the record as a JSON object with the keys <code>header</code>,
     * <code>sequence</code> and <code>structure</code>, in that order.
     * Quotes, backslashes and control characters inside the values are
     * escaped so the result is always well-formed. A <code>null</code> field
     * is written as the quoted text <code>null</code>.
     *
     * @return the JSON text
     */
    public String toJSON() {
        StringBuilder builder = new StringBuilder();
        builder.append("{\"header\":\"");
        appendJsonEscaped(builder, this.header);
        builder.append("\",\"sequence\":\"");
        appendJsonEscaped(builder, this.primary);
        builder.append("\",\"structure\":\"");
        appendJsonEscaped(builder, this.secondary);
        builder.append("\"}");
        return builder.toString();
    }

    /**
     * Appends the content of a JSON string (without the surrounding quotes),
     * escaping everything JSON does not allow verbatim.
     */
    private static void appendJsonEscaped(StringBuilder out, String value) {
        if (value == null) {
            out.append("null");
            return;
        }
        for (int i = 0; i < value.length(); i++) {
            final char c = value.charAt(i);
            switch (c) {
            case '"':
                out.append("\\\"");
                break;
            case '\\':
                out.append("\\\\");
                break;
            case '\n':
                out.append("\\n");
                break;
            case '\r':
                out.append("\\r");
                break;
            case '\t':
                out.append("\\t");
                break;
            case '\b':
                out.append("\\b");
                break;
            case '\f':
                out.append("\\f");
                break;
            default:
                if (c < 0x20) {
                    // remaining control characters need the four digit
                    // hexadecimal escape form
                    out.append('\\').append('u');
                    final String hex = Integer.toHexString(c);
                    for (int pad = hex.length(); pad < 4; pad++) {
                        out.append('0');
                    }
                    out.append(hex);
                } else {
                    out.append(c);
                }
                break;
            }
        }
    }

    /**
     * Renders the record as FASTA text: the marker and header on the first
     * line, the sequence on the second and, when a structure is set, the
     * structure on a third line. No trailing line break is added.
     *
     * @return the FASTA text
     */
    public String toFASTA() {
        StringBuilder builder = new StringBuilder();
        builder.append('>');
        builder.append(this.header);
        builder.append('\n');
        builder.append(this.primary);
        if (this.secondary != null) {
            // keep the structure on its own line, like the file writers do
            builder.append('\n');
            builder.append(this.secondary);
        }
        return builder.toString();
    }

    /**
     * Replaces every sequence character other than A, C, G, T or U (in
     * either case) with <code>N</code>. Accepted characters keep their
     * original case. Does nothing when no sequence is set.
     */
    public void removeSpecialChars() {
        if (primary == null) {
            return;
        }
        StringBuilder builder = new StringBuilder(primary.length());
        for (int i = 0; i < primary.length(); i++) {
            final char original = primary.charAt(i);
            switch (Character.toUpperCase(original)) {
            case 'A':
            case 'C':
            case 'G':
            case 'T':
            case 'U':
                builder.append(original);
                break;
            default:
                builder.append('N');
                break;
            }
        }
        this.primary = builder.toString();
    }

    /**
     * Reduces the structure to round brackets only: every character other
     * than <code>(</code> and <code>)</code> becomes <code>.</code>. Does
     * nothing when no structure is set.
     */
    public void removePseudoknotInformation() {
        if (secondary == null) {
            return;
        }
        StringBuilder builder = new StringBuilder(secondary.length());
        for (int i = 0; i < secondary.length(); i++) {
            final char c = secondary.charAt(i);
            builder.append(c == '(' || c == ')' ? c : '.');
        }
        this.secondary = builder.toString();
    }

    /**
     * Checks that every bracket type in the structure is balanced. Bracket
     * types are checked independently of each other.
     *
     * @return <code>true</code> when each closing bracket has an earlier
     *         opening bracket of the same type and none is left open
     * @throws UnsupportedOperationException
     *             when no structure is set
     */
    public boolean validateStructure() {
        // check that structure is given
        if (this.secondary == null) {
            throw new UnsupportedOperationException(
                    "Can't validate structure if structure is not given");
        }
        // run on all types of brackets; only the nesting depth matters here
        for (char[] bracket : BRACKET_PAIRS) {
            final char openBracket = bracket[0];
            final char closeBracket = bracket[1];
            int depth = 0;
            for (int i = 0; i < secondary.length(); i++) {
                final char c = secondary.charAt(i);
                if (c == openBracket) {
                    depth++;
                } else if (c == closeBracket) {
                    if (depth == 0) {
                        return false;
                    }
                    depth--;
                }
            }
            if (depth != 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Collects the base pairs encoded in the structure as (open index, close
     * index) pairs. Pairs are grouped by bracket type in the order round,
     * square, angle, curly; within a type they appear in order of their
     * closing bracket.
     * <p>
     * Opening brackets that are never closed are ignored; use
     * {@link #validateStructure()} first when strict checking is required.
     *
     * @return the list of pairs, empty when the structure has no brackets
     * @throws UnsupportedOperationException
     *             when no structure is set
     * @throws IllegalStateException
     *             when a closing bracket has no matching opening bracket
     */
    public ArrayList<IntPair> extractPairs() {
        // check that structure is given
        if (this.secondary == null) {
            throw new UnsupportedOperationException(
                    "Can't extract pairs if structure is not given");
        }
        ArrayList<IntPair> ans = new ArrayList<IntPair>();
        TIntArrayStack stack = new TIntArrayStack(secondary.length() / 2);
        // run on all types of brackets
        for (char[] bracket : BRACKET_PAIRS) {
            final char openBracket = bracket[0];
            final char closeBracket = bracket[1];
            stack.clear();
            for (int i = 0; i < secondary.length(); i++) {
                final char c = secondary.charAt(i);
                if (c == openBracket) {
                    stack.push(i);
                } else if (c == closeBracket) {
                    if (stack.size() == 0) {
                        // popping an empty stack would fail with an opaque
                        // index error, so report the real problem instead
                        throw new IllegalStateException(
                                "Unbalanced structure: '" + closeBracket
                                        + "' at position " + i
                                        + " has no matching '" + openBracket
                                        + "'");
                    }
                    final int start = stack.pop();
                    ans.add(new IntPair(start, i));
                }
            }
        }
        return ans;
    }

    /**
     * Splits the structure into stems. The pairs are sorted with
     * {@link IntPairComparator} and each pair is attached to the most
     * recently created stem it extends (see the gap rule below); a pair that
     * extends no stem starts a new one. Every stem is returned as its own
     * record covering the span of its first pair.
     * <p>
     * A pair extends a stem when it lies strictly inside the stem's last
     * pair and skips at most <code>maxGapSize</code> positions on each side.
     *
     * @param maxGapSize
     *            largest number of skipped positions allowed on either side
     *            between two consecutive pairs of one stem
     * @return one record per stem, in order of creation
     * @throws UnsupportedOperationException
     *             when no structure is set
     */
    public ArrayList<RNA> splitToStems(int maxGapSize) {
        // get the pairs in the structure
        ArrayList<IntPair> pairs = this.extractPairs();
        Collections.sort(pairs, new IntPairComparator());
        ArrayList<ArrayList<IntPair>> stackings = new ArrayList<ArrayList<IntPair>>();
        for (IntPair current : pairs) {
            // look for a stem to extend, newest first
            boolean found = false;
            for (int s = stackings.size() - 1; s >= 0 && !found; s--) {
                ArrayList<IntPair> candidate = stackings.get(s);
                if (isExtends(candidate, current, maxGapSize)) {
                    candidate.add(current);
                    found = true;
                }
            }
            // if didn't find in any stacking add new
            if (!found) {
                ArrayList<IntPair> fresh = new ArrayList<IntPair>();
                fresh.add(current);
                stackings.add(fresh);
            }
        }
        return convertToRNA(stackings);
    }

    /**
     * Turns each stem into a record. The record's sequence is the part of
     * this sequence spanned by the stem's first pair, its structure marks
     * the stem's pairs with round brackets and everything else with
     * <code>.</code>, and its header is a JSON object holding this record's
     * header and the start offset of the stem. Ids are assigned in list
     * order starting at zero.
     */
    private ArrayList<RNA> convertToRNA(ArrayList<ArrayList<IntPair>> stackings) {
        ArrayList<RNA> ans = new ArrayList<RNA>();
        for (ArrayList<IntPair> stem : stackings) {
            // the first pair of a stem encloses all of its other pairs
            final IntPair outer = stem.get(0);
            final int start = outer.getFirst();
            final int end = outer.getSecond();
            final int size = end - start + 1;

            StringBuilder structure = new StringBuilder(size);
            for (int i = 0; i < size; i++) {
                structure.append('.');
            }
            for (IntPair pair : stem) {
                structure.setCharAt(pair.getFirst() - start, '(');
                structure.setCharAt(pair.getSecond() - start, ')');
            }
            String sequence = this.primary.substring(start, end + 1);

            String newHeader = this.header;
            try {
                JSONObject json = new JSONObject();
                json.put("header", headerWithoutMarker());
                json.put("start", start);
                newHeader = json.toString();
            } catch (JSONException ex) {
                // keep the plain header when the JSON form can't be built
            }
            ans.add(new RNA(ans.size(), newHeader, sequence, structure
                    .toString()));
        }
        return ans;
    }

    /**
     * Returns the trimmed header with a leading FASTA marker removed when
     * one is present. Headers are normally stored without the marker, so
     * nothing else is cut off. A missing header yields the empty string.
     */
    private String headerWithoutMarker() {
        if (header == null) {
            return "";
        }
        final String label = header.startsWith(">") ? header.substring(1)
                : header;
        return label.trim();
    }

    /**
     * Tells whether <code>current</code> continues the given stem, i.e. lies
     * strictly inside the stem's last pair with at most
     * <code>maxGapSize</code> skipped positions on each side.
     */
    private boolean isExtends(ArrayList<IntPair> currentStacking,
            IntPair current, int maxGapSize) {
        final IntPair edgeOfStack = currentStacking
                .get(currentStacking.size() - 1);
        final int leftStep = current.getFirst() - edgeOfStack.getFirst();
        final int rightStep = edgeOfStack.getSecond() - current.getSecond();
        return leftStep > 0 && leftStep - 1 <= maxGapSize
                && rightStep > 0 && rightStep - 1 <= maxGapSize;
    }

    /**
     * Writes the record in FASTA form: marker and header, then the sequence,
     * then the structure when it is set and not blank. Each line ends with a
     * single line feed. The writer is neither flushed nor closed.
     *
     * @param writer
     *            destination of the text
     * @throws IOException
     *             when the writer fails
     */
    public void saveToFASTA(Writer writer) throws IOException {
        writer.write('>');
        writer.write(this.header);
        writer.write('\n');
        writer.write(this.primary);
        writer.write('\n');
        if (this.secondary != null && !this.secondary.trim().equals("")) {
            writer.write(this.secondary);
            writer.write('\n');
        }
    }

    /**
     * Writes the record in FASTA form to the named file, replacing any
     * existing content. An I/O failure is reported on standard error and
     * otherwise ignored; the file is always closed.
     *
     * @param fileName
     *            path of the file to write
     */
    public void saveToFASTA(String fileName) {
        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(new FileWriter(new File(fileName)));
            this.saveToFASTA(writer);
        } catch (IOException ex) {
            System.err.println("Failed to write FASTA file '" + fileName
                    + "': " + ex);
        } finally {
            closeResource(writer);
        }
    }

    /**
     * Reads FASTA records from the reader and closes it afterwards, whether
     * or not reading succeeded.
     * <p>
     * Every line starting with the FASTA marker opens a new record; the
     * following lines are trimmed and concatenated to form its data. Text in
     * front of the first header is ignored. With <code>withStructure</code>
     * the first half of the data is taken as the sequence and the second
     * half as the structure. Sequences are converted to upper case. A final
     * record without any data is dropped. Ids are assigned in file order
     * starting at zero.
     * <p>
     * When reading fails the failure is reported on standard error and the
     * records read so far are returned.
     *
     * @param reader
     *            source of the FASTA text
     * @param withStructure
     *            whether each record carries a structure after its sequence
     * @return the records read, never <code>null</code>
     */
    public static ArrayList<RNA> loadFromFile(BufferedReader reader,
            boolean withStructure) {
        ArrayList<RNA> list = new ArrayList<RNA>();
        try {
            readRecords(reader, withStructure, list);
        } catch (IOException e) {
            System.err.println("Failed while reading RNA records, keeping "
                    + list.size() + " record(s) read so far: " + e);
        } finally {
            closeResource(reader);
        }
        return list;
    }

    /**
     * Parses FASTA records from the reader into <code>list</code>. Records
     * completed before a read failure stay in the list. The reader is left
     * open.
     */
    private static void readRecords(BufferedReader reader,
            boolean withStructure, ArrayList<RNA> list) throws IOException {
        String header = null;
        StringBuilder data = new StringBuilder();
        int id = 0;
        for (String line = reader.readLine(); line != null; line = reader
                .readLine()) {
            final String trimmed = line.trim();
            if (trimmed.startsWith(">")) {// found new header
                if (header != null) {
                    addRNA(withStructure, list, header, data, id);
                    data.setLength(0);
                    id++;
                }
                header = trimmed;
            } else if (header != null) {
                // text before the first header belongs to no record and
                // must not leak into the first one
                data.append(trimmed);
            }
        }
        if (header != null && data.length() > 0) {
            addRNA(withStructure, list, header, data, id);
        }
    }

    /**
     * Reads FASTA records from a file; see
     * {@link #loadFromFile(BufferedReader, boolean)} for the format. A file
     * that cannot be opened is reported on standard error and yields an
     * empty list.
     *
     * @param f
     *            the file to read
     * @param withStructure
     *            whether each record carries a structure after its sequence
     * @return the records read, never <code>null</code>
     */
    public static ArrayList<RNA> loadFromFile(File f, boolean withStructure) {
        final FileReader fileReader;
        try {
            fileReader = new FileReader(f);
        } catch (FileNotFoundException e) {
            System.err.println("Cannot open RNA file '" + f + "': " + e);
            return new ArrayList<RNA>();
        }
        return loadFromFile(new BufferedReader(fileReader), withStructure);
    }

    /**
     * Reads FASTA records from the named file; see
     * {@link #loadFromFile(File, boolean)}.
     *
     * @param filename
     *            path of the file to read
     * @param withStructure
     *            whether each record carries a structure after its sequence
     * @return the records read, never <code>null</code>
     */
    public static ArrayList<RNA> loadFromFile(String filename,
            boolean withStructure) {
        return loadFromFile(new File(filename), withStructure);
    }

    /**
     * Builds a record from the collected text and appends it to the list.
     *
     * @param withStructure
     *            when set, the first half of <code>data</code> becomes the
     *            sequence and the rest the structure; otherwise all of it is
     *            the sequence and the structure is <code>null</code>
     * @param list
     *            the list that receives the new record
     * @param header
     *            the raw header line; its first character (the FASTA marker)
     *            is removed
     * @param data
     *            the concatenated data lines of the record
     * @param id
     *            identifier for the new record
     */
    private static void addRNA(boolean withStructure, ArrayList<RNA> list,
            String header, StringBuilder data, int id) {
        final String name = header.substring(1);
        if (withStructure) {
            final int half = data.length() / 2;
            list.add(new RNA(id, name, data.substring(0, half).toUpperCase(),
                    data.substring(half)));
        } else {
            list.add(new RNA(id, name, data.toString().toUpperCase(), null));
        }
    }

    /**
     * Loads the records (with structures) from a file, cleans them and
     * writes the result back to the same file.
     * <p>
     * A record is kept when the include filter is empty or contains its
     * header, and the exclude filter does not contain its header. Headers
     * are compared in lower case. Kept records are cleaned with
     * {@link #removeSpecialChars()}, {@link #removePseudoknotInformation()},
     * {@link #fixEmptyHairpins()} and {@link #removeDanglingNs()}, in that
     * order.
     * <p>
     * When the input cannot be read completely the failure is printed and
     * the file is left untouched, so a read error can never wipe or shorten
     * the data. A filter file that cannot be read is printed as well and
     * treated as far as it was read.
     *
     * @param filename
     *            path of the FASTA file to rewrite
     * @param filterIn
     *            path of a file listing the headers to keep, one per line,
     *            or <code>null</code> to keep all
     * @param filterOut
     *            path of a file listing the headers to drop, one per line,
     *            or <code>null</code> to drop none
     */
    public static void loadFixAndSave(String filename, String filterIn,
            String filterOut) {
        final ArrayList<RNA> loaded = new ArrayList<RNA>();
        BufferedReader input = null;
        try {
            input = new BufferedReader(new FileReader(new File(filename)));
            readRecords(input, true, loaded);
        } catch (IOException ex) {
            // writing back now would replace the file with partial data
            ex.printStackTrace();
            return;
        } finally {
            closeResource(input);
        }

        final HashSet<String> mapOut = readHeaderFilter(filterOut);
        final HashSet<String> mapIn = readHeaderFilter(filterIn);

        ArrayList<RNA> kept = new ArrayList<RNA>();
        for (RNA rna : loaded) {
            final String key = rna.getHeader().toLowerCase();
            if (!mapIn.isEmpty() && !mapIn.contains(key)) {
                continue;
            }
            if (mapOut.contains(key)) {
                continue;
            }
            // change special chars to N's
            rna.removeSpecialChars();
            // remove pseudoknots from structures
            rna.removePseudoknotInformation();
            // fix empty hair pins
            rna.fixEmptyHairpins();
            // trim sides
            rna.removeDanglingNs();
            kept.add(rna);
        }
        // write the list back to the file
        saveToFile(kept, filename);
    }

    /**
     * Reads a filter file into a set of trimmed, lower-cased lines. A
     * <code>null</code> path gives an empty set. A read failure is printed
     * and the lines read so far are returned.
     */
    private static HashSet<String> readHeaderFilter(String path) {
        HashSet<String> names = new HashSet<String>();
        if (path == null) {
            return names;
        }
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(new File(path)));
            for (String line = reader.readLine(); line != null; line = reader
                    .readLine()) {
                names.add(line.trim().toLowerCase());
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        } finally {
            closeResource(reader);
        }
        return names;
    }

    /**
     * Shortens long runs of unpaired <code>N</code> at both ends of the
     * record. A run counts positions where the sequence has <code>N</code>
     * (in either case) and the structure has <code>.</code>. When a run is
     * longer than three positions, everything but three of them is removed
     * from both the sequence and the structure.
     * <p>
     * Nothing is changed when the sequence or the structure is missing, or
     * when the whole sequence is one such run (which includes the empty
     * sequence).
     */
    public void removeDanglingNs() {
        if (primary == null || secondary == null) {
            return;
        }
        // left side
        int count = 0;
        while (count < primary.length() && count < secondary.length()
                && isUnpairedN(count)) {
            count++;
        }
        if (count == primary.length()) {
            return;
        }
        if (count > 3) {
            final int excess = count - 3;
            primary = primary.substring(excess);
            secondary = secondary.substring(excess);
        }
        // right side
        count = 0;
        int pos = primary.length() - 1;
        while (pos >= 0 && pos < secondary.length() && isUnpairedN(pos)) {
            pos--;
            count++;
        }
        if (count > 3) {
            final int excess = count - 3;
            primary = primary.substring(0, primary.length() - excess);
            secondary = secondary.substring(0, secondary.length() - excess);
        }
    }

    /**
     * Tells whether the given position holds an <code>N</code> in the
     * sequence and a <code>.</code> in the structure. The index must be
     * valid for both strings.
     */
    private boolean isUnpairedN(int index) {
        return Character.toUpperCase(primary.charAt(index)) == 'N'
                && secondary.charAt(index) == '.';
    }

    /**
     * Writes the records to the named file, replacing any existing content.
     * Each record takes a header line, a sequence line and, when a structure
     * is set, a structure line, separated by the platform line separator.
     * <p>
     * All records are checked before the file is opened, so a failed check
     * leaves an existing file untouched. An I/O failure while writing is
     * printed and otherwise ignored; the file is always closed.
     *
     * @param list
     *            the records to write
     * @param filename
     *            path of the file to write
     * @throws RuntimeException
     *             when a record's structure and sequence differ in length
     */
    public static void saveToFile(ArrayList<RNA> list, String filename) {
        for (RNA rna : list) {
            if (rna.getSecondary() != null
                    && rna.getPrimary().length() != rna.getSecondary()
                            .length()) {
                throw new RuntimeException(
                        "Error in parsing: sequence and structure lengths differ for '"
                                + rna.getHeader() + "'");
            }
        }
        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(new FileWriter(new File(filename)));
            for (RNA rna : list) {
                writer.write('>');
                writer.write(rna.getHeader());
                writer.write(NEWLINE);
                writer.write(rna.getPrimary());
                if (rna.getSecondary() != null) {
                    writer.write(NEWLINE);
                    writer.write(rna.getSecondary());
                }
                writer.write(NEWLINE);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeResource(writer);
        }
    }

    /**
     * Closes the resource when it is not <code>null</code>. A failure to
     * close is reported on standard error instead of being thrown, so this
     * is safe to call from a <code>finally</code> block.
     */
    private static void closeResource(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            System.err.println("Failed to close resource: " + e);
        }
    }
}