/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package data.cerevisiae.region;
import fork.lib.base.file.io.txt.ReadTable;
import fork.lib.bio.anno.genomic.region.DirectionalGenomicRegion;
import fork.lib.bio.anno.genomic.region.GenomicRegionsBuilder;
import fork.lib.math.algebra.elementary.set.continuous.RegionException;
import fork.lib.math.algebra.elementary.set.continuous.Region;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
/**
 * Reads a tabular transcript annotation file and groups its rows into one
 * {@link GenomicRegionsBuilder} per transcript type (the value of column 5).
 *
 * <p>Each row contributes at most one region. A row is skipped when the first
 * identifier in its column 6 has already been seen on an earlier row,
 * regardless of the type of that earlier row.
 *
 * <p>The file is parsed eagerly by the constructor.
 *
 * @author forksapien
 */
public class XuAnnoReader {

    /** Type label (column 5) selected by {@link #getCUTs()}. */
    public static final String CUT = "CUTs";
    /** Type label (column 5) selected by {@link #getSUTs()}. */
    public static final String SUT = "SUTs";
    /** Type label (column 5) selected by {@link #getORFs()}. */
    public static final String ORF = "ORF-T";
    /** Type label (column 5) selected by {@link #getOthers()}. */
    public static final String OTHER = "other";

    /** The annotation table that is parsed by {@link #init()}. */
    protected File f;
    /** Regions grouped by type label; populated by {@link #init()}. */
    public HashMap<String, GenomicRegionsBuilder> hm;
    /** Identifiers already turned into a region; used to drop repeated ids. */
    protected HashSet<String> allids = new HashSet<>();

    /**
     * Creates a reader and immediately parses the given file.
     *
     * @param f the annotation table to read
     * @throws IOException propagated from reading the table
     * @throws RegionException propagated from building or sorting the regions
     */
    public XuAnnoReader(File f) throws IOException, RegionException {
        this.f = f;
        // NOTE(review): init() is protected and therefore overridable, so a
        // subclass override runs before the subclass's own fields are set.
        init();
    }

    /**
     * Parses the table into {@link #hm}, one builder per type label, and sorts
     * every builder once all rows have been added.
     *
     * <p>Columns used (0-based): 1 = chromosome name (prefixed with
     * {@code "chr"}), 2 = a string whose first character is passed to the
     * region constructor (presumably the strand; confirm against
     * {@code DirectionalGenomicRegion}), 3 and 4 = integer coordinates
     * (presumably start and end), 5 = type label, 6 = comma-separated
     * identifiers of which only the first is kept after all spaces are removed.
     *
     * @throws IOException propagated from reading the table
     * @throws RegionException propagated from building or sorting the regions
     */
    protected void init() throws IOException, RegionException {
        hm = new HashMap<>();
        ReadTable rt = new ReadTable(f);
        rt.param().setSkipRows(0);
        String[][] rows = rt.getTableAsArray();
        for (String[] row : rows) {
            String id = row[6].replaceAll(" ", "").split(",")[0];
            if (allids.contains(id)) {
                // Only the first row carrying a given id is kept.
                continue;
            }
            String type = row[5];
            DirectionalGenomicRegion reg = new DirectionalGenomicRegion(
                    "chr" + row[1],
                    row[2].charAt(0),
                    Integer.parseInt(row[3]),
                    Integer.parseInt(row[4]));
            reg.attr = id;
            GenomicRegionsBuilder builder = hm.get(type);
            if (builder == null) {
                builder = new GenomicRegionsBuilder();
                hm.put(type, builder);
            }
            builder.add(reg);
            allids.add(id);
        }
        for (GenomicRegionsBuilder builder : hm.values()) {
            builder.sortAll();
        }
    }

    /**
     * @return the regions whose type is {@link #CUT}, or {@code null} when the
     *         file contained no row of that type
     */
    public GenomicRegionsBuilder getCUTs() {
        return hm.get(CUT);
    }

    /**
     * @return the regions whose type is {@link #SUT}, or {@code null} when the
     *         file contained no row of that type
     */
    public GenomicRegionsBuilder getSUTs() {
        return hm.get(SUT);
    }

    /**
     * @return the regions whose type is {@link #ORF}, or {@code null} when the
     *         file contained no row of that type
     */
    public GenomicRegionsBuilder getORFs() {
        return hm.get(ORF);
    }

    /**
     * @return the regions whose type is {@link #OTHER}, or {@code null} when
     *         the file contained no row of that type
     */
    public GenomicRegionsBuilder getOthers() {
        return hm.get(OTHER);
    }

    /**
     * Builds a new, sorted builder holding the SUT, CUT, ORF and "other"
     * regions, added in that order. Types that are absent from the file are
     * skipped, so the result is never {@code null} but may be empty.
     *
     * @return a freshly created builder combining the four known types
     */
    public GenomicRegionsBuilder getAll() {
        GenomicRegionsBuilder gb = new GenomicRegionsBuilder();
        addIfPresent(gb, getSUTs());
        addIfPresent(gb, getCUTs());
        addIfPresent(gb, getORFs());
        addIfPresent(gb, getOthers());
        gb.sortAll();
        return gb;
    }

    /** Adds {@code source} to {@code target} unless the type was absent from the file. */
    private static void addIfPresent(GenomicRegionsBuilder target, GenomicRegionsBuilder source) {
        if (source != null) {
            target.addAll(source);
        }
    }

    /**
     * Reads the raw transcript table from a fixed local directory and writes
     * its ORF regions to a BED file in the same directory tree.
     *
     * @param args ignored
     * @throws Exception propagated from reading the table or writing the output
     */
    public static void main(String[] args) throws Exception {
        File dir = new File("/home/forksapien/mystudy/phd/files");
        XuAnnoReader xr = new XuAnnoReader(new File(dir + "/anno/raw/Xu_2009_transcripts.txt"));
        GenomicRegionsBuilder orfs = xr.getORFs();
        if (orfs == null) {
            // No row of type ORF was found, so there is nothing to write.
            System.err.println("No regions of type " + ORF + " found; nothing written.");
            return;
        }
        orfs.writeToFileBed(new File(dir + "/anno/xu_2009_orfs.bed"));
    }
}