/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package data.cerevisiae.tfpwm;
import data.cerevisiae.tfpwm.ZhuToLandscape.PWMEntry;
import fork.lib.base.collection.NamedTable;
import fork.lib.base.file.FileName;
import fork.lib.base.file.management.Dirs;
import fork.lib.bio.anno.genomic.BedAttribute;
import fork.lib.bio.anno.genomic.BedReader;
import fork.lib.bio.anno.genomic.region.GenomicRegion;
import fork.lib.bio.anno.genomic.region.GenomicRegionsBuilder;
import fork.lib.bio.seq.FastaSequenceExtractor;
import fork.lib.bio.seq.NucleotideSequenceParser;
import fork.lib.bio.seq.SequenceExtractorEntry;
import fork.lib.bio.seq.align.PWM;
import java.io.File;
import java.util.Arrays;
import java.util.HashMap;
/**
 * Scores the sequences of a set of genomic regions against transcription
 * factor PWMs and collects the scores in a table (one row per region ID,
 * one column per transcription factor).
 *
 * <p>The regions are read from a BED file, their sequences are extracted from
 * the sacCer1 genome FASTA ({@link #sf}), and each call to
 * {@link #addTF(String, PWM)} appends one score column.
 *
 * @author forksapien
 */
public class ZhuToGene {

    /** Base directory, resolved through {@code Dirs.getFile("dir")}. */
    public static File dir = Dirs.getFile("dir");

    /** sacCer1 genome FASTA file located under {@link #dir}. */
    public static File sf = new File(dir + "/anno/genomes/sacCer1/sacCer1.fa");

    /** BED file describing the regions whose sequences are scored. */
    protected File f;

    /** Region ID mapped to the sequence extracted for that region. */
    protected HashMap<String, String> idseq = new HashMap<>();

    /** Score table: rows are region IDs, columns are transcription factor names. */
    protected NamedTable<String, String, String> tab = new NamedTable<>();

    /**
     * Creates the scorer and immediately loads all region sequences.
     *
     * <p>NOTE(review): this calls the overridable {@link #init()} from the
     * constructor; a subclass override would run before the subclass fields
     * are initialised.
     *
     * @param f BED file with the regions to score
     * @throws Exception if reading the BED file or extracting sequences fails
     */
    public ZhuToGene(File f) throws Exception {
        this.f = f;
        init();
    }

    /**
     * Reads the regions from {@link #f}, extracts each region's sequence from
     * {@link #sf}, stores it in {@link #idseq} under the region ID and appends
     * an empty row for that ID to {@link #tab}.
     *
     * @throws Exception if the BED or FASTA input cannot be processed
     */
    protected void init() throws Exception {
        GenomicRegionsBuilder gb = new BedReader(f).getGenomicRegionsBuilder();
        FastaSequenceExtractor se = new FastaSequenceExtractor(sf, gb);
        SequenceExtractorEntry en;
        while ((en = se.nextEntry()) != null) {
            GenomicRegion reg = en.getGenomicRegion();
            String seq = en.getSequence();
            String id = reg.getID();
            idseq.put(id, seq);
            tab.appendEmptyRow(id);
        }
    }

    /**
     * Writes the score table to {@code out}, creating missing parent
     * directories first. Before writing, {@code tab.setNullValues("0")} is
     * applied (presumably filling cells that were never set with "0").
     *
     * @param out destination file
     * @throws Exception if the table cannot be written
     */
    public void writeToFile(File out) throws Exception {
        // getParentFile() returns null for a path without a parent component
        // (e.g. new File("out.txt")); in that case there is nothing to create.
        File parent = out.getParentFile();
        if (parent != null) {
            parent.mkdirs();
        }
        tab.setNullValues("0");
        tab.writeToFile(out);
    }

    /**
     * Appends a column named {@code tf} to the table and fills it with the
     * score of {@code mot} against every stored region sequence.
     *
     * <p>For each region the larger of two {@code bestAlign} results is kept,
     * rounded to three decimal places, and negative scores are stored as 0.
     *
     * @param tf  column name (transcription factor title)
     * @param mot PWM used to score the sequences
     * @throws Exception if sequence parsing or table access fails
     */
    public void addTF(String tf, PWM mot) throws Exception {
        tab.appendEmptyColumn(tf);
        String[] ids = idseq.keySet().toArray(new String[0]);
        Arrays.sort(ids);
        for (String id : ids) {
            String seq = idseq.get(id);
            // NOTE(review): presumably meant to be the opposite-strand
            // (complement / reverse-complement) sequence -- confirm what
            // NucleotideSequenceParser.parseSequence(...).toString() returns.
            String cseq = NucleotideSequenceParser.parseSequence(seq).toString();
            double s1 = mot.bestAlign(seq);
            double s2 = mot.bestAlign(cseq);
            double score = Math.max(s1, s2);
            // Round to three decimals.
            score = (double) Math.round(score * 1000) / 1000;
            if (score < 0) {
                score = 0;
            }
            tab.setValueAt(Double.toString(score), id, tf);
        }
    }

    /**
     * Debug driver: scores the sacCer1-xu five-prime gene regions against
     * every PWM entry read from the Zhu 2009 yeast TF file and writes the
     * resulting table under {@code <dir>/data/tf}.
     *
     * @param args unused
     * @throws Exception if any input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws Exception {
        File f = new File(dir + "/anno/bed/sacCer1-xu/sacCer1-xu_gene_five_150.0.bed");
        File tf = new File(dir + "/data/tf/zhu_2009_yeast-tf.txt");
        File od = new File(dir + "/data/tf");
        ZhuToGene zg = new ZhuToGene(f);
        ZhuToLandscape zz = new ZhuToLandscape(tf);
        PWMEntry en;
        while ((en = zz.nextEntry()) != null) {
            String tit = en.tit;
            PWM mot = en.pwm;
            System.out.println("add: " + tit);
            zg.addTF(tit, mot);
        }
        File of = new File(od + "/all_" + FileName.getBaseName(tf) + "_" + FileName.getBaseName(f) + ".txt");
        zg.writeToFile(of);
    }
}