/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package data.cerevisiae.test.regseq;
import fork.lib.base.file.FileName;
import fork.lib.base.file.management.Dirs;
import fork.lib.bio.seq.FastaEntry;
import fork.lib.bio.seq.FastaReader;
import fork.lib.math.algebra.Algebra1D;
import fork.lib.math.applied.stat.FrequencyCount;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.util.HashMap;
/**
*
* @author man-mqbpjmg4
*/
public class FindMotif {
public static void main(String[] args) throws Exception { //debug
File dir= Dirs.getFile("dir");
File genomef= new File(dir+"/anno/genomes/sacCer1/sacCer1.fa");
File d= new File(dir+"/test_anal\\z3seq\\quant_z3as\\3prime_up");
//File d= new File(dir+"/test_anal\\z3seq\\quant_z3\\3prime_up");
int size= 1;
FrequencyCount<String> fcg= new FrequencyCount();
FastaReader frg= new FastaReader(genomef);
FastaEntry eng;
while( (eng=frg.nextEntry())!=null ){
String s= eng.getSequence();
for( int i=0; i<s.length()-size; i++ ){
String ss= s.substring(i, i+size);
fcg.add(ss);
}
}
Object[] ks= fcg.getSortedKeys();
File[] fs= d.listFiles();
for( int j=0; j<fs.length; j++ ){
File f= fs[j];
if(FileName.getExt(f).equals("fasta")){
FastaReader fr= new FastaReader(f);
FastaEntry en;
FrequencyCount<String> fc= new FrequencyCount<>();
while( (en=fr.nextEntry())!=null ){
String s= en.getSequence();
for( int i=0; i<s.length()-size; i++ ){
String ss= s.substring(i, i+size);
fc.add(ss);
}
}
HashMap<String, Integer> kvs= fc.getFrequencyCounts();
File od= new File(d+"/"+size);
od.mkdirs();
double[] vs= new double[fcg.keySize()];
double[] gvs= new double[fcg.keySize()];
double[] nvs= new double[fcg.keySize()];
for( int i=0; i<ks.length; i++ ){
String k = ks[i].toString();
double v= kvs.containsKey(k) ?
(double) Math.round( (double) kvs.get(k)/ fc.totalCounts() * ks.length*1000 ) / 1000
: 0d ;
double gv= (double) Math.round((double)fcg.getFrequencyCounts().get(k) / fcg.totalCounts() * fcg.keySize() *1000 )/1000;
double nv= (double) Math.round(v/gv *1000)/1000;
vs[i]= v;
gvs[i]= gv;
nvs[i]= nv;
}
int[] sinds= Algebra1D.getIndexForOrigin(nvs);
File of= new File(od+"/motif_"+FileName.getBaseName(f)+".txt");
BufferedWriter bw= new BufferedWriter(new FileWriter(of));
for( int i=0; i<sinds.length; i++ ){
int ind= sinds[sinds.length-1-i];
bw.write(ks[ind]+"\t"+vs[ind]+"\t"+gvs[ind]+"\t"+nvs[ind]+"\n");
}
bw.close();
}
}
}
}