Package data.cerevisiae.test.regseq

Source Code of data.cerevisiae.test.regseq.FindMotif

/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/

package data.cerevisiae.test.regseq;

import fork.lib.base.file.FileName;
import fork.lib.base.file.management.Dirs;
import fork.lib.bio.seq.FastaEntry;
import fork.lib.bio.seq.FastaReader;
import fork.lib.math.algebra.Algebra1D;
import fork.lib.math.applied.stat.FrequencyCount;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.util.HashMap;

/**
*
* @author man-mqbpjmg4
*/
public class FindMotif {

   
   


public static void main(String[] args) throws Exception { //debug
    File dir= Dirs.getFile("dir");
    File genomef= new File(dir+"/anno/genomes/sacCer1/sacCer1.fa");
   

    File d= new File(dir+"/test_anal\\z3seq\\quant_z3as\\3prime_up");
    //File d= new File(dir+"/test_anal\\z3seq\\quant_z3\\3prime_up");
   
    int size= 1;
   
   
    FrequencyCount<String> fcg= new FrequencyCount();
    FastaReader frg= new FastaReader(genomef);
    FastaEntry eng;
    while( (eng=frg.nextEntry())!=null ){
        String s= eng.getSequence();
        for( int i=0; i<s.length()-size; i++ ){
            String ss= s.substring(i, i+size);
            fcg.add(ss);
        }
    }
    Object[] ks= fcg.getSortedKeys();
   
   
    File[] fs= d.listFiles();
    for( int j=0; j<fs.length; j++ ){
        File f= fs[j];
        if(FileName.getExt(f).equals("fasta")){
            FastaReader fr= new FastaReader(f);
            FastaEntry en;
            FrequencyCount<String> fc= new FrequencyCount<>();


            while( (en=fr.nextEntry())!=null ){
                String s= en.getSequence();
                for( int i=0; i<s.length()-size; i++ ){
                    String ss= s.substring(i, i+size);
                    fc.add(ss);
                }
            }
            HashMap<String, Integer> kvs= fc.getFrequencyCounts();
           

            File od= new File(d+"/"+size);
            od.mkdirs();
            double[] vs= new double[fcg.keySize()];
            double[] gvs= new double[fcg.keySize()];
            double[] nvs= new double[fcg.keySize()];
            for( int i=0; i<ks.length; i++ ){
                String k = ks[i].toString();
                double v= kvs.containsKey(k) ?
                        (double) Math.round( (double) kvs.get(k)/ fc.totalCounts() * ks.length*1000 ) / 1000
                        : 0d ;
                double gv= (double) Math.round((double)fcg.getFrequencyCounts().get(k) / fcg.totalCounts() * fcg.keySize() *1000 )/1000;
                double nv= (double) Math.round(v/gv *1000)/1000;
                vs[i]= v;
                gvs[i]= gv;
                nvs[i]= nv;
            }
            int[] sinds= Algebra1D.getIndexForOrigin(nvs);
            File of= new File(od+"/motif_"+FileName.getBaseName(f)+".txt");
            BufferedWriter bw= new BufferedWriter(new FileWriter(of));
            for( int i=0; i<sinds.length; i++ ){
                int ind= sinds[sinds.length-1-i];
                bw.write(ks[ind]+"\t"+vs[ind]+"\t"+gvs[ind]+"\t"+nvs[ind]+"\n");
            }
            bw.close();
        }
       
    }
}



   
   
}
TOP

Related Classes of data.cerevisiae.test.regseq.FindMotif

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.