Package data.cerevisiae.tfpwm

Source Code of data.cerevisiae.tfpwm.ZhuToGene

/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package data.cerevisiae.tfpwm;

import data.cerevisiae.tfpwm.ZhuToLandscape.PWMEntry;
import fork.lib.base.collection.NamedTable;
import fork.lib.base.file.FileName;
import fork.lib.base.file.management.Dirs;
import fork.lib.bio.anno.genomic.BedAttribute;
import fork.lib.bio.anno.genomic.BedReader;
import fork.lib.bio.anno.genomic.region.GenomicRegion;
import fork.lib.bio.anno.genomic.region.GenomicRegionsBuilder;
import fork.lib.bio.seq.FastaSequenceExtractor;
import fork.lib.bio.seq.NucleotideSequenceParser;
import fork.lib.bio.seq.SequenceExtractorEntry;
import fork.lib.bio.seq.align.PWM;
import java.io.File;
import java.util.Arrays;
import java.util.HashMap;

/**
*
* @author forksapien
*/
public class ZhuToGene {
   
   
public static File dir= Dirs.getFile("dir");
public static File sf= new File(dir+"/anno/genomes/sacCer1/sacCer1.fa");
   
protected File f;
protected HashMap<String,String> idseq= new HashMap<>();
protected NamedTable<String,String,String> tab= new NamedTable<>();
   
   
    public ZhuToGene(File f)throws Exception{
        this.f=f;
        init();
    }
   
   
protected void init() throws Exception{
    GenomicRegionsBuilder gb= new BedReader(f).getGenomicRegionsBuilder();
    FastaSequenceExtractor se= new FastaSequenceExtractor(sf, gb);
    SequenceExtractorEntry en;
    while((en=se.nextEntry())!=null){
        GenomicRegion reg= en.getGenomicRegion();
        String seq= en.getSequence();
        String id= reg.getID();
        idseq.put(id , seq);
        tab.appendEmptyRow(id);
    }
}
   
   
public void writeToFile(File out)throws Exception{
    out.getParentFile().mkdirs();
    tab.setNullValues("0");
    tab.writeToFile(out);
}



public void addTF(String tf, PWM mot) throws Exception{
    tab.appendEmptyColumn(tf);
    String[] ids= new String[idseq.keySet().size()];
    idseq.keySet().toArray(ids);
    Arrays.sort(ids);
    for(int i=0; i<ids.length ; i++){
        String id= ids[i];
        String seq= idseq.get(id);
        String cseq= NucleotideSequenceParser.parseSequence(seq).toString();
       
        double s1= mot.bestAlign(seq);
        double s2= mot.bestAlign(cseq);
        double score= Math.max(s1, s2);
        score= (double)Math.round(score*1000)/1000;
        if(score<0){
            score=0;
        }
        String sc= Double.toString(score);
        tab.setValueAt(sc, id, tf);
    }
}
   
   
   
   
public static void main(String[] args) throws Exception{ //debug
    File dir= Dirs.getFile("dir");
    File sf= new File(dir+"/anno/genomes/sacCer1/sacCer1.fa");
    File f= new File(dir+"/anno/bed/sacCer1-xu/sacCer1-xu_gene_five_150.0.bed");
    File tf= new File(dir+"/data/tf/zhu_2009_yeast-tf.txt");
   
   
    File od= new File(dir+"/data/tf");
   
    ZhuToGene zg= new ZhuToGene(f);
    ZhuToLandscape zz= new ZhuToLandscape(tf);
    PWMEntry en;
    int ind=0;
    while((en=zz.nextEntry())!=null){
        String tit= en.tit;
        PWM mot= en.pwm;
        System.out.println("add: "+ tit);
       
        zg.addTF(tit, mot);
        ind++;
        //if(ind>5){break;}
    }
   
    File of= new File(od+"/all_"+FileName.getBaseName(tf)+"_"+FileName.getBaseName(f)+".txt");
    zg.writeToFile(of);
   
}   
   
   
   
}
TOP

Related Classes of data.cerevisiae.tfpwm.ZhuToGene

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.