/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package data.cerevisiae.test.compseq;
import fork.lib.base.file.FileName;
import fork.lib.base.file.io.txt.ReadTable;
import fork.lib.base.file.management.Dirs;
import fork.lib.bio.anno.genomic.BedReader;
import fork.lib.bio.anno.genomic.region.DirectionalGenomicRegion;
import fork.lib.bio.anno.genomic.region.GenomicRegion;
import fork.lib.bio.anno.genomic.region.GenomicRegionsBuilder;
import fork.lib.bio.seq.GetSequenceFasta;
import fork.lib.bio.seq.NucleotideSequenceParser;
import fork.lib.math.applied.stat.FrequencyCount;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
/**
 * Prints the per-character composition of the sequences that lie under a set
 * of BED regions.
 *
 * <p>On construction the class indexes every file in a genome directory whose
 * base name starts with {@code "chr"} and loads an id-to-region map from a BED
 * file. {@link #printOutput(ArrayList)} then fetches the sequence of each
 * requested region and prints how often each character occurs.
 *
 * @author man-mqbpjmg4
 */
public class SeqComposition {

    /** Genome directory used when the caller does not supply one. */
    protected static final String DEFAULT_GENOME_DIR =
            "E:\\muxingu\\mystudy\\phd\\files\\anno\\genomes\\sacCer1";

    /** BED file the region map is read from. */
    protected File bedf;
    /** Directory scanned for per-chromosome sequence files. */
    protected File genomeDir;
    /** Sequence readers keyed by the base name of the chromosome file. */
    protected HashMap<String,GetSequenceFasta> chrGet= new HashMap<>();
    /** Regions of the BED file keyed by their id. */
    protected HashMap<String,GenomicRegion> idReg;

    /**
     * Creates an instance that reads chromosome files from
     * {@link #DEFAULT_GENOME_DIR}.
     *
     * @param bedf BED file describing the regions
     * @throws Exception if the genome directory or the BED file cannot be read
     */
    public SeqComposition(File bedf) throws Exception {
        this(bedf, new File(DEFAULT_GENOME_DIR));
    }

    /**
     * Creates an instance that reads chromosome files from the given directory.
     *
     * @param bedf BED file describing the regions
     * @param genomeDir directory holding the chromosome sequence files
     * @throws Exception if the genome directory or the BED file cannot be read
     */
    public SeqComposition(File bedf, File genomeDir) throws Exception {
        this.bedf = bedf;
        this.genomeDir = genomeDir;
        init();
    }

    /**
     * Fills {@link #chrGet} from the genome directory and loads {@link #idReg}
     * from the BED file.
     *
     * @throws Exception if the genome directory cannot be listed, or if one of
     *         the underlying readers fails
     */
    protected void init() throws Exception {
        // listFiles() returns null (not an empty array) when the path is not a
        // readable directory; fail with a message instead of a bare NPE.
        File[] files = genomeDir.listFiles();
        if (files == null) {
            throw new java.io.IOException("Cannot list genome directory: " + genomeDir);
        }
        for (File f : files) {
            String baseName = FileName.getBaseName(f);
            if (baseName.startsWith("chr")) {
                chrGet.put(baseName, new GetSequenceFasta(f));
            }
        }
        idReg = new BedReader(bedf).getIDToRegionMap();
    }

    /**
     * Prints, for the regions named by {@code ids}, one line per distinct
     * character: the character, its fraction of all counted characters rounded
     * to three decimals, and its absolute count. A blank line follows.
     *
     * <p>Ids without a region in the BED file, and regions whose chromosome
     * has no indexed sequence file, are skipped silently. Every region found is
     * cast to {@link DirectionalGenomicRegion}, so a region of another type
     * raises a {@link ClassCastException}.
     *
     * @param ids ids of the regions to include
     * @throws Exception if fetching or parsing a sequence fails
     */
    public void printOutput(ArrayList<String> ids) throws Exception {
        FrequencyCount<Character> fc = new FrequencyCount<>();
        for (String id : ids) {
            DirectionalGenomicRegion gr = (DirectionalGenomicRegion) idReg.get(id);
            if (gr == null) {
                continue;
            }
            GetSequenceFasta gs = chrGet.get(gr.chr);
            if (gs == null) {
                continue;
            }
            String seq = gs.getSequence(gr);
            if (gr.isOnReverseStrand()) {
                // Base order is irrelevant for composition counts, so only the
                // complement is taken for reverse-strand regions.
                seq = NucleotideSequenceParser.parseSequence(seq).complementary().toString();
            }
            for (int j = 0; j < seq.length(); j++) {
                fc.add(seq.charAt(j));
            }
        }
        int tot = fc.totalCounts();
        Object[] ks = fc.getSortedKeys();
        for (Object k : ks) {
            int c = fc.getFrequencyCounts().get(k);
            System.out.println(" "+k+" "+ (double)Math.round((double)c/tot*1000)/1000+" "+ c);
        }
        System.out.println();
    }

    /**
     * Debug entry point: prints the composition of the "five" regions and of
     * the "cds" regions for the ids listed in column 3 of the "five" BED file.
     *
     * @param args unused
     * @throws Exception if any input file cannot be read
     */
    public static void main(String[] args) throws Exception { //debug
        File dir = Dirs.getFile("dir");
        File bed = new File(dir+"/anno/bed/sacCer1-xu/sacCer1-xu_gene_five_0.150.bed");
        SeqComposition cc = new SeqComposition(bed);
        // Only referenced by the alternative id lists commented out below.
        File d = new File("E:\\muxingu\\mystudy\\phd\\progs\\PhdProject\\plot");
        //cc.printOutput(new ReadTable(new File(d+"/xxx_cz5.txt")).getTable().getColumn(0));
        //cc.printOutput(new ReadTable(new File(d+"/xxx_cnz5.txt")).getTable().getColumn(0));

        // Read the id column once and reuse it for both region sets.
        ArrayList<String> ids = new ReadTable(bed).getTable().getColumn(3);
        cc.printOutput(ids);
        // NOTE(review): the second run looks up ids taken from the "five" BED
        // file in the "cds" BED file - presumably both share ids; confirm.
        cc = new SeqComposition(new File(dir+"/anno/bed/sacCer1-xu/sacCer1-xu_gene_cds_300.300.bed"));
        cc.printOutput(ids);
    }
}