/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package data.cerevisiae.lee2007.pref;
import fork.lib.base.file.management.Dirs;
import fork.lib.bio.anno.genomic.BedAttribute;
import fork.lib.bio.anno.genomic.BedExporter;
import fork.lib.bio.anno.genomic.BedReader;
import fork.lib.bio.anno.genomic.region.GenomicRegion;
import fork.lib.bio.anno.genomic.region.GenomicRegionsBuilder;
import fork.lib.math.applied.stat.Distribution;
import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
/**
 * Splits the regions of a {@link GenomicRegionsBuilder} into bins according to each
 * region's value, and writes one BED file per bin.
 *
 * <p>Bin boundaries are obtained from {@code Distribution.quantileBoundaries(binn)},
 * computed over the values of all regions supplied at construction time.
 * NOTE(review): the binning logic assumes that call returns {@code binn + 1} boundaries
 * in ascending order (lowest value first, highest value last) - confirm against
 * {@code Distribution}.
 *
 * @author man-mqbpjmg4
 */
public class Bin {

    /** Source of the regions to be binned. */
    protected GenomicRegionsBuilder gb;
    /** Collects the value of every region in {@link #gb}; filled by {@link #init()}. */
    protected Distribution dis = new Distribution();
    /** Number of bins (and therefore output files) to produce. */
    protected int binn;

    /**
     * Creates a binner over the given regions and immediately collects their values.
     *
     * @param gb   regions to bin
     * @param binn number of bins; must be at least 1
     * @throws IllegalArgumentException if {@code binn} is less than 1
     * @throws Exception if collecting the region values fails
     */
    public Bin(GenomicRegionsBuilder gb, int binn) throws Exception {
        if (binn < 1) {
            throw new IllegalArgumentException("binn must be >= 1, got " + binn);
        }
        this.gb = gb;
        this.binn = binn;
        // NOTE(review): init() is protected and therefore overridable; an override runs
        // before the subclass constructor body has executed.
        init();
    }

    /**
     * Adds the value of every region in {@link #gb} to {@link #dis}.
     *
     * @throws Exception declared for subclasses and underlying library calls
     */
    protected void init() throws Exception {
        Iterator<GenomicRegion> it = gb.iterator();
        while (it.hasNext()) {
            dis.add(it.next().getValue());
        }
    }

    /**
     * Assigns every region to a bin and writes the bins to
     * {@code bin_0.bed ... bin_(binn-1).bed} inside the given directory.
     *
     * <p>Bin {@code i} holds regions with {@code bounds[i] <= value < bounds[i+1]}; the
     * last bin additionally includes values equal to the final boundary, so a region
     * sitting exactly on the top boundary is not lost. Regions whose value matches no
     * bin (for example {@code NaN}) are not written anywhere.
     *
     * @param od output directory; created (with parents) if it does not exist
     * @throws java.io.IOException if the output directory cannot be created
     * @throws Exception if exporting a bin fails
     */
    public void writeToDir(File od) throws Exception {
        // mkdirs() returns false both on failure and when the directory already exists,
        // so only treat it as an error when no directory is there afterwards.
        if (!od.mkdirs() && !od.isDirectory()) {
            throw new java.io.IOException("Cannot create output directory: " + od);
        }

        ArrayList<Double> bs = dis.quantileBoundaries(binn);

        ArrayList<GenomicRegionsBuilder> gbouts = new ArrayList<>();
        for (int i = 0; i < binn; i++) {
            gbouts.add(new GenomicRegionsBuilder());
        }

        Iterator<GenomicRegion> it = gb.iterator();
        while (it.hasNext()) {
            GenomicRegion gr = it.next();
            int idx = binIndex(gr.getValue(), bs);
            if (idx >= 0) {
                gbouts.get(idx).add(gr);
            }
        }

        for (int i = 0; i < gbouts.size(); i++) {
            File of = new File(od, "bin_" + i + ".bed");
            new BedExporter(gbouts.get(i)).writeToFile(of);
        }
    }

    /**
     * Finds the first interval {@code [bounds[i], bounds[i+1])} containing {@code v}.
     * The last interval is closed on the right so the top boundary value is included.
     *
     * @param v      value to locate
     * @param bounds interval boundaries, expected in ascending order
     * @return index of the matching interval, or {@code -1} if none matches
     */
    private static int binIndex(double v, ArrayList<Double> bounds) {
        int last = bounds.size() - 2;
        for (int i = 0; i <= last; i++) {
            double lo = bounds.get(i);
            double hi = bounds.get(i + 1);
            if (v >= lo && (v < hi || (i == last && v == hi))) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Debug entry point: bins a fixed BED file into 10 bins and writes the result next
     * to the input file.
     *
     * @param args unused
     * @throws Exception if reading, binning or writing fails
     */
    public static void main(String[] args) throws Exception { //debug
        File dir = Dirs.getFile("dir");
        File d = new File(dir + "/other_datasets/lee_2007_nucleosome/pref");
        File f = new File(d + "/score_non-ol_10_sacCer1_chr.bed");
        GenomicRegionsBuilder gb = new BedReader(f).getGenomicRegionsBuilder();
        int binn = 10;
        Bin bb = new Bin(gb, binn);
        bb.writeToDir(d);
    }
}