/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package data.human.statham2014mods;
import data.cerevisiae.inhouse.a_norm.NormChipSeqRPM;
import fork.lib.base.file.FileName;
import fork.lib.base.file.management.Dirs;
import fork.lib.bio.anno.genomic.BedGraphBuffer;
import fork.lib.bio.anno.genomic.BedGraphExporter;
import fork.lib.bio.anno.genomic.BedGraphReader;
import fork.lib.bio.anno.genomic.LandscapeBuffer;
import fork.lib.bio.anno.genomic.LandscapeBuilder;
import fork.lib.bio.anno.genomic.LandscapeTransformer;
import fork.lib.math.algebra.elementary.set.continuous.Region;
import fork.lib.math.applied.buffer.RegionBuffer;
import fork.lib.math.applied.stat.Distribution;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.util.ArrayList;
/**
*
* @author man-mqbpjmg4
*/
public class NormTest {
protected File f, in, od, outRat, outSub, outSub2, out;
public NormTest(File f, File in, File od)throws Exception {
this.f=f;
this.in=in;
this.od=od;
init();
}
protected void init() throws Exception {
File odt= new File(od+"/subtract");
odt.mkdirs();
outRat= new File(odt+"/ratio_"+FileName.getBaseName(f)+".wig");
outSub= new File(odt+"/subtract_"+FileName.getBaseName(f)+".wig");
outSub2= new File(odt+"/subtract2_"+FileName.getBaseName(f)+".wig");
out= new File(od+"/norm_"+FileName.getBaseName(f)+".wig");
}
public void toRatio()throws Exception {
NormChipSeqRPM.toRatio(f, in, outRat);
}
public void subtract()throws Exception {
Distribution dis0 = new BedGraphBuffer(outRat).getDistributionNonZero();
Distribution dis= dis0.subsetHigherThan(1);
dis= dis.subsetLowerOrEqualTo(dis.quantileBoundaries(10).get(5));
double mean= dis.mean();
double sd= dis.standartDeviation();
double rat= mean+ 1.2 *sd;
System.out.println("m: "+ mean+" sd: "+sd+" rat: "+rat);
//double rat= 2.5;
/*
while(true){
try{
String l= new BufferedReader(new InputStreamReader(System.in)).readLine();
String[] ss= l.split(" ");
int n= Integer.parseInt(ss[0]);
int tar= Integer.parseInt(ss[1]);
if(n==-1){
break;
}
ArrayList<Double> qs= dis.quantileBoundaries(n);
Distribution ndis= dis.subsetLowerOrEqualTo(qs.get(tar));
System.out.println("mean: "+ ndis.mean()+" sd: "+ ndis.standartDeviation());
}catch(Exception e){}
}
*/
LandscapeBuffer lbf= new BedGraphBuffer(f);
LandscapeBuffer lbi= new BedGraphBuffer(in);
BufferedWriter bw= new BufferedWriter(new FileWriter(outSub));
BedGraphExporter.writeTitle(bw, FileName.getBaseName(outSub));
String[] chrs= lbf.getUnsortedChromosomeList();
for( int i=0; i<chrs.length; i++ ){
LandscapeBuilder lbout= new LandscapeBuilder();
String chr= chrs[i];
System.out.print(chr+" ");
ArrayList<Region> frs= lbf.getLandscape2DForChromosome(chr).getRegions();
ArrayList<Region> irs= lbi.getLandscape2DForChromosome(chr).getRegions();
RegionBuffer buf= new RegionBuffer(irs);
for( int j=0; j<frs.size(); j++ ){
Region fr= frs.get(j);
ArrayList<Region> bufrs= buf.reloadAndGetList(fr);
double ival= RegionBuffer.getOverlap(bufrs, fr) / (fr.getRange()+1);
double fval= (double)fr.attribute();
double nv= fval - ival*rat;
//System.out.println(fval+" "+ ival+" "+ fr.getRange());
nv= nv<0 ? 0 : nv;
fr.setAttribute(nv);
lbout.add(chr, fr);
}
BedGraphExporter be= new BedGraphExporter(lbout);
be.appendToFile(bw);
}
bw.close();
System.out.println();
}
public void subtract2()throws Exception {
Distribution dis= new BedGraphBuffer(outSub).getDistributionNonZero();
dis= dis.subsetLowerThan(dis.quantileBoundaries(10).get(7));
double med= dis.median();
double sd= dis.standartDeviation();
double thr= med+ sd* 2;
System.out.println("med: "+ med+" sd: "+ sd+" "+ thr);
BufferedWriter bw= new BufferedWriter(new FileWriter(outSub2));
BedGraphExporter.writeTitle(bw, FileName.getBaseName(outSub2));
LandscapeBuffer lbb= new BedGraphBuffer(f);
String[] chrs= lbb.getUnsortedChromosomeList();
for( int k=0; k<chrs.length ; k++ ){
String chr = chrs[k];
System.out.print(chr+" ");
ArrayList<Region> rs= lbb.getLandscape2DForChromosome(chr).getRegions();
for( int i=1; i<rs.size() ; i++ ){
Region r= rs.get(i);
double val= Double.parseDouble(r.attribute().toString());
if(val>thr){
double v= val-thr;
bw.write(chr+"\t"+ (int)r.low+"\t"+ ((int)r.high+1)+"\t"+ (int)v+"\n");
}
}
}
System.out.println();
bw.close();
}
public void removeShort()throws Exception {
LandscapeBuilder lb= new BedGraphReader(outSub2).getLandscapeBuilder();
LandscapeTransformer lt= new LandscapeTransformer(lb);
lt.selectLength(150, Integer.MAX_VALUE);
double sum= lb.totalArea();
double fac= sum/Math.pow(10, 8);
lt.divideBy( fac );
new BedGraphExporter(lb).writeToFile(out);
}
public static void main(String[] args) throws Exception { //debug
File dir= Dirs.getFile("dir");
File d= new File(dir+"/other_datasets/statham2014_mcf7-mods");
File od= new File(d+"/norm");
File in= new File(d+"/wig_mcf7_input_SRR1282223_hg19.wig");
File[] fs= d.listFiles();
//for( int i=0; i<fs.length; i++ ){
for( int i=0; i<2; i++ ){
File f= fs[i];
if(FileName.getExt(f).equals("wig") && !f.getName().contains("input")){
NormTest nn= new NormTest(f, in, od);
//nn.toRatio();
//nn.subtract();
//nn.subtract2();
nn.removeShort();
}
}
}
}