/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package solodel;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Vector;
import net.sf.picard.util.Interval;
import net.sf.picard.util.IntervalList;
import net.sf.picard.util.SamLocusIterator;
import net.sf.samtools.SAMFileReader;
import org.apache.commons.math3.distribution.BinomialDistribution;
import org.apache.commons.math3.distribution.NormalDistribution;
/**
*
* @author kimjh
*/
public class bak {
public void depth_test() throws FileNotFoundException, IOException{
FileReader fr = new FileReader("/data/kimjh/WGS/simulation_test_chr1/seperate_10p/gatk/depth.txt");
BufferedReader br = new BufferedReader(fr);
String temp = br.readLine();
String[] t;
String bam_file = "/data/kimjh/WGS/simulation_test_chr1/seperate_10p/chr1_sim_70x_sorted_rmdup.bam";
String bam_index_file = "/data/kimjh/WGS/simulation_test_chr1/seperate_10p/chr1_sim_70x_sorted_rmdup.bam.bai";
File bam = new File(bam_file);
File bam_index = new File(bam_index_file);
SAMFileReader samReader = new SAMFileReader(bam, bam_index);
String chr_id = "chr1";
int start = 1;
int end = 20;
// int end = 247199724;
Interval interval = new Interval(chr_id, start, end);
IntervalList il = new IntervalList(samReader.getFileHeader());
il.add(interval);
SamLocusIterator sli = new SamLocusIterator(samReader, il, true);
FileWriter fw = new FileWriter("/data/kimjh/WGS/simulation_test_chr1/seperate_10p/gatk/depth_diff.txt");
BufferedWriter bw = new BufferedWriter(fw);
for (Iterator<SamLocusIterator.LocusInfo> iter = sli.iterator(); iter.hasNext();) {
SamLocusIterator.LocusInfo locusInfo = iter.next();
List recordList = locusInfo.getRecordAndPositions();
Iterator it = recordList.iterator();
String readBase = "";
String baseQuality = "";
while(it.hasNext()){
SamLocusIterator.RecordAndOffset record = (SamLocusIterator.RecordAndOffset) it.next();
if(baseQuality.equals("")){
// readBase += Byte.toString(record.getReadBase());
// baseQuality += record.getBaseQuality();
readBase += (char)(record.getReadBase()&0xff);
baseQuality += (int)((record.getBaseQuality()));
}else{
readBase += ","+(char)(record.getReadBase()&0xff);
baseQuality += ","+(int)((record.getBaseQuality()));
}
}
System.out.println("chr1\t"+locusInfo.getPosition()+"\t"+locusInfo.getRecordAndPositions().size()+"\t"+readBase+"\t"+baseQuality+"\n");
// bw.write("chr1\t"+locusInfo.getPosition()+"\t"+locusInfo.getRecordAndPositions().size()+"\n");
int pos = locusInfo.getPosition();
int size = locusInfo.getRecordAndPositions().size();
if (temp != null) {
t = temp.split("\t");
int sam_pos = Integer.parseInt(t[1]);
int sam_size = Integer.parseInt(t[2]);
while (sam_pos <= pos && temp != null) {
t = temp.split("\t");
sam_pos = Integer.parseInt(t[1]);
sam_size = Integer.parseInt(t[2]);
if (sam_pos == pos) {
if (sam_size != size) {
if(pos%1000000==0){
System.out.println(pos);
}
bw.write(pos + "\t" + size + "\t" + sam_size + "\n");
}
temp = br.readLine();
} else if(sam_pos>pos){
}else{
temp = br.readLine();
}
}
}
}
br.close();
fr.close();
bw.flush();
bw.close();
fw.close();
sli.close();
samReader.close();
}
public void EMtest(double thres) throws FileNotFoundException, IOException {
String input = "/data/kimjh/WGS/breakdancer/S49/Cellularity_del_list_S49_raw_BD.txt";
// String input = "/data/kimjh/WGS/sdd/simulation_2p/breakdancer/Cellularity_del_list_2p_raw_BD.txt";
FileReader fr = new FileReader(input);
BufferedReader br = new BufferedReader(fr);
String temp = br.readLine(); // header
temp = br.readLine();
String[] s;
ArrayList celList = new ArrayList();
while (temp != null) {
s = temp.split("\t");
celList.add(s[9]);
temp = br.readLine();
}
double[] cellularity = new double[celList.size()];
for (int i = 0; i < cellularity.length; i++) {
cellularity[i] = Double.parseDouble((String) celList.get(i));
}
Arrays.sort(cellularity);
double[] cellularityOverZero = getSubArray(cellularity, 0, true);
double[] cellularityOverZeroBelowThres = getSubArray(cellularityOverZero, thres, false);
// double [][] data = new double[cellularityOverZeroBelowThres.length][2];
// for(int i=0;i<cellularityOverZeroBelowThres.length;i++){
// data[i][0] = cellularityOverZeroBelowThres[i];
// data[i][1] = Math.random();
// }
// double start = 0;
// double end = thres;
// double binSize = 0.05;
// double [][] data = getHistogram(cellularityOverZeroBelowThres, start, end, binSize);
PVector[] points = new PVector[cellularityOverZeroBelowThres.length];
for (int i = 0; i < points.length; i++) {
points[i] = new PVector(1);
points[i].array[0] = cellularityOverZeroBelowThres[i];
}
int numComponents = 2;
MixtureModel mmc;
int iter = 0;
do {
iter++;
Vector<PVector>[] clusters = KMeans.run(points, numComponents);
mmc = ExpectationMaximization.initialize(clusters);
mmc = ExpectationMaximization.run(points, mmc);
} while (mmc.getMeanDiff() < 0.05 && iter < 10); // consider meanDiff with MaxIter=10
for (int i = 0; i < mmc.size; i++) {
System.out.println("\n" + mmc.getWeight(i));
System.out.println(mmc.getMean(i));
System.out.println(mmc.getStd(i) + "\n");
}
MixtureModel g = new MixtureModel(1);
ExponentialFamily ef = new UnivariateGaussian();
g.setEF(ef);
double[] weight = new double[1];
weight[0] = 1;
g.setWeight(weight);
PVector gParam = new PVector(2);
gParam.array[0] = 0.5;
gParam.array[1] = 0.5;
Parameter[] p = new Parameter[1];
p[0] = gParam;
g.setParam(p);
g = ExpectationMaximization.runWithFixedMean(points, g, 0.5);
for (int i = 0; i < g.size; i++) {
System.out.println("\n" + g.getWeight(i));
System.out.println(g.getMean(i));
System.out.println(g.getStd(i));
}
}
public double[] getSubArray(double[] array, double thres, boolean direction) { // false : <= thres, true : > thres
int idx = 0;
double[] subArray;
for (int i = 0; i < array.length; i++) {
if (array[i] > thres) {
break;
}
idx++;
}
if (direction == false) {
subArray = Arrays.copyOfRange(array, 0, idx);
} else {
subArray = Arrays.copyOfRange(array, idx, array.length);
}
return subArray;
}
public double getGermlineProbability(DeletionInfo di) {
double pval = 0;
double N = di.getAvgDepthWindow();
double p = 0.5;
if (N > 30) {
NormalDistribution n = new NormalDistribution(N * p, Math.sqrt(N * p * (1 - p)));
} else {
BinomialDistribution bn = new BinomialDistribution((int) N, p);
double observed = di.getDeletionDepth();
}
return pval;
}
public void importDeletionInfo(String breakdancerResult, String inputBam, String inputBamIndex, int insertSize) throws FileNotFoundException, IOException {
ArrayList deletionList = new ArrayList();
FileReader fr = new FileReader(breakdancerResult);
// FileReader fr = new FileReader(InputParameter.getBreakdancerOutput());
BufferedReader br = new BufferedReader(fr);
String temp = br.readLine();
if (temp.indexOf("#") != -1) { // header
temp = br.readLine();
}
String[] s;
File bamFile = new File(inputBam);
// File bamFile = new File(InputParameter.getBamFile());
File bamIndexFile = new File(inputBamIndex);
// File bamIndexFile = new File(InputParameter.getBamIndexFile());
SAMFileReader samReader = new SAMFileReader(bamFile, bamIndexFile);
samReader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT);
int i = 0;
while (temp != null) {
if (!temp.equals("")) {
if (temp.contains("DEL")) {
DeletionInfo di = new DeletionInfo(temp);
if (di.getSize() > insertSize) {
// di.setDeletionInfo(samReader);
deletionList.add(di);
}
}
}
temp = br.readLine();
i++;
if (i % 100 == 0) {
System.out.println(i + " th deletion is imported ...");
}
}
samReader.close();
br.close();
fr.close();
}
public void homologyFilter(String id, ArrayList deletionList) throws IOException, InterruptedException {
String queryChr;
String queryLociStart;
String queryLociEnd;
String query;
String upstreamSeq;
String downstreamSeq;
String temp;
Runtime operator;
Process process;
InputStream is;
InputStreamReader isr;
BufferedReader br;
FileReader fr;
int homologyCnt = 0;
// String fastaDir = System.getProperty("user.dir") + "/fasta/";
// String pslDir = System.getProperty("user.dir") + "/psl/";
String idDir = System.getProperty("user.dir") + "/" + id;
String fastaDir = idDir + "/fasta/";
String pslDir = idDir + "/psl/";
query = "mkdir " + idDir;
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
} finally {
process.destroy();
}
query = "mkdir " + fastaDir;
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
} finally {
process.destroy();
}
query = "mkdir " + pslDir;
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
} finally {
process.destroy();
}
int chrNo = deletionList.size();
for (int i = 1; i < chrNo; i++) {
ArrayList chrDel = (ArrayList) deletionList.get(i);
int chrDelSize = chrDel.size();
for (int j = 0; j < chrDelSize; j++) {
DeletionInfo di = (DeletionInfo) chrDel.get(j);
makeFastaForBlat(di, fastaDir);
boolean isHomology = isHomology(di, fastaDir, pslDir);
di.setHomology(isHomology);
if (isHomology) {
homologyCnt++;
}
if ((j + 1) % 100 == 0) {
System.out.println((i + 1) + " th deletion homology is searched ...");
}
}
}
// System.out.println("Num of passed deletions : " + (deletionList.size() - homologyCnt));
System.out.println("Num of filtered deletions by homology search : " + homologyCnt);
}
public void makeFastaForBlat(DeletionInfo di, String fastaDir) throws IOException, InterruptedException {
String query;
Runtime operator;
Process process;
InputStream is;
InputStreamReader isr;
BufferedReader br;
FileWriter fw;
BufferedWriter bw;
String upstreamSeq = "";
String downstreamSeq = "";
query = InputParameter.getFastahack() + " -r " + di.getChr() + ":" + (di.getPos1() - InputParameter.getInsertSize()) + ".." + (di.getPos1() - 1) + " " + InputParameter.getHumanRefFasta();
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
is = process.getInputStream();
isr = new InputStreamReader(is);
br = new BufferedReader(isr);
String temp = br.readLine();
while (temp != null) {
upstreamSeq += temp;
temp = br.readLine();
}
br.close();
isr.close();
is.close();
} finally {
process.destroy();
}
query = InputParameter.getFastahack() + " -r " + di.getChr() + ":" + (di.getPos2() + 1) + ".." + (di.getPos2() + InputParameter.getInsertSize()) + " " + InputParameter.getHumanRefFasta();
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
is = process.getInputStream();
isr = new InputStreamReader(is);
br = new BufferedReader(isr);
String temp = br.readLine();
while (temp != null) {
downstreamSeq += temp;
temp = br.readLine();
}
br.close();
isr.close();
is.close();
} finally {
process.destroy();
}
String delInfo = di.getChr() + "_" + di.getPos1() + "_" + di.getPos2();
String outputUpstream = delInfo + "_up.fa";
String outputDownstream = delInfo + "_down.fa";
fw = new FileWriter(fastaDir + outputUpstream);
bw = new BufferedWriter(fw);
bw.write(">" + delInfo + "\n");
bw.write(upstreamSeq + "\n");
bw.flush();
bw.close();
fw.close();
fw = new FileWriter(fastaDir + outputDownstream);
bw = new BufferedWriter(fw);
bw.write(">" + delInfo + "\n");
bw.write(downstreamSeq + "\n");
bw.flush();
bw.close();
fw.close();
}
public boolean isHomology(DeletionInfo di, String fastaDir, String pslDir) throws IOException, InterruptedException {
boolean isRepeat = false;
String query;
Runtime operator;
Process process;
InputStream is;
InputStreamReader isr;
FileReader fr;
BufferedReader br;
FileWriter fw;
BufferedWriter bw;
String[] s;
int searchSpace = 2 * InputParameter.getInsertSize();
String delInfo = di.getChr() + "_" + di.getPos1() + "_" + di.getPos2();
try {
fr = new FileReader(pslDir + delInfo + "_up.psl");
} catch (FileNotFoundException ex) {
query = InputParameter.getBlat() + " -t=dna -q=dna " + InputParameter.getHumanRef2bit() + " " + fastaDir + delInfo + "_up.fa " + pslDir + delInfo + "_up.psl";
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
} finally {
process.destroy();
}
fr = new FileReader(pslDir + delInfo + "_up.psl");
}
br = new BufferedReader(fr);
boolean isHeader = true;
String temp = br.readLine();
while (temp != null && !isRepeat) {
if (temp.indexOf("---------------") != -1) {
isHeader = false;
temp = br.readLine(); // candidates start
}
if (!isHeader) {
if (temp != null) {
s = temp.split("\t");
String candidate_chr = s[13];
if (candidate_chr.equals(di.getChr())) {
int matchLen = Integer.parseInt(s[0]);
String strand = s[8];
if ((matchLen != InputParameter.getInsertSize())) {
if ((matchLen >= InputParameter.getReadLength() * 0.9) && strand.contains("+")) { // matched read > readLength*0.9 bp, + strand
int estimatedHomologyPos = di.getPos2() - (InputParameter.getInsertSize() - InputParameter.getReadLength());
int searchSpaceUp = estimatedHomologyPos - searchSpace;
int searchSpaceDown = estimatedHomologyPos + searchSpace;
// int searchSpaceDown = di.getPos2();
String[] block_size = s[18].split(",");
String[] block_start = s[20].split(",");
for (int i = 0; i < block_size.length; i++) {
if (Integer.parseInt(block_size[i]) >= InputParameter.getReadLength() * 0.9) {
int candidate = Integer.parseInt(block_start[i]);
if (((searchSpaceUp <= candidate) && (searchSpaceDown >= candidate))) {
isRepeat = true;
break;
}
}
}
}
}
}
}
}
temp = br.readLine();
}
br.close();
fr.close();
if (!isRepeat) {
try {
fr = new FileReader(pslDir + delInfo + "_down.psl");
} catch (FileNotFoundException ex) {
query = InputParameter.getBlat() + " -t=dna -q=dna " + InputParameter.getHumanRef2bit() + " " + fastaDir + delInfo + "_down.fa " + pslDir + delInfo + "_down.psl";
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
} finally {
process.destroy();
}
fr = new FileReader(pslDir + delInfo + "_down.psl");
}
br = new BufferedReader(fr);
isHeader = true;
temp = br.readLine();
while (temp != null && !isRepeat) {
if (temp.indexOf("---------------") != -1) {
isHeader = false;
temp = br.readLine(); // candidates start
}
if (!isHeader) {
if (temp != null) {
s = temp.split("\t");
String candidate_chr = s[13];
if (candidate_chr.equals(di.getChr())) {
int matchLen = Integer.parseInt(s[0]);
String strand = s[8];
if ((matchLen != InputParameter.getInsertSize())) {
if ((matchLen >= InputParameter.getReadLength() * 0.9) && strand.contains("+")) { // matched read > readLength*0.9 bp, + strand
int estimatedHomologyPos = di.getPos1() + (InputParameter.getInsertSize() - InputParameter.getReadLength());
int searchSpaceUp = estimatedHomologyPos - searchSpace;
// int searchSpaceUp = di.getPos1();
int searchSpaceDown = estimatedHomologyPos + searchSpace;
String[] block_size = s[18].split(",");
String[] block_start = s[20].split(",");
for (int i = 0; i < block_size.length; i++) {
if (Integer.parseInt(block_size[i]) >= InputParameter.getReadLength() * 0.9) {
int candidate = Integer.parseInt(block_start[i]);
if (((searchSpaceUp <= candidate) && (searchSpaceDown >= candidate))) {
isRepeat = true;
break;
}
}
}
}
}
}
}
}
temp = br.readLine();
}
br.close();
fr.close();
}
return isRepeat;
}
// Estimate the parameters of mixture model (k=2)
public void estimateMixtureModel(ArrayList deletionList, MixtureModel mm) {
ArrayList sampleList = new ArrayList();
int chrNo = deletionList.size();
for (int i = 1; i < chrNo; i++) {
ArrayList chrDel = (ArrayList) deletionList.get(i);
int chrDelSize = chrDel.size();
for (int j = 0; j < chrDelSize; j++) {
DeletionInfo di = (DeletionInfo) chrDel.get(j);
if (!di.isHomology() && !di.isGap()) {
if (di.getCellularity() < InputParameter.getCellularityThrd() && di.getCellularity() > 0) {
sampleList.add(di);
}
}
}
}
// double[] coefficientOfVariation = new double[sampleList.size()];
// for (int i = 0; i < coefficientOfVariation.length; i++) {
// coefficientOfVariation[i] = ((DeletionInfo) sampleList.get(i)).getCoefficientOfvariation();
// }
//
// Arrays.sort(coefficientOfVariation);
// int cutoffIdx = (int) (coefficientOfVariation.length * (1 - InputParameter.getCoefficientOfvariationCutoff())) - 1;
// double cutoff = coefficientOfVariation[cutoffIdx];
//
// ArrayList filteredSampleList = new ArrayList();
// for (int i = 0; i < sampleList.size(); i++) {
// DeletionInfo di = (DeletionInfo) sampleList.get(i);
// if (di.getCoefficientOfvariation() <= cutoff) {
// filteredSampleList.add(di);
// }
// }
double[] quartileArray = new double[sampleList.size()];
for (int i = 0; i < quartileArray.length; i++) {
quartileArray[i] = ((DeletionInfo) sampleList.get(i)).getCellularity();
}
Arrays.sort(quartileArray);
int upperQuartileIdx = (int) (quartileArray.length * 0.75) - 1;
int lowerQuartileIdx = (int) (quartileArray.length * 0.25) - 1;
double upperQuartile = quartileArray[upperQuartileIdx];
double lowerQuartile = quartileArray[lowerQuartileIdx];
double interQuartileRange = upperQuartile - lowerQuartile;
double upperCutoff = upperQuartile + interQuartileRange;
// double lowerCutoff = lowerQuartile - interQuartileRange;
System.out.println("UpperCutoff = "+upperCutoff);
// System.out.println("LowerCutoff = "+lowerCutoff);
int upperFilteredCnt = 0;
// int lowerFilteredCnt = 0;
ArrayList filteredSampleList = new ArrayList();
for (int i = 0; i < sampleList.size(); i++) {
DeletionInfo di = (DeletionInfo) sampleList.get(i);
// if (di.getCellularity() > lowerCutoff && di.getCellularity() < upperCutoff) {
if (di.getCellularity() < upperCutoff) {
filteredSampleList.add(di);
}else{
// if(di.getCellularity()<=lowerCutoff){
// lowerFilteredCnt++;
// }else{
upperFilteredCnt++;
// }
}
}
// System.out.println("\nTotal lower filtered count : "+lowerFilteredCnt+"\n");
System.out.println("\nTotal upper filtered count : "+upperFilteredCnt+"\n");
// double minCellularity = 0.5;
// double maxCellularity = 0;
double[] cellularity = new double[filteredSampleList.size()];
for (int i = 0; i < cellularity.length; i++) {
cellularity[i] = ((DeletionInfo) filteredSampleList.get(i)).getCellularity();
// if(cellularity[i]<minCellularity) minCellularity = cellularity[i];
// if(cellularity[i]>maxCellularity) maxCellularity = cellularity[i];
}
PVector[] points = new PVector[cellularity.length];
for (int i = 0; i < points.length; i++) {
points[i] = new PVector(1);
points[i].array[0] = cellularity[i];
}
int numComponents = 2;
int iter = 0;
do {
iter++;
System.out.println(iter);
Vector<PVector>[] clusters = KMeans.run(points, numComponents);
mm = ExpectationMaximization.initialize(clusters);
mm = ExpectationMaximization.run(points, mm);
} while (mm.getMeanDiff() < InputParameter.getMixedMeanDiffThrd() && iter < 10);
for (int i = 0; i < numComponents; i++) {
System.out.println(mm.getWeight(i));
System.out.println(mm.getMean(i));
System.out.println(mm.getStd(i) + "\n");
}
}
}