/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package solodel;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Vector;
import net.sf.samtools.SAMFileReader;
import org.apache.commons.math3.distribution.BinomialDistribution;
import org.apache.commons.math3.distribution.NormalDistribution;
/**
*
* @author kimjh
*/
public class SoloDelCaller {
ArrayList deletionList;
MixtureModel mm;
int totalDelCnt;
double germLogLikelihood;
double mixedLogLikelihood;
public SoloDelCaller() {
deletionList = null;
mm = null;
totalDelCnt = 0;
germLogLikelihood = 0;
mixedLogLikelihood = 0;
String jarPath = getClass().getProtectionDomain().getCodeSource().getLocation().getPath();
String jarDir = jarPath.substring(0, jarPath.lastIndexOf("/") + 1);
InputParameter.setJarDir(jarDir);
}
// Import deletion information form Breakdancer result file
public void importDeletionInfo(String breakdancerResult) throws FileNotFoundException, IOException {
deletionList = new ArrayList();
for (int i = 0; i <= 25; i++) {
ArrayList chrDel = new ArrayList();
deletionList.add(chrDel);
}
FileReader fr = new FileReader(breakdancerResult);
// FileReader fr = new FileReader(InputParameter.getBreakdancerOutput());
BufferedReader br = new BufferedReader(fr);
String temp = br.readLine();
if (temp.indexOf("#") != -1) { // header
temp = br.readLine();
}
String[] s;
int i = 0;
while (temp != null) {
if (!temp.equals("")) {
if (temp.contains("DEL")) {
DeletionInfo di = new DeletionInfo(temp);
if (di.getSize() > InputParameter.getInsertSize()) {
// di.setDeletionInfo(samReader);
String chr = di.getChr();
chr = chr.replaceAll("chr", "");
int chrom;
if (chr.equals("X")) {
chrom = 23;
} else if (chr.equals("Y")) {
chrom = 24;
} else if (chr.equals("M")) {
chrom = 25;
} else {
chrom = Integer.parseInt(chr);
}
ArrayList chrDel = (ArrayList) deletionList.get(chrom);
chrDel.add(di);
totalDelCnt++;
}
}
}
temp = br.readLine();
i++;
if (i % 100 == 0) {
System.out.println(i + " th deletion is imported ...");
}
}
br.close();
fr.close();
System.out.println("\nTotal number of deletion : " + totalDelCnt + "\n");
}
public void calculateDeletionDepth(String inputBam, String inputBamIndex) throws FileNotFoundException, IOException {
String gaps = InputParameter.getJarDir() + InputParameter.getHumanGenomeBuild() + "_gaps.txt";
FileReader fr = new FileReader(gaps);
BufferedReader br = new BufferedReader(fr);
String temp = br.readLine(); // header
temp = br.readLine();
String[] s;
while (temp != null) {
if (!temp.equals("")) {
s = temp.split("\t");
String chr = s[0].replaceAll("chr", "");
int start = Integer.parseInt(s[1]);
int stop = Integer.parseInt(s[2]);
int size = Integer.parseInt(s[3]);
DeletionInfo di = new DeletionInfo();
di.setChr(s[0]);
di.setPos1(start);
di.setPos2(stop);
di.setSize(size);
di.setGap(true);
int chrom;
if (chr.equals("X")) {
chrom = 23;
} else if (chr.equals("Y")) {
chrom = 24;
} else if (chr.equals("M")) {
chrom = 25;
} else {
chrom = Integer.parseInt(chr);
}
ArrayList chrDel = (ArrayList) deletionList.get(chrom);
chrDel.add(di);
}
temp = br.readLine();
}
br.close();
fr.close();
File bamFile = new File(inputBam);
// File bamFile = new File(InputParameter.getBamFile());
File bamIndexFile = new File(inputBamIndex);
// File bamIndexFile = new File(InputParameter.getBamIndexFile());
SAMFileReader samReader = new SAMFileReader(bamFile, bamIndexFile);
samReader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT);
int chrNo = deletionList.size();
for (int i = 1; i < chrNo; i++) {
ArrayList<DeletionInfo> chrDel = (ArrayList) deletionList.get(i);
Collections.sort(chrDel, new DeletionComparator());
}
int delNo = 0;
for (int i = 1; i < chrNo; i++) {
ArrayList<DeletionInfo> chrDel = (ArrayList) deletionList.get(i);
int chrDelSize = chrDel.size();
for (int j = 0; j < chrDelSize; j++) {
DeletionInfo sdi = (DeletionInfo) chrDel.get(j);
if (!sdi.isGap()) {
int window1 = sdi.getWindowStart();
int window2 = sdi.getWindowEnd();
ArrayList overlappedAreaList = new ArrayList();
for (int k = 0; k < chrDelSize; k++) {
if (j != k) {
DeletionInfo tdi = (DeletionInfo) chrDel.get(k);
int tgt1 = tdi.getPos1();
int tgt2 = tdi.getPos2();
if (tgt1 > window2) {
break;
}
if ((window1 <= tgt1 && window2 >= tgt1) || (window1 <= tgt2 && window2 >= tgt2)) {
Integer[] overlappedArea = getOverlappedArea(window1, window2, tgt1, tgt2);
overlappedAreaList.add(overlappedArea);
}
}
}
sdi.setDeletionInfo(samReader, overlappedAreaList);
delNo++;
if (delNo % 100 == 0) {
System.out.println(delNo + " th deletion's information is analyzed ...");
}
}
}
}
samReader.close();
}
public void gapFilter() throws FileNotFoundException, IOException {
String gaps = InputParameter.getJarDir() + InputParameter.getHumanGenomeBuild() + "_gaps.txt";
FileReader fr = new FileReader(gaps);
BufferedReader br = new BufferedReader(fr);
String temp = br.readLine(); // header
temp = br.readLine();
String[] s;
ArrayList gapList = new ArrayList();
for (int i = 0; i <= 25; i++) {
ArrayList chrGap = new ArrayList();
gapList.add(chrGap);
}
while (temp != null) {
if (!temp.equals("")) {
s = temp.split("\t");
String chr = s[0].replaceAll("chr", "");
int start = Integer.parseInt(s[1]);
int stop = Integer.parseInt(s[2]);
int size = Integer.parseInt(s[3]);
DeletionInfo di = new DeletionInfo();
di.setChr(s[0]);
di.setPos1(start);
di.setPos2(stop);
di.setSize(size);
int chrom;
if (chr.equals("X")) {
chrom = 23;
} else if (chr.equals("Y")) {
chrom = 24;
} else if (chr.equals("M")) {
chrom = 25;
} else {
chrom = Integer.parseInt(chr);
}
ArrayList chrGap = (ArrayList) gapList.get(chrom);
chrGap.add(di);
}
temp = br.readLine();
}
br.close();
fr.close();
int gapCnt = 0;
int chrNo = deletionList.size();
for (int i = 1; i < chrNo; i++) {
ArrayList chrDel = (ArrayList) deletionList.get(i);
int chrDelSize = chrDel.size();
for (int j = 0; j < chrDelSize; j++) {
DeletionInfo di = (DeletionInfo) chrDel.get(j);
double delSize = di.getSize();
double delStart = di.getPos1();
double delStop = di.getPos2();
boolean b = true;
ArrayList chrGap = (ArrayList) gapList.get(i);
int chrGapSize = chrGap.size();
for (int k = 0; k < chrGapSize; k++) {
if (b) {
DeletionInfo gi = (DeletionInfo) chrGap.get(k);
double gapSize = gi.getSize();
double gapStart = gi.getPos1();
double gapStop = gi.getPos2();
double thres = 0.5;
double upperBound = gapStart + gapSize * thres;
double lowerBound = gapStop - gapSize * thres;
if (delSize >= (gapSize * thres)) {
if (((delStart <= gapStart) && (delStop >= upperBound)) || ((delStop >= gapStop) && (delStart <= lowerBound)) || ((delStart <= gapStart) && (delStop >= gapStop))) {
di.setGap(true);
gapCnt++;
b = false;
}
}
} else {
break;
}
}
}
}
System.out.println("Num of filtered deletions by gap : " + gapCnt);
}
// Estimate the parameters of mixture model (k=2)
public void estimateMixtureModel() {
ArrayList sampleList = new ArrayList();
int chrNo = deletionList.size();
if(chrNo > 22){
chrNo = 22;
}
for (int i = 1; i < chrNo; i++) {
ArrayList chrDel = (ArrayList) deletionList.get(i);
int chrDelSize = chrDel.size();
for (int j = 0; j < chrDelSize; j++) {
DeletionInfo di = (DeletionInfo) chrDel.get(j);
if (!di.isHomology() && !di.isGap()) {
if (di.getCellularity() < InputParameter.getCellularityThrd() && di.getCellularity() > 0 && di.getCellularity() > (1.0/di.getAvgDepthWindow())) {
sampleList.add(di);
}
}
}
}
double[] cellularity = new double[sampleList.size()];
for (int i = 0; i < cellularity.length; i++) {
cellularity[i] = ((DeletionInfo) sampleList.get(i)).getCellularity();
}
PVector[] points = new PVector[cellularity.length];
for (int i = 0; i < points.length; i++) {
points[i] = new PVector(1);
points[i].array[0] = cellularity[i];
}
int numComponents = 2;
int iter = 0;
boolean estimated = false;
do {
iter++;
System.out.println(iter);
Vector<PVector>[] clusters = KMeans.run(points, numComponents);
mm = ExpectationMaximization.initialize(clusters);
mm = ExpectationMaximization.run(points, mm);
if (mm.getMeanDiff() > InputParameter.getMixedMeanDiffThrd()) {
estimated = true;
}
} while (!estimated && iter < 10);
if (!estimated) {
double[] quartileArray = new double[sampleList.size()];
for (int i = 0; i < quartileArray.length; i++) {
quartileArray[i] = ((DeletionInfo) sampleList.get(i)).getCellularity();
}
Arrays.sort(quartileArray);
int upperQuartileIdx = (int) (quartileArray.length * 0.75) - 1;
int lowerQuartileIdx = (int) (quartileArray.length * 0.25) - 1;
double upperQuartile = quartileArray[upperQuartileIdx];
double lowerQuartile = quartileArray[lowerQuartileIdx];
double interQuartileRange = upperQuartile - lowerQuartile;
double upperCutoff = upperQuartile + interQuartileRange;
double lowerCutoff = lowerQuartile - interQuartileRange;
System.out.println("UpperCutoff = " + upperCutoff);
System.out.println("LowerCutoff = " + lowerCutoff);
int upperFilteredCnt = 0;
int lowerFilteredCnt = 0;
ArrayList filteredSampleList = new ArrayList();
for (int i = 0; i < sampleList.size(); i++) {
DeletionInfo di = (DeletionInfo) sampleList.get(i);
if (di.getCellularity() > lowerCutoff && di.getCellularity() < upperCutoff) {
filteredSampleList.add(di);
} else {
if (di.getCellularity() <= lowerCutoff) {
lowerFilteredCnt++;
} else {
upperFilteredCnt++;
}
}
}
System.out.println("\nTotal lower filtered count : " + lowerFilteredCnt + "\n");
System.out.println("\nTotal upper filtered count : " + upperFilteredCnt + "\n");
double[] filteredCellularity = new double[filteredSampleList.size()];
for (int i = 0; i < filteredCellularity.length; i++) {
filteredCellularity[i] = ((DeletionInfo) filteredSampleList.get(i)).getCellularity();
}
PVector[] filteredPoints = new PVector[filteredCellularity.length];
for (int i = 0; i < filteredPoints.length; i++) {
filteredPoints[i] = new PVector(1);
filteredPoints[i].array[0] = filteredCellularity[i];
}
iter = 0;
do {
iter++;
System.out.println(iter);
Vector<PVector>[] clusters = KMeans.run(filteredPoints, numComponents);
mm = ExpectationMaximization.initialize(clusters);
mm = ExpectationMaximization.run(filteredPoints, mm);
} while (mm.getMeanDiff() < InputParameter.getMixedMeanDiffThrd() && iter < 10);
}
for (int i = 0; i < numComponents; i++) {
System.out.println(mm.getWeight(i));
System.out.println(mm.getMean(i));
System.out.println(mm.getStd(i) + "\n");
}
}
// Calculate deletion probabilities based on the germline model and the mixture model.
public void calculateDeletionProbabilities(BufferedWriter germLL, BufferedWriter mixedLL) throws IOException {
int chrNo = deletionList.size();
for (int i = 1; i < chrNo; i++) {
ArrayList chrDel = (ArrayList) deletionList.get(i);
int chrDelSize = chrDel.size();
for (int j = 0; j < chrDelSize; j++) {
DeletionInfo di = (DeletionInfo) chrDel.get(j);
if (!di.isHomology() && !di.isGap()) {
if (di.getCellularity() > InputParameter.getCellularityThrd()) {
di.setHomozygousDel(true);
} else if (di.getAvgDepthDeletion() < InputParameter.getDuplicateThrd() && di.getAvgDepthWindow() < InputParameter.getDuplicateThrd()) {
double N = di.getAvgDepthWindow(); // mean
double X = N - di.getAvgDepthDeletion(); // mean
// double N = di.getMedianDepthWindow(); // median
// double X = N - di.getMedianDepthDeletion(); // median
if (X < 0) {
// System.out.println(di.print());
X = 0;
}
double germP = 0.5;
ExponentialFamily germDist;
if (N > 30) {
germDist = new UnivariateGaussian();
PVector gParam = new PVector(2);
gParam.array[0] = N * germP; // mean
gParam.array[1] = N * germP * (1 - germP); // var
PVector observed = new PVector(1);
observed.array[0] = X;
double germDelProbability = germDist.density(observed, gParam);
di.setGermlineProbability(germDelProbability);
germLogLikelihood += Math.log(germDelProbability);
germLL.write(di.print() + "\t" + Math.log(germDelProbability) + "\n");
MixtureModel mixedDist = new MixtureModel(2);
ExponentialFamily ef = new UnivariateGaussian();
mixedDist.setEF(ef);
mixedDist.setWeight(mm.getWeight());
PVector[] mParam = new PVector[2];
for (int k = 0; k < 2; k++) {
mParam[k] = new PVector(2);
mParam[k].array[0] = N * mm.getMean(k);
mParam[k].array[1] = N * mm.getMean(k) * (1 - mm.getMean(k));
}
mixedDist.setParam(mParam);
double mixedDelProbability = mixedDist.density(observed);
di.setMixedProbability(mixedDelProbability);
mixedLogLikelihood += Math.log(mixedDelProbability);
mixedLL.write(di.print() + "\t" + Math.log(mixedDelProbability) + "\n");
double[] mixedDensity = mixedDist.getDensities(observed); // order : som, germ
di.setMixedSomaticProbability(mixedDensity[0] / (mixedDensity[1] * di.getCoefficientOfvariation())); // ratio_over_CV_B, 140401
if (mixedDensity[0] > mixedDensity[1]) {
di.setSomatic(true);
// di.setMixedSomaticProbability(mixedDensity[0]);
// di.setMixedSomaticProbability(mixedDensity[0]-mixedDensity[1]); // diff, 140326
// di.setMixedSomaticProbability(Math.log(mixedDensity[0] / mixedDensity[1])); // ratio, 140327
// di.setMixedSomaticProbability(Math.log(mixedDensity[0]/mixedDensity[1])/di.getCoefficientOfvariation()); // ratio_over_CV_A, 140327
// di.setMixedSomaticProbability(Math.log(mixedDensity[0] / (mixedDensity[1] * di.getCoefficientOfvariation()))); // ratio_over_CV_B, 140327
// di.setMixedSomaticProbability(Math.log(mixedDensity[0]/(mixedDensity[1]*di.getCoefficientOfvariation()*(di.getStdWindow()/di.getAvgDepthWindow())))); // ratio_over_CV_B_with_window, 140327
// di.setMixedSomaticProbability(Math.log((mixedDensity[0]/(1-mixedDensity[0]))/((mixedDensity[1]/(1-mixedDensity[1]))*di.getCoefficientOfvariation()))); // odds_ratio_over_CV_B, 140331
}
// System.out.print(di.getCellularity() + "\t" + Math.log(germDelProbability) + "\t");
// System.out.println(Math.log(mixedDelProbability) + "\t" + di.getAvgDepthDeletion() + "\t" + di.getAvgDepthWindow() + "\t" + di.getPos1() + "\t" + di.getPos2());
} else {
germDist = new BinomialFixedN((int) N);
PVector gParam = new PVector(1);
gParam.array[0] = germP; // p
PVector observed = new PVector(1);
observed.array[0] = (int) X;
double germDelProbability = germDist.density(observed, gParam);
di.setGermlineProbability(germDelProbability);
germLogLikelihood += Math.log(germDelProbability);
germLL.write(di.print() + "\t" + Math.log(germDelProbability) + "\n");
ExponentialFamily mixedDist = new BinomialFixedN((int) N);
PVector mixedGermParam = new PVector(1);
PVector mixedSomParam = new PVector(1);
double mixedGermDelProbability;
double mixedSomDelProbability;
if (mm.getMean(0) > mm.getMean(1)) { // mm[0] = p_g, mm[1] = p_s
mixedGermParam.array[0] = mm.getMean(0);
mixedSomParam.array[0] = mm.getMean(1);
mixedGermDelProbability = mm.getWeight(0) * mixedDist.density(observed, mixedGermParam);
mixedSomDelProbability = mm.getWeight(1) * mixedDist.density(observed, mixedSomParam);
} else {// mm[0] = p_s, mm[1] = p_g
mixedGermParam.array[0] = mm.getMean(1);
mixedSomParam.array[0] = mm.getMean(0);
mixedGermDelProbability = mm.getWeight(1) * mixedDist.density(observed, mixedGermParam);
mixedSomDelProbability = mm.getWeight(0) * mixedDist.density(observed, mixedSomParam);
}
double mixedDelProbability = mixedGermDelProbability + mixedSomDelProbability;
di.setMixedProbability(mixedDelProbability);
mixedLogLikelihood += Math.log(mixedDelProbability);
mixedLL.write(di.print() + "\t" + Math.log(mixedDelProbability) + "\n");
di.setMixedSomaticProbability(mixedSomDelProbability / (mixedGermDelProbability * di.getCoefficientOfvariation())); // ratio_over_CV_B, 140401
if (mixedSomDelProbability > mixedGermDelProbability) {
di.setSomatic(true);
// di.setMixedSomaticProbability(mixedSomDelProbability);
// di.setMixedSomaticProbability(mixedSomDelProbability - mixedGermDelProbability); // diff, 140326
// di.setMixedSomaticProbability(Math.log(mixedSomDelProbability / mixedGermDelProbability)); // ratio, 140327
// di.setMixedSomaticProbability(Math.log(mixedSomDelProbability/mixedGermDelProbability)/di.getCoefficientOfvariation()); // ratio_over_CV_A, 140327
// di.setMixedSomaticProbability(Math.log(mixedSomDelProbability / (mixedGermDelProbability * di.getCoefficientOfvariation()))); // ratio_over_CV_B, 140327
// di.setMixedSomaticProbability(Math.log(mixedSomDelProbability/(mixedGermDelProbability*di.getCoefficientOfvariation()*(di.getStdWindow()/di.getAvgDepthWindow())))); // ratio_over_CV_B_with_window, 140327
// di.setMixedSomaticProbability(Math.log((mixedSomDelProbability/(1-mixedSomDelProbability))/((mixedGermDelProbability/(1-mixedGermDelProbability))*di.getCoefficientOfvariation()))); // odds_ratio_over_CV_B, 140331
}
}
}
}
}
}
// germLL.flush();
// germLL.close();
//
// mixedLL.flush();
// mixedLL.close();
System.out.println("germLogLikelihood = " + germLogLikelihood);
System.out.println("mixedLogLikelihood = " + mixedLogLikelihood);
}
// public void homologyFilter(String id) throws IOException, InterruptedException {
public void homologyFilter() throws IOException, InterruptedException {
String queryChr;
String queryLociStart;
String queryLociEnd;
String query;
String upstreamSeq;
String downstreamSeq;
String temp;
Runtime operator;
Process process;
InputStream is;
InputStreamReader isr;
BufferedReader br;
FileReader fr;
int homologyCnt = 0;
// String fastaDir = System.getProperty("user.dir") + "/fasta/";
// String pslDir = System.getProperty("user.dir") + "/psl/";
// String idDir = System.getProperty("user.dir") + "/" + id;
String idDir = InputParameter.getWorkingDirectory() + InputParameter.getOutputHeader();
String fastaDir = idDir + "/fasta/";
String queryDir = fastaDir + "query/";
String dbDir = fastaDir + "database/";
String pslDir = idDir + "/psl/";
query = "mkdir " + idDir;
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
} finally {
process.destroy();
}
query = "mkdir " + fastaDir;
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
} finally {
process.destroy();
}
query = "mkdir " + queryDir;
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
} finally {
process.destroy();
}
query = "mkdir " + dbDir;
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
} finally {
process.destroy();
}
query = "mkdir " + pslDir;
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
} finally {
process.destroy();
}
int chrNo = deletionList.size();
int totalDel = 0;
for (int i = 1; i < chrNo; i++) {
ArrayList chrDel = (ArrayList) deletionList.get(i);
int chrDelSize = chrDel.size();
for (int j = 0; j < chrDelSize; j++) {
totalDel++;
DeletionInfo di = (DeletionInfo) chrDel.get(j);
if (!di.isGap()) {
makeFastaForBlat(di, queryDir, dbDir);
boolean isHomology = isHomology(di, queryDir, dbDir, pslDir);
di.setHomology(isHomology);
if (isHomology) {
homologyCnt++;
di.setCellularity(1);
}
if (totalDel % 100 == 0) {
System.out.println(totalDel + " th deletion homology is searched ...");
}
}
}
}
// System.out.println("Num of passed deletions : " + (deletionList.size() - homologyCnt));
System.out.println("Num of filtered deletions by homology search : " + homologyCnt);
}
public void makeFastaForBlat(DeletionInfo di, String queryDir, String dbDir) throws IOException, InterruptedException {
String query;
Runtime operator;
Process process;
InputStream is;
InputStreamReader isr;
BufferedReader br;
FileWriter fw;
BufferedWriter bw;
String upstreamSeq = "";
String downstreamSeq = "";
query = InputParameter.getFastahack() + " -r " + di.getChr() + ":" + (di.getPos1() - InputParameter.getInsertSize()) + ".." + (di.getPos1() - 1) + " " + InputParameter.getHumanRefFasta();
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
is = process.getInputStream();
isr = new InputStreamReader(is);
br = new BufferedReader(isr);
String temp = br.readLine();
while (temp != null) {
upstreamSeq += temp;
temp = br.readLine();
}
br.close();
isr.close();
is.close();
} finally {
process.destroy();
}
query = InputParameter.getFastahack() + " -r " + di.getChr() + ":" + (di.getPos2() + 1) + ".." + (di.getPos2() + InputParameter.getInsertSize()) + " " + InputParameter.getHumanRefFasta();
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
is = process.getInputStream();
isr = new InputStreamReader(is);
br = new BufferedReader(isr);
String temp = br.readLine();
while (temp != null) {
downstreamSeq += temp;
temp = br.readLine();
}
br.close();
isr.close();
is.close();
} finally {
process.destroy();
}
String delInfo = di.getChr() + "_" + di.getPos1() + "_" + di.getPos2();
String outputUpstream = delInfo + "_up.fa";
String outputDownstream = delInfo + "_down.fa";
fw = new FileWriter(queryDir + outputUpstream);
bw = new BufferedWriter(fw);
bw.write(">" + delInfo + "\n");
bw.write(upstreamSeq + "\n");
bw.flush();
bw.close();
fw.close();
fw = new FileWriter(queryDir + outputDownstream);
bw = new BufferedWriter(fw);
bw.write(">" + delInfo + "\n");
bw.write(downstreamSeq + "\n");
bw.flush();
bw.close();
fw.close();
upstreamSeq = "";
downstreamSeq = "";
int searchSpace = 2 * InputParameter.getInsertSize();
int estimatedHomologyPos = di.getPos2() - (InputParameter.getInsertSize() - InputParameter.getReadLength());
int searchSpaceUp = estimatedHomologyPos - searchSpace;
if (di.getPos1() > searchSpaceUp) {
searchSpaceUp = di.getPos1() + 1;
}
int searchSpaceDown = estimatedHomologyPos + searchSpace;
query = InputParameter.getFastahack() + " -r " + di.getChr() + ":" + searchSpaceUp + ".." + searchSpaceDown + " " + InputParameter.getHumanRefFasta();
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
is = process.getInputStream();
isr = new InputStreamReader(is);
br = new BufferedReader(isr);
String temp = br.readLine();
while (temp != null) {
upstreamSeq += temp;
temp = br.readLine();
}
br.close();
isr.close();
is.close();
} finally {
process.destroy();
}
fw = new FileWriter(dbDir + outputUpstream);
bw = new BufferedWriter(fw);
bw.write(">" + di.getChr() + "_" + searchSpaceUp + "_" + searchSpaceDown + "\n");
bw.write(upstreamSeq + "\n");
bw.flush();
bw.close();
fw.close();
estimatedHomologyPos = di.getPos1() + (InputParameter.getInsertSize() - InputParameter.getReadLength());
searchSpaceUp = estimatedHomologyPos - searchSpace;
searchSpaceDown = estimatedHomologyPos + searchSpace;
if (di.getPos2() < searchSpaceDown) {
searchSpaceDown = di.getPos2() - 1;
}
query = InputParameter.getFastahack() + " -r " + di.getChr() + ":" + searchSpaceUp + ".." + searchSpaceDown + " " + InputParameter.getHumanRefFasta();
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
is = process.getInputStream();
isr = new InputStreamReader(is);
br = new BufferedReader(isr);
String temp = br.readLine();
while (temp != null) {
downstreamSeq += temp;
temp = br.readLine();
}
br.close();
isr.close();
is.close();
} finally {
process.destroy();
}
fw = new FileWriter(dbDir + outputDownstream);
bw = new BufferedWriter(fw);
bw.write(">" + di.getChr() + "_" + searchSpaceUp + "_" + searchSpaceDown + "\n");
bw.write(downstreamSeq + "\n");
bw.flush();
bw.close();
fw.close();
}
public boolean isHomology(DeletionInfo di, String queryDir, String dbDir, String pslDir) throws IOException, InterruptedException {
boolean isRepeat = false;
String query;
Runtime operator;
Process process;
InputStream is;
InputStreamReader isr;
FileReader fr;
BufferedReader br;
FileWriter fw;
BufferedWriter bw;
String[] s;
String delInfo = di.getChr() + "_" + di.getPos1() + "_" + di.getPos2();
try {
fr = new FileReader(pslDir + delInfo + "_up.psl");
} catch (FileNotFoundException ex) {
query = InputParameter.getBlat() + " -t=dna -q=dna " + dbDir + delInfo + "_up.fa " + queryDir + delInfo + "_up.fa " + pslDir + delInfo + "_up.psl";
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
} finally {
process.destroy();
}
fr = new FileReader(pslDir + delInfo + "_up.psl");
}
br = new BufferedReader(fr);
boolean isHeader = true;
String temp = br.readLine();
while (temp != null && !isRepeat) {
if (temp.indexOf("---------------") != -1) {
isHeader = false;
temp = br.readLine(); // candidates start
}
if (!isHeader) {
if (temp != null) {
s = temp.split("\t");
int matchLen = Integer.parseInt(s[0]);
String strand = s[8];
if ((matchLen != InputParameter.getInsertSize())) {
if ((matchLen >= InputParameter.getReadLength() * 0.9) && strand.contains("+")) { // matched read > readLength*0.9 bp, + strand
String[] block_size = s[18].split(",");
for (int i = 0; i < block_size.length; i++) {
if (Integer.parseInt(block_size[i]) >= InputParameter.getReadLength() * 0.9) {
isRepeat = true;
break;
}
}
}
}
}
}
temp = br.readLine();
}
br.close();
fr.close();
if (!isRepeat) {
try {
fr = new FileReader(pslDir + delInfo + "_down.psl");
} catch (FileNotFoundException ex) {
query = InputParameter.getBlat() + " -t=dna -q=dna " + dbDir + delInfo + "_down.fa " + queryDir + delInfo + "_down.fa " + pslDir + delInfo + "_down.psl";
operator = Runtime.getRuntime();
process = operator.exec(query);
try {
process.waitFor();
} finally {
process.destroy();
}
fr = new FileReader(pslDir + delInfo + "_down.psl");
}
br = new BufferedReader(fr);
isHeader = true;
temp = br.readLine();
while (temp != null && !isRepeat) {
if (temp.indexOf("---------------") != -1) {
isHeader = false;
temp = br.readLine(); // candidates start
}
if (!isHeader) {
if (temp != null) {
s = temp.split("\t");
int matchLen = Integer.parseInt(s[0]);
String strand = s[8];
if ((matchLen != InputParameter.getInsertSize())) {
if ((matchLen >= InputParameter.getReadLength() * 0.9) && strand.contains("+")) { // matched read > readLength*0.9 bp, + strand
String[] block_size = s[18].split(",");
for (int i = 0; i < block_size.length; i++) {
if (Integer.parseInt(block_size[i]) >= InputParameter.getReadLength() * 0.9) {
isRepeat = true;
break;
}
}
}
}
}
}
temp = br.readLine();
}
br.close();
fr.close();
}
return isRepeat;
}
public void reportEstimatedModel(BufferedWriter bw) throws IOException {
DecimalFormat df = new DecimalFormat("#.###");
// FileWriter fw = new FileWriter(InputParameter.getSomaticCallResult());
bw.write("lambda_1\tlambda_2\tmu_1\tmu_2\tgermLogLikelihood\tmixedLogLikelihood\n");
bw.write(mm.getWeight(0) + "\t");
bw.write(mm.getWeight(1) + "\t");
bw.write(mm.getMean(0) + "\t");
bw.write(mm.getMean(1) + "\t");
bw.write(germLogLikelihood + "\t");
bw.write(mixedLogLikelihood + "\n");
}
public void callFinalVariants(BufferedWriter bw) throws IOException {
if (mixedLogLikelihood > germLogLikelihood) {
DecimalFormat df = new DecimalFormat("#.###");
bw.write("Chr\tStart\tEnd\tSize\tNumOfAnomalousReads\tavgDepthOfDeletion\tavgDepthOfWindow\tcellularity\tSomaticScore\n");
int chrNo = deletionList.size();
for (int i = 1; i < chrNo; i++) {
ArrayList chrDel = (ArrayList) deletionList.get(i);
int chrDelSize = chrDel.size();
for (int j = 0; j < chrDelSize; j++) {
DeletionInfo di = (DeletionInfo) chrDel.get(j);
if (di.isSomatic()) {
bw.write(di.getChr() + "\t");
bw.write(di.getPos1() + "\t");
bw.write(di.getPos2() + "\t");
bw.write(di.getSize() + "\t");
bw.write(di.getNum_read() + "\t");
bw.write(df.format(di.getAvgDepthDeletion()) + "\t");
bw.write(df.format(di.getAvgDepthWindow()) + "\t");
bw.write(df.format(di.getCellularity()) + "\t");
bw.write(df.format(di.getMixedSomaticProbability()) + "\n");
}
}
}
}
}
public void test() {
BinomialDistribution bn = new BinomialDistribution(120, 0.5);
NormalDistribution n = new NormalDistribution(60, Math.sqrt(30));
PVector param_norm = new PVector(2);
param_norm.array[0] = 60; // mean
param_norm.array[1] = 30; // var
PVector param_bino = new PVector(2);
param_bino.array[0] = 120; // n
param_bino.array[1] = 0.5; // p
ExponentialFamily normal = new UnivariateGaussian();
PVector p = new PVector(1);
p.array[0] = 32;
System.out.println(bn.cumulativeProbability(32));
System.out.println(n.cumulativeProbability(32));
System.out.println(normal.density(p, param_norm));
p.array[0] = 27;
System.out.println(bn.cumulativeProbability(27));
System.out.println(n.cumulativeProbability(27));
System.out.println(n.density(27));
System.out.println(normal.density(p, param_norm));
p.array[0] = 60;
System.out.println(bn.cumulativeProbability(60));
System.out.println(n.cumulativeProbability(60));
System.out.println(n.density(60));
System.out.println(normal.density(p, param_norm));
}
public void EMtest(double thres) throws FileNotFoundException, IOException {
String input = "/data2/kimjh/WGS/breakdancer/S49/Cellularity_del_list_S49_raw_BD.txt";
// String input = "/data/kimjh/WGS/sdd/simulation_2p/breakdancer/Cellularity_del_list_2p_raw_BD.txt";
FileReader fr = new FileReader(input);
BufferedReader br = new BufferedReader(fr);
String temp = br.readLine(); // header
temp = br.readLine();
String[] s;
ArrayList celList = new ArrayList();
while (temp != null) {
s = temp.split("\t");
celList.add(s[9]);
temp = br.readLine();
}
double[] cellularity = new double[celList.size()];
for (int i = 0; i < cellularity.length; i++) {
cellularity[i] = Double.parseDouble((String) celList.get(i));
}
Arrays.sort(cellularity);
double[] cellularityOverZero = getSubArray(cellularity, 0, true);
double[] cellularityOverZeroBelowThres = getSubArray(cellularityOverZero, thres, false);
// double [][] data = new double[cellularityOverZeroBelowThres.length][2];
// for(int i=0;i<cellularityOverZeroBelowThres.length;i++){
// data[i][0] = cellularityOverZeroBelowThres[i];
// data[i][1] = Math.random();
// }
// double start = 0;
// double end = thres;
// double binSize = 0.05;
// double [][] data = getHistogram(cellularityOverZeroBelowThres, start, end, binSize);
PVector[] points = new PVector[cellularityOverZeroBelowThres.length];
for (int i = 0; i < points.length; i++) {
points[i] = new PVector(1);
points[i].array[0] = cellularityOverZeroBelowThres[i];
}
int numComponents = 2;
MixtureModel mmc;
int iter = 0;
do {
iter++;
Vector<PVector>[] clusters = KMeans.run(points, numComponents);
mmc = ExpectationMaximization.initialize(clusters);
mmc = ExpectationMaximization.run(points, mmc);
} while (mmc.getMeanDiff() < 0.05 && iter < 10); // consider meanDiff with MaxIter=10
for (int i = 0; i < mmc.size; i++) {
System.out.println("\n" + mmc.getWeight(i));
System.out.println(mmc.getMean(i));
System.out.println(mmc.getStd(i) + "\n");
}
MixtureModel g = new MixtureModel(1);
ExponentialFamily ef = new UnivariateGaussian();
g.setEF(ef);
double[] weight = new double[1];
weight[0] = 1;
g.setWeight(weight);
PVector gParam = new PVector(2);
gParam.array[0] = 0.5;
gParam.array[1] = 0.5;
Parameter[] p = new Parameter[1];
p[0] = gParam;
g.setParam(p);
g = ExpectationMaximization.runWithFixedMean(points, g, 0.5);
for (int i = 0; i < g.size; i++) {
System.out.println("\n" + g.getWeight(i));
System.out.println(g.getMean(i));
System.out.println(g.getStd(i));
}
// int numComponents = 2;
// int maxIterations = 1000;
// double epsilon = 1e-08;
// MultivariateNormalMixtureExpectationMaximization em = new MultivariateNormalMixtureExpectationMaximization(data);
// MixtureMultivariateNormalDistribution mnd = em.estimate(data, numComponents);
//// List initialDist;
//// MultivariateNormalDistribution d1 = new MultivariateNormalDistribution
//// Pair p1 = new Pair(0.5,);
//// initialDist.add();
//// MixtureMultivariateNormalDistribution mnd = new MixtureMultivariateNormalDistribution()
// em.fit(mnd, maxIterations, epsilon);
// List distList = mnd.getComponents();
// for(int i=0;i<distList.size();i++){
// Pair p = (Pair) distList.get(i);
// System.out.println(p.getKey());
// MultivariateNormalDistribution mn = (MultivariateNormalDistribution) p.getValue();
// double [] means = mn.getMeans();
// for(int j=0;j<means.length;j++){
// System.out.println("mean = "+means[j]);
// }
// double [] sds = mn.getStandardDeviations();
// for(int j=0;j<sds.length;j++){
// System.out.println("sd = "+sds[j]);
// }
// }
}
// public double getGermlineProbability(DeletionInfo di) {
// double pval = 0;
// double N = di.getAvgDepthWindow();
// double p = 0.5;
//
// if (N > 30) {
// NormalDistribution n = new NormalDistribution(N * p, Math.sqrt(N * p * (1 - p)));
// } else {
// BinomialDistribution bn = new BinomialDistribution((int) N, p);
// double observed = di.getDeletionDepth();
// }
//
// return pval;
// }
public double[] getSubArray(double[] array, double thres, boolean direction) { // false : <= thres, true : > thres
int idx = 0;
double[] subArray;
for (int i = 0; i < array.length; i++) {
if (array[i] > thres) {
break;
}
idx++;
}
if (direction == false) {
subArray = Arrays.copyOfRange(array, 0, idx);
} else {
subArray = Arrays.copyOfRange(array, idx, array.length);
}
return subArray;
}
public double[][] getHistogram(double[] sortedData, double start, double end, double binSize) {
int binNum = (int) ((start + end) / binSize);
double[][] hist = new double[binNum][2];
int total = 0;
for (int i = 0; i < binNum; i++) {
double binStart = binSize * i;
double binEnd = binSize * (i + 1);
int cnt = 0;
for (int j = 0; j < sortedData.length; j++) {
if (sortedData[j] > binStart && sortedData[j] <= binEnd) {
cnt++;
}
if (sortedData[j] > binEnd) {
break;
}
}
hist[i][0] = binEnd;
hist[i][1] = cnt;
total += cnt;
}
return hist;
}
public Integer[] getOverlappedArea(int src1, int src2, int tgt1, int tgt2) {
int[] totalPos = {src1, src2, tgt1, tgt2};
Arrays.sort(totalPos);
Integer[] overlappedArea = new Integer[2];
overlappedArea[0] = totalPos[1];
overlappedArea[1] = totalPos[2];
return overlappedArea;
}
}