Package solodel

Source Code of solodel.SoloDelCaller

/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package solodel;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Vector;
import net.sf.samtools.SAMFileReader;
import org.apache.commons.math3.distribution.BinomialDistribution;
import org.apache.commons.math3.distribution.NormalDistribution;

/**
*
* @author kimjh
*/
public class SoloDelCaller {

    ArrayList deletionList;
    MixtureModel mm;
    int totalDelCnt;
    double germLogLikelihood;
    double mixedLogLikelihood;

    public SoloDelCaller() {
        deletionList = null;
        mm = null;
        totalDelCnt = 0;
        germLogLikelihood = 0;
        mixedLogLikelihood = 0;
       
        String jarPath = getClass().getProtectionDomain().getCodeSource().getLocation().getPath();
        String jarDir = jarPath.substring(0, jarPath.lastIndexOf("/") + 1);
        InputParameter.setJarDir(jarDir);
    }

    // Import deletion information form Breakdancer result file
    public void importDeletionInfo(String breakdancerResult) throws FileNotFoundException, IOException {
        deletionList = new ArrayList();
        for (int i = 0; i <= 25; i++) {
            ArrayList chrDel = new ArrayList();
            deletionList.add(chrDel);
        }

        FileReader fr = new FileReader(breakdancerResult);
//        FileReader fr = new FileReader(InputParameter.getBreakdancerOutput());
        BufferedReader br = new BufferedReader(fr);
        String temp = br.readLine();
        if (temp.indexOf("#") != -1) {  // header
            temp = br.readLine();
        }
        String[] s;

        int i = 0;
        while (temp != null) {
            if (!temp.equals("")) {
                if (temp.contains("DEL")) {
                    DeletionInfo di = new DeletionInfo(temp);
                    if (di.getSize() > InputParameter.getInsertSize()) {
//                        di.setDeletionInfo(samReader);
                        String chr = di.getChr();
                        chr = chr.replaceAll("chr", "");
                        int chrom;
                        if (chr.equals("X")) {
                            chrom = 23;
                        } else if (chr.equals("Y")) {
                            chrom = 24;
                        } else if (chr.equals("M")) {
                            chrom = 25;
                        } else {
                            chrom = Integer.parseInt(chr);
                        }
                        ArrayList chrDel = (ArrayList) deletionList.get(chrom);
                        chrDel.add(di);
                        totalDelCnt++;
                    }
                }
            }
            temp = br.readLine();
            i++;
            if (i % 100 == 0) {
                System.out.println(i + " th deletion is imported ...");
            }
        }

        br.close();
        fr.close();
        System.out.println("\nTotal number of deletion : " + totalDelCnt + "\n");
    }

    public void calculateDeletionDepth(String inputBam, String inputBamIndex) throws FileNotFoundException, IOException {
        String gaps = InputParameter.getJarDir() + InputParameter.getHumanGenomeBuild() + "_gaps.txt";
        FileReader fr = new FileReader(gaps);
        BufferedReader br = new BufferedReader(fr);

        String temp = br.readLine();    // header
        temp = br.readLine();
        String[] s;

        while (temp != null) {
            if (!temp.equals("")) {
                s = temp.split("\t");
                String chr = s[0].replaceAll("chr", "");
                int start = Integer.parseInt(s[1]);
                int stop = Integer.parseInt(s[2]);
                int size = Integer.parseInt(s[3]);

                DeletionInfo di = new DeletionInfo();
                di.setChr(s[0]);
                di.setPos1(start);
                di.setPos2(stop);
                di.setSize(size);
                di.setGap(true);

                int chrom;
                if (chr.equals("X")) {
                    chrom = 23;
                } else if (chr.equals("Y")) {
                    chrom = 24;
                } else if (chr.equals("M")) {
                    chrom = 25;
                } else {
                    chrom = Integer.parseInt(chr);
                }

                ArrayList chrDel = (ArrayList) deletionList.get(chrom);
                chrDel.add(di);
            }
            temp = br.readLine();
        }
        br.close();
        fr.close();

        File bamFile = new File(inputBam);
//        File bamFile = new File(InputParameter.getBamFile());
        File bamIndexFile = new File(inputBamIndex);
//        File bamIndexFile = new File(InputParameter.getBamIndexFile());
        SAMFileReader samReader = new SAMFileReader(bamFile, bamIndexFile);
        samReader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT);

        int chrNo = deletionList.size();
        for (int i = 1; i < chrNo; i++) {
            ArrayList<DeletionInfo> chrDel = (ArrayList) deletionList.get(i);
            Collections.sort(chrDel, new DeletionComparator());
        }

        int delNo = 0;
        for (int i = 1; i < chrNo; i++) {
            ArrayList<DeletionInfo> chrDel = (ArrayList) deletionList.get(i);
            int chrDelSize = chrDel.size();
            for (int j = 0; j < chrDelSize; j++) {
                DeletionInfo sdi = (DeletionInfo) chrDel.get(j);
                if (!sdi.isGap()) {
                    int window1 = sdi.getWindowStart();
                    int window2 = sdi.getWindowEnd();
                    ArrayList overlappedAreaList = new ArrayList();

                    for (int k = 0; k < chrDelSize; k++) {
                        if (j != k) {
                            DeletionInfo tdi = (DeletionInfo) chrDel.get(k);
                            int tgt1 = tdi.getPos1();
                            int tgt2 = tdi.getPos2();
                            if (tgt1 > window2) {
                                break;
                            }
                            if ((window1 <= tgt1 && window2 >= tgt1) || (window1 <= tgt2 && window2 >= tgt2)) {
                                Integer[] overlappedArea = getOverlappedArea(window1, window2, tgt1, tgt2);
                                overlappedAreaList.add(overlappedArea);
                            }
                        }
                    }

                    sdi.setDeletionInfo(samReader, overlappedAreaList);
                    delNo++;
                    if (delNo % 100 == 0) {
                        System.out.println(delNo + " th deletion's information is analyzed ...");
                    }
                }
            }
        }

        samReader.close();
    }

    public void gapFilter() throws FileNotFoundException, IOException {
        String gaps = InputParameter.getJarDir() + InputParameter.getHumanGenomeBuild() + "_gaps.txt";
        FileReader fr = new FileReader(gaps);
        BufferedReader br = new BufferedReader(fr);

        String temp = br.readLine();    // header
        temp = br.readLine();
        String[] s;

        ArrayList gapList = new ArrayList();
        for (int i = 0; i <= 25; i++) {
            ArrayList chrGap = new ArrayList();
            gapList.add(chrGap);
        }

        while (temp != null) {
            if (!temp.equals("")) {
                s = temp.split("\t");
                String chr = s[0].replaceAll("chr", "");
                int start = Integer.parseInt(s[1]);
                int stop = Integer.parseInt(s[2]);
                int size = Integer.parseInt(s[3]);

                DeletionInfo di = new DeletionInfo();
                di.setChr(s[0]);
                di.setPos1(start);
                di.setPos2(stop);
                di.setSize(size);

                int chrom;
                if (chr.equals("X")) {
                    chrom = 23;
                } else if (chr.equals("Y")) {
                    chrom = 24;
                } else if (chr.equals("M")) {
                    chrom = 25;
                } else {
                    chrom = Integer.parseInt(chr);
                }

                ArrayList chrGap = (ArrayList) gapList.get(chrom);
                chrGap.add(di);
            }
            temp = br.readLine();
        }
        br.close();
        fr.close();

        int gapCnt = 0;
        int chrNo = deletionList.size();
        for (int i = 1; i < chrNo; i++) {
            ArrayList chrDel = (ArrayList) deletionList.get(i);
            int chrDelSize = chrDel.size();

            for (int j = 0; j < chrDelSize; j++) {
                DeletionInfo di = (DeletionInfo) chrDel.get(j);
                double delSize = di.getSize();
                double delStart = di.getPos1();
                double delStop = di.getPos2();
                boolean b = true;

                ArrayList chrGap = (ArrayList) gapList.get(i);
                int chrGapSize = chrGap.size();
                for (int k = 0; k < chrGapSize; k++) {
                    if (b) {
                        DeletionInfo gi = (DeletionInfo) chrGap.get(k);

                        double gapSize = gi.getSize();
                        double gapStart = gi.getPos1();
                        double gapStop = gi.getPos2();
                        double thres = 0.5;
                        double upperBound = gapStart + gapSize * thres;
                        double lowerBound = gapStop - gapSize * thres;

                        if (delSize >= (gapSize * thres)) {
                            if (((delStart <= gapStart) && (delStop >= upperBound)) || ((delStop >= gapStop) && (delStart <= lowerBound)) || ((delStart <= gapStart) && (delStop >= gapStop))) {
                                di.setGap(true);
                                gapCnt++;
                                b = false;
                            }
                        }
                    } else {
                        break;
                    }
                }
            }
        }
        System.out.println("Num of filtered deletions by gap : " + gapCnt);
    }

    // Estimate the parameters of mixture model (k=2)
    public void estimateMixtureModel() {
        ArrayList sampleList = new ArrayList();
        int chrNo = deletionList.size();
        if(chrNo > 22){
            chrNo = 22;
        }
        for (int i = 1; i < chrNo; i++) {
            ArrayList chrDel = (ArrayList) deletionList.get(i);
            int chrDelSize = chrDel.size();
            for (int j = 0; j < chrDelSize; j++) {
                DeletionInfo di = (DeletionInfo) chrDel.get(j);
                if (!di.isHomology() && !di.isGap()) {
                    if (di.getCellularity() < InputParameter.getCellularityThrd() && di.getCellularity() > 0 && di.getCellularity() > (1.0/di.getAvgDepthWindow())) {
                        sampleList.add(di);
                    }
                }
            }
        }

        double[] cellularity = new double[sampleList.size()];
        for (int i = 0; i < cellularity.length; i++) {
            cellularity[i] = ((DeletionInfo) sampleList.get(i)).getCellularity();
        }

        PVector[] points = new PVector[cellularity.length];
        for (int i = 0; i < points.length; i++) {
            points[i] = new PVector(1);
            points[i].array[0] = cellularity[i];
        }

        int numComponents = 2;
        int iter = 0;
        boolean estimated = false;
        do {
            iter++;
            System.out.println(iter);
            Vector<PVector>[] clusters = KMeans.run(points, numComponents);

            mm = ExpectationMaximization.initialize(clusters);
            mm = ExpectationMaximization.run(points, mm);
            if (mm.getMeanDiff() > InputParameter.getMixedMeanDiffThrd()) {
                estimated = true;
            }
        } while (!estimated && iter < 10);

        if (!estimated) {
            double[] quartileArray = new double[sampleList.size()];
            for (int i = 0; i < quartileArray.length; i++) {
                quartileArray[i] = ((DeletionInfo) sampleList.get(i)).getCellularity();
            }

            Arrays.sort(quartileArray);

            int upperQuartileIdx = (int) (quartileArray.length * 0.75) - 1;
            int lowerQuartileIdx = (int) (quartileArray.length * 0.25) - 1;
            double upperQuartile = quartileArray[upperQuartileIdx];
            double lowerQuartile = quartileArray[lowerQuartileIdx];
            double interQuartileRange = upperQuartile - lowerQuartile;
            double upperCutoff = upperQuartile + interQuartileRange;
            double lowerCutoff = lowerQuartile - interQuartileRange;

            System.out.println("UpperCutoff = " + upperCutoff);
            System.out.println("LowerCutoff = " + lowerCutoff);

            int upperFilteredCnt = 0;
            int lowerFilteredCnt = 0;

            ArrayList filteredSampleList = new ArrayList();
            for (int i = 0; i < sampleList.size(); i++) {
                DeletionInfo di = (DeletionInfo) sampleList.get(i);
                if (di.getCellularity() > lowerCutoff && di.getCellularity() < upperCutoff) {
                    filteredSampleList.add(di);
                } else {
                    if (di.getCellularity() <= lowerCutoff) {
                        lowerFilteredCnt++;
                    } else {
                        upperFilteredCnt++;
                    }

                }
            }

            System.out.println("\nTotal lower filtered count : " + lowerFilteredCnt + "\n");
            System.out.println("\nTotal upper filtered count : " + upperFilteredCnt + "\n");

            double[] filteredCellularity = new double[filteredSampleList.size()];
            for (int i = 0; i < filteredCellularity.length; i++) {
                filteredCellularity[i] = ((DeletionInfo) filteredSampleList.get(i)).getCellularity();
            }

            PVector[] filteredPoints = new PVector[filteredCellularity.length];
            for (int i = 0; i < filteredPoints.length; i++) {
                filteredPoints[i] = new PVector(1);
                filteredPoints[i].array[0] = filteredCellularity[i];
            }

            iter = 0;
            do {
                iter++;
                System.out.println(iter);
                Vector<PVector>[] clusters = KMeans.run(filteredPoints, numComponents);

                mm = ExpectationMaximization.initialize(clusters);
                mm = ExpectationMaximization.run(filteredPoints, mm);
            } while (mm.getMeanDiff() < InputParameter.getMixedMeanDiffThrd() && iter < 10);
        }

        for (int i = 0; i < numComponents; i++) {
            System.out.println(mm.getWeight(i));
            System.out.println(mm.getMean(i));
            System.out.println(mm.getStd(i) + "\n");
        }
    }

    // Calculate deletion probabilities based on the germline model and the mixture model.
    public void calculateDeletionProbabilities(BufferedWriter germLL, BufferedWriter mixedLL) throws IOException {
        int chrNo = deletionList.size();
        for (int i = 1; i < chrNo; i++) {
            ArrayList chrDel = (ArrayList) deletionList.get(i);
            int chrDelSize = chrDel.size();
            for (int j = 0; j < chrDelSize; j++) {
                DeletionInfo di = (DeletionInfo) chrDel.get(j);
                if (!di.isHomology() && !di.isGap()) {
                    if (di.getCellularity() > InputParameter.getCellularityThrd()) {
                        di.setHomozygousDel(true);
                    } else if (di.getAvgDepthDeletion() < InputParameter.getDuplicateThrd() && di.getAvgDepthWindow() < InputParameter.getDuplicateThrd()) {
                        double N = di.getAvgDepthWindow()// mean
                        double X = N - di.getAvgDepthDeletion();    // mean
//                        double N = di.getMedianDepthWindow(); // median
//                        double X = N - di.getMedianDepthDeletion();   // median


                        if (X < 0) {
//                            System.out.println(di.print());
                            X = 0;
                        }
                        double germP = 0.5;
                        ExponentialFamily germDist;

                        if (N > 30) {
                            germDist = new UnivariateGaussian();
                            PVector gParam = new PVector(2);
                            gParam.array[0] = N * germP;  // mean
                            gParam.array[1] = N * germP * (1 - germP)// var

                            PVector observed = new PVector(1);
                            observed.array[0] = X;

                            double germDelProbability = germDist.density(observed, gParam);
                            di.setGermlineProbability(germDelProbability);
                            germLogLikelihood += Math.log(germDelProbability);
                            germLL.write(di.print() + "\t" + Math.log(germDelProbability) + "\n");

                            MixtureModel mixedDist = new MixtureModel(2);
                            ExponentialFamily ef = new UnivariateGaussian();
                            mixedDist.setEF(ef);
                            mixedDist.setWeight(mm.getWeight());
                            PVector[] mParam = new PVector[2];
                            for (int k = 0; k < 2; k++) {
                                mParam[k] = new PVector(2);
                                mParam[k].array[0] = N * mm.getMean(k);
                                mParam[k].array[1] = N * mm.getMean(k) * (1 - mm.getMean(k));
                            }
                            mixedDist.setParam(mParam);

                            double mixedDelProbability = mixedDist.density(observed);
                            di.setMixedProbability(mixedDelProbability);
                            mixedLogLikelihood += Math.log(mixedDelProbability);
                            mixedLL.write(di.print() + "\t" + Math.log(mixedDelProbability) + "\n");

                            double[] mixedDensity = mixedDist.getDensities(observed)// order : som, germ                    
                            di.setMixedSomaticProbability(mixedDensity[0] / (mixedDensity[1] * di.getCoefficientOfvariation())); // ratio_over_CV_B, 140401
                            if (mixedDensity[0] > mixedDensity[1]) {
                                di.setSomatic(true);
//                                di.setMixedSomaticProbability(mixedDensity[0]);
//                                di.setMixedSomaticProbability(mixedDensity[0]-mixedDensity[1]); // diff, 140326
//                                di.setMixedSomaticProbability(Math.log(mixedDensity[0] / mixedDensity[1])); // ratio, 140327
//                                di.setMixedSomaticProbability(Math.log(mixedDensity[0]/mixedDensity[1])/di.getCoefficientOfvariation()); // ratio_over_CV_A, 140327
//                                di.setMixedSomaticProbability(Math.log(mixedDensity[0] / (mixedDensity[1] * di.getCoefficientOfvariation()))); // ratio_over_CV_B, 140327
//                                di.setMixedSomaticProbability(Math.log(mixedDensity[0]/(mixedDensity[1]*di.getCoefficientOfvariation()*(di.getStdWindow()/di.getAvgDepthWindow())))); // ratio_over_CV_B_with_window, 140327
//                                di.setMixedSomaticProbability(Math.log((mixedDensity[0]/(1-mixedDensity[0]))/((mixedDensity[1]/(1-mixedDensity[1]))*di.getCoefficientOfvariation()))); // odds_ratio_over_CV_B, 140331
                            }
//                    System.out.print(di.getCellularity() + "\t" + Math.log(germDelProbability) + "\t");
//                    System.out.println(Math.log(mixedDelProbability) + "\t" + di.getAvgDepthDeletion() + "\t" + di.getAvgDepthWindow() + "\t" + di.getPos1() + "\t" + di.getPos2());
                        } else {
                            germDist = new BinomialFixedN((int) N);
                            PVector gParam = new PVector(1);
                            gParam.array[0] = germP;    // p

                            PVector observed = new PVector(1);
                            observed.array[0] = (int) X;

                            double germDelProbability = germDist.density(observed, gParam);
                            di.setGermlineProbability(germDelProbability);
                            germLogLikelihood += Math.log(germDelProbability);
                            germLL.write(di.print() + "\t" + Math.log(germDelProbability) + "\n");

                            ExponentialFamily mixedDist = new BinomialFixedN((int) N);
                            PVector mixedGermParam = new PVector(1);
                            PVector mixedSomParam = new PVector(1);
                            double mixedGermDelProbability;
                            double mixedSomDelProbability;
                            if (mm.getMean(0) > mm.getMean(1)) {    // mm[0] = p_g, mm[1] = p_s
                                mixedGermParam.array[0] = mm.getMean(0);
                                mixedSomParam.array[0] = mm.getMean(1);
                                mixedGermDelProbability = mm.getWeight(0) * mixedDist.density(observed, mixedGermParam);
                                mixedSomDelProbability = mm.getWeight(1) * mixedDist.density(observed, mixedSomParam);
                            } else {// mm[0] = p_s, mm[1] = p_g
                                mixedGermParam.array[0] = mm.getMean(1);
                                mixedSomParam.array[0] = mm.getMean(0);
                                mixedGermDelProbability = mm.getWeight(1) * mixedDist.density(observed, mixedGermParam);
                                mixedSomDelProbability = mm.getWeight(0) * mixedDist.density(observed, mixedSomParam);
                            }
                            double mixedDelProbability = mixedGermDelProbability + mixedSomDelProbability;
                            di.setMixedProbability(mixedDelProbability);
                            mixedLogLikelihood += Math.log(mixedDelProbability);
                            mixedLL.write(di.print() + "\t" + Math.log(mixedDelProbability) + "\n");

                            di.setMixedSomaticProbability(mixedSomDelProbability / (mixedGermDelProbability * di.getCoefficientOfvariation())); // ratio_over_CV_B, 140401
                            if (mixedSomDelProbability > mixedGermDelProbability) {
                                di.setSomatic(true);
//                                di.setMixedSomaticProbability(mixedSomDelProbability);
//                                di.setMixedSomaticProbability(mixedSomDelProbability - mixedGermDelProbability); // diff, 140326
//                                di.setMixedSomaticProbability(Math.log(mixedSomDelProbability / mixedGermDelProbability)); // ratio, 140327
//                                di.setMixedSomaticProbability(Math.log(mixedSomDelProbability/mixedGermDelProbability)/di.getCoefficientOfvariation()); // ratio_over_CV_A, 140327
//                                di.setMixedSomaticProbability(Math.log(mixedSomDelProbability / (mixedGermDelProbability * di.getCoefficientOfvariation()))); // ratio_over_CV_B, 140327
//                                di.setMixedSomaticProbability(Math.log(mixedSomDelProbability/(mixedGermDelProbability*di.getCoefficientOfvariation()*(di.getStdWindow()/di.getAvgDepthWindow())))); // ratio_over_CV_B_with_window, 140327
//                                di.setMixedSomaticProbability(Math.log((mixedSomDelProbability/(1-mixedSomDelProbability))/((mixedGermDelProbability/(1-mixedGermDelProbability))*di.getCoefficientOfvariation()))); // odds_ratio_over_CV_B, 140331
                            }
                        }
                    }
                }
            }
        }
       
//        germLL.flush();
//        germLL.close();
//       
//        mixedLL.flush();
//        mixedLL.close();
       
        System.out.println("germLogLikelihood = " + germLogLikelihood);
        System.out.println("mixedLogLikelihood = " + mixedLogLikelihood);
    }

//    public void homologyFilter(String id) throws IOException, InterruptedException {
    public void homologyFilter() throws IOException, InterruptedException {   
        String queryChr;
        String queryLociStart;
        String queryLociEnd;
        String query;
        String upstreamSeq;
        String downstreamSeq;
        String temp;
        Runtime operator;
        Process process;
        InputStream is;
        InputStreamReader isr;
        BufferedReader br;
        FileReader fr;
        int homologyCnt = 0;

//        String fastaDir = System.getProperty("user.dir") + "/fasta/";
//        String pslDir = System.getProperty("user.dir") + "/psl/";
//        String idDir = System.getProperty("user.dir") + "/" + id;
        String idDir = InputParameter.getWorkingDirectory() + InputParameter.getOutputHeader();
        String fastaDir = idDir + "/fasta/";
        String queryDir = fastaDir + "query/";
        String dbDir = fastaDir + "database/";
        String pslDir = idDir + "/psl/";

        query = "mkdir " + idDir;
        operator = Runtime.getRuntime();
        process = operator.exec(query);
        try {
            process.waitFor();
        } finally {
            process.destroy();
        }

        query = "mkdir " + fastaDir;
        operator = Runtime.getRuntime();
        process = operator.exec(query);
        try {
            process.waitFor();
        } finally {
            process.destroy();
        }

        query = "mkdir " + queryDir;
        operator = Runtime.getRuntime();
        process = operator.exec(query);
        try {
            process.waitFor();
        } finally {
            process.destroy();
        }

        query = "mkdir " + dbDir;
        operator = Runtime.getRuntime();
        process = operator.exec(query);
        try {
            process.waitFor();
        } finally {
            process.destroy();
        }

        query = "mkdir " + pslDir;
        operator = Runtime.getRuntime();
        process = operator.exec(query);
        try {
            process.waitFor();
        } finally {
            process.destroy();
        }

        int chrNo = deletionList.size();
        int totalDel = 0;
        for (int i = 1; i < chrNo; i++) {
            ArrayList chrDel = (ArrayList) deletionList.get(i);
            int chrDelSize = chrDel.size();
            for (int j = 0; j < chrDelSize; j++) {
                totalDel++;
                DeletionInfo di = (DeletionInfo) chrDel.get(j);
                if (!di.isGap()) {
                    makeFastaForBlat(di, queryDir, dbDir);
                    boolean isHomology = isHomology(di, queryDir, dbDir, pslDir);
                    di.setHomology(isHomology);
                    if (isHomology) {
                        homologyCnt++;
                        di.setCellularity(1);
                    }

                    if (totalDel % 100 == 0) {
                        System.out.println(totalDel + " th deletion homology is searched ...");
                    }
                }
            }
        }

//        System.out.println("Num of passed deletions : " + (deletionList.size() - homologyCnt));
        System.out.println("Num of filtered deletions by homology search : " + homologyCnt);

    }

    public void makeFastaForBlat(DeletionInfo di, String queryDir, String dbDir) throws IOException, InterruptedException {
        String query;
        Runtime operator;
        Process process;
        InputStream is;
        InputStreamReader isr;
        BufferedReader br;
        FileWriter fw;
        BufferedWriter bw;

        String upstreamSeq = "";
        String downstreamSeq = "";

        query = InputParameter.getFastahack() + " -r " + di.getChr() + ":" + (di.getPos1() - InputParameter.getInsertSize()) + ".." + (di.getPos1() - 1) + " " + InputParameter.getHumanRefFasta();
        operator = Runtime.getRuntime();
        process = operator.exec(query);
        try {
            process.waitFor();
            is = process.getInputStream();
            isr = new InputStreamReader(is);
            br = new BufferedReader(isr);
            String temp = br.readLine();
            while (temp != null) {
                upstreamSeq += temp;
                temp = br.readLine();
            }
            br.close();
            isr.close();
            is.close();
        } finally {
            process.destroy();
        }

        query = InputParameter.getFastahack() + " -r " + di.getChr() + ":" + (di.getPos2() + 1) + ".." + (di.getPos2() + InputParameter.getInsertSize()) + " " + InputParameter.getHumanRefFasta();
        operator = Runtime.getRuntime();
        process = operator.exec(query);
        try {
            process.waitFor();
            is = process.getInputStream();
            isr = new InputStreamReader(is);
            br = new BufferedReader(isr);
            String temp = br.readLine();
            while (temp != null) {
                downstreamSeq += temp;
                temp = br.readLine();
            }
            br.close();
            isr.close();
            is.close();
        } finally {
            process.destroy();
        }

        String delInfo = di.getChr() + "_" + di.getPos1() + "_" + di.getPos2();
        String outputUpstream = delInfo + "_up.fa";
        String outputDownstream = delInfo + "_down.fa";

        fw = new FileWriter(queryDir + outputUpstream);
        bw = new BufferedWriter(fw);
        bw.write(">" + delInfo + "\n");
        bw.write(upstreamSeq + "\n");
        bw.flush();
        bw.close();
        fw.close();

        fw = new FileWriter(queryDir + outputDownstream);
        bw = new BufferedWriter(fw);
        bw.write(">" + delInfo + "\n");
        bw.write(downstreamSeq + "\n");
        bw.flush();
        bw.close();
        fw.close();

        upstreamSeq = "";
        downstreamSeq = "";
        int searchSpace = 2 * InputParameter.getInsertSize();

        int estimatedHomologyPos = di.getPos2() - (InputParameter.getInsertSize() - InputParameter.getReadLength());
        int searchSpaceUp = estimatedHomologyPos - searchSpace;
        if (di.getPos1() > searchSpaceUp) {
            searchSpaceUp = di.getPos1() + 1;
        }
        int searchSpaceDown = estimatedHomologyPos + searchSpace;

        query = InputParameter.getFastahack() + " -r " + di.getChr() + ":" + searchSpaceUp + ".." + searchSpaceDown + " " + InputParameter.getHumanRefFasta();
        operator = Runtime.getRuntime();
        process = operator.exec(query);
        try {
            process.waitFor();
            is = process.getInputStream();
            isr = new InputStreamReader(is);
            br = new BufferedReader(isr);
            String temp = br.readLine();
            while (temp != null) {
                upstreamSeq += temp;
                temp = br.readLine();
            }
            br.close();
            isr.close();
            is.close();
        } finally {
            process.destroy();
        }

        fw = new FileWriter(dbDir + outputUpstream);
        bw = new BufferedWriter(fw);
        bw.write(">" + di.getChr() + "_" + searchSpaceUp + "_" + searchSpaceDown + "\n");
        bw.write(upstreamSeq + "\n");
        bw.flush();
        bw.close();
        fw.close();

        estimatedHomologyPos = di.getPos1() + (InputParameter.getInsertSize() - InputParameter.getReadLength());
        searchSpaceUp = estimatedHomologyPos - searchSpace;
        searchSpaceDown = estimatedHomologyPos + searchSpace;
        if (di.getPos2() < searchSpaceDown) {
            searchSpaceDown = di.getPos2() - 1;
        }

        query = InputParameter.getFastahack() + " -r " + di.getChr() + ":" + searchSpaceUp + ".." + searchSpaceDown + " " + InputParameter.getHumanRefFasta();
        operator = Runtime.getRuntime();
        process = operator.exec(query);
        try {
            process.waitFor();
            is = process.getInputStream();
            isr = new InputStreamReader(is);
            br = new BufferedReader(isr);
            String temp = br.readLine();
            while (temp != null) {
                downstreamSeq += temp;
                temp = br.readLine();
            }
            br.close();
            isr.close();
            is.close();
        } finally {
            process.destroy();
        }

        fw = new FileWriter(dbDir + outputDownstream);
        bw = new BufferedWriter(fw);
        bw.write(">" + di.getChr() + "_" + searchSpaceUp + "_" + searchSpaceDown + "\n");
        bw.write(downstreamSeq + "\n");
        bw.flush();
        bw.close();
        fw.close();
    }

    public boolean isHomology(DeletionInfo di, String queryDir, String dbDir, String pslDir) throws IOException, InterruptedException {
        boolean isRepeat = false;
        String query;
        Runtime operator;
        Process process;
        InputStream is;
        InputStreamReader isr;
        FileReader fr;
        BufferedReader br;
        FileWriter fw;
        BufferedWriter bw;
        String[] s;

        String delInfo = di.getChr() + "_" + di.getPos1() + "_" + di.getPos2();

        try {
            fr = new FileReader(pslDir + delInfo + "_up.psl");
        } catch (FileNotFoundException ex) {
            query = InputParameter.getBlat() + " -t=dna -q=dna " + dbDir + delInfo + "_up.fa " + queryDir + delInfo + "_up.fa " + pslDir + delInfo + "_up.psl";
            operator = Runtime.getRuntime();
            process = operator.exec(query);
            try {
                process.waitFor();
            } finally {
                process.destroy();
            }
            fr = new FileReader(pslDir + delInfo + "_up.psl");
        }
        br = new BufferedReader(fr);
        boolean isHeader = true;
        String temp = br.readLine();
        while (temp != null && !isRepeat) {
            if (temp.indexOf("---------------") != -1) {
                isHeader = false;
                temp = br.readLine(); // candidates start
            }
            if (!isHeader) {
                if (temp != null) {
                    s = temp.split("\t");
                    int matchLen = Integer.parseInt(s[0]);
                    String strand = s[8];
                    if ((matchLen != InputParameter.getInsertSize())) {
                        if ((matchLen >= InputParameter.getReadLength() * 0.9) && strand.contains("+")) {    // matched read > readLength*0.9 bp, + strand
                            String[] block_size = s[18].split(",");
                            for (int i = 0; i < block_size.length; i++) {
                                if (Integer.parseInt(block_size[i]) >= InputParameter.getReadLength() * 0.9) {
                                    isRepeat = true;
                                    break;
                                }
                            }
                        }
                    }
                }
            }
            temp = br.readLine();
        }
        br.close();
        fr.close();

        if (!isRepeat) {
            try {
                fr = new FileReader(pslDir + delInfo + "_down.psl");
            } catch (FileNotFoundException ex) {
                query = InputParameter.getBlat() + " -t=dna -q=dna " + dbDir + delInfo + "_down.fa " + queryDir + delInfo + "_down.fa " + pslDir + delInfo + "_down.psl";
                operator = Runtime.getRuntime();
                process = operator.exec(query);
                try {
                    process.waitFor();
                } finally {
                    process.destroy();
                }
                fr = new FileReader(pslDir + delInfo + "_down.psl");
            }
            br = new BufferedReader(fr);
            isHeader = true;
            temp = br.readLine();
            while (temp != null && !isRepeat) {
                if (temp.indexOf("---------------") != -1) {
                    isHeader = false;
                    temp = br.readLine(); // candidates start
                }
                if (!isHeader) {
                    if (temp != null) {
                        s = temp.split("\t");
                        int matchLen = Integer.parseInt(s[0]);
                        String strand = s[8];
                        if ((matchLen != InputParameter.getInsertSize())) {
                            if ((matchLen >= InputParameter.getReadLength() * 0.9) && strand.contains("+")) {    // matched read > readLength*0.9 bp, + strand                               
                                String[] block_size = s[18].split(",");
                                for (int i = 0; i < block_size.length; i++) {
                                    if (Integer.parseInt(block_size[i]) >= InputParameter.getReadLength() * 0.9) {
                                        isRepeat = true;
                                        break;
                                    }
                                }
                            }
                        }
                    }
                }
                temp = br.readLine();
            }
            br.close();
            fr.close();
        }
        return isRepeat;
    }

    public void reportEstimatedModel(BufferedWriter bw) throws IOException {
        DecimalFormat df = new DecimalFormat("#.###");
//            FileWriter fw = new FileWriter(InputParameter.getSomaticCallResult());       
        bw.write("lambda_1\tlambda_2\tmu_1\tmu_2\tgermLogLikelihood\tmixedLogLikelihood\n");
        bw.write(mm.getWeight(0) + "\t");
        bw.write(mm.getWeight(1) + "\t");
        bw.write(mm.getMean(0) + "\t");
        bw.write(mm.getMean(1) + "\t");
        bw.write(germLogLikelihood + "\t");
        bw.write(mixedLogLikelihood + "\n");
    }

    public void callFinalVariants(BufferedWriter bw) throws IOException {
        if (mixedLogLikelihood > germLogLikelihood) {
            DecimalFormat df = new DecimalFormat("#.###");
            bw.write("Chr\tStart\tEnd\tSize\tNumOfAnomalousReads\tavgDepthOfDeletion\tavgDepthOfWindow\tcellularity\tSomaticScore\n");

            int chrNo = deletionList.size();
            for (int i = 1; i < chrNo; i++) {
                ArrayList chrDel = (ArrayList) deletionList.get(i);
                int chrDelSize = chrDel.size();
                for (int j = 0; j < chrDelSize; j++) {
                    DeletionInfo di = (DeletionInfo) chrDel.get(j);
                    if (di.isSomatic()) {
                        bw.write(di.getChr() + "\t");
                        bw.write(di.getPos1() + "\t");
                        bw.write(di.getPos2() + "\t");
                        bw.write(di.getSize() + "\t");
                        bw.write(di.getNum_read() + "\t");
                        bw.write(df.format(di.getAvgDepthDeletion()) + "\t");
                        bw.write(df.format(di.getAvgDepthWindow()) + "\t");
                        bw.write(df.format(di.getCellularity()) + "\t");
                        bw.write(df.format(di.getMixedSomaticProbability()) + "\n");
                    }
                }
            }
        }
    }

    public void test() {
        BinomialDistribution bn = new BinomialDistribution(120, 0.5);
        NormalDistribution n = new NormalDistribution(60, Math.sqrt(30));

        PVector param_norm = new PVector(2);
        param_norm.array[0] = 60;   // mean
        param_norm.array[1] = 30;   // var

        PVector param_bino = new PVector(2);
        param_bino.array[0] = 120;   // n
        param_bino.array[1] = 0.5;   // p

        ExponentialFamily normal = new UnivariateGaussian();
        PVector p = new PVector(1);
        p.array[0] = 32;

        System.out.println(bn.cumulativeProbability(32));
        System.out.println(n.cumulativeProbability(32));
        System.out.println(normal.density(p, param_norm));

        p.array[0] = 27;
        System.out.println(bn.cumulativeProbability(27));
        System.out.println(n.cumulativeProbability(27));
        System.out.println(n.density(27));
        System.out.println(normal.density(p, param_norm));

        p.array[0] = 60;
        System.out.println(bn.cumulativeProbability(60));
        System.out.println(n.cumulativeProbability(60));
        System.out.println(n.density(60));
        System.out.println(normal.density(p, param_norm));



    }

    public void EMtest(double thres) throws FileNotFoundException, IOException {
        String input = "/data2/kimjh/WGS/breakdancer/S49/Cellularity_del_list_S49_raw_BD.txt";
//        String input = "/data/kimjh/WGS/sdd/simulation_2p/breakdancer/Cellularity_del_list_2p_raw_BD.txt";

        FileReader fr = new FileReader(input);
        BufferedReader br = new BufferedReader(fr);
        String temp = br.readLine();    // header
        temp = br.readLine();
        String[] s;
        ArrayList celList = new ArrayList();
        while (temp != null) {
            s = temp.split("\t");
            celList.add(s[9]);
            temp = br.readLine();
        }

        double[] cellularity = new double[celList.size()];
        for (int i = 0; i < cellularity.length; i++) {
            cellularity[i] = Double.parseDouble((String) celList.get(i));
        }

        Arrays.sort(cellularity);
        double[] cellularityOverZero = getSubArray(cellularity, 0, true);
        double[] cellularityOverZeroBelowThres = getSubArray(cellularityOverZero, thres, false);

//        double [][] data = new double[cellularityOverZeroBelowThres.length][2];
//        for(int i=0;i<cellularityOverZeroBelowThres.length;i++){
//            data[i][0] = cellularityOverZeroBelowThres[i];
//            data[i][1] = Math.random();
//        }
//        double start = 0;
//        double end = thres;
//        double binSize = 0.05;
//        double [][] data = getHistogram(cellularityOverZeroBelowThres, start, end, binSize);

        PVector[] points = new PVector[cellularityOverZeroBelowThres.length];
        for (int i = 0; i < points.length; i++) {
            points[i] = new PVector(1);
            points[i].array[0] = cellularityOverZeroBelowThres[i];
        }

        int numComponents = 2;
        MixtureModel mmc;
        int iter = 0;
        do {
            iter++;
            Vector<PVector>[] clusters = KMeans.run(points, numComponents);
            mmc = ExpectationMaximization.initialize(clusters);
            mmc = ExpectationMaximization.run(points, mmc);
        } while (mmc.getMeanDiff() < 0.05 && iter < 10);    // consider meanDiff with MaxIter=10

        for (int i = 0; i < mmc.size; i++) {
            System.out.println("\n" + mmc.getWeight(i));
            System.out.println(mmc.getMean(i));
            System.out.println(mmc.getStd(i) + "\n");
        }

        MixtureModel g = new MixtureModel(1);
        ExponentialFamily ef = new UnivariateGaussian();
        g.setEF(ef);
        double[] weight = new double[1];
        weight[0] = 1;
        g.setWeight(weight);
        PVector gParam = new PVector(2);
        gParam.array[0] = 0.5;
        gParam.array[1] = 0.5;
        Parameter[] p = new Parameter[1];
        p[0] = gParam;
        g.setParam(p);
        g = ExpectationMaximization.runWithFixedMean(points, g, 0.5);

        for (int i = 0; i < g.size; i++) {
            System.out.println("\n" + g.getWeight(i));
            System.out.println(g.getMean(i));
            System.out.println(g.getStd(i));
        }

//        int numComponents = 2;
//        int maxIterations = 1000;
//        double epsilon = 1e-08;
//        MultivariateNormalMixtureExpectationMaximization em = new MultivariateNormalMixtureExpectationMaximization(data);
//        MixtureMultivariateNormalDistribution mnd = em.estimate(data, numComponents);
////        List initialDist;
////        MultivariateNormalDistribution d1 = new MultivariateNormalDistribution
////        Pair p1 = new Pair(0.5,);
////        initialDist.add();
////        MixtureMultivariateNormalDistribution mnd = new MixtureMultivariateNormalDistribution()
//        em.fit(mnd, maxIterations, epsilon);
//        List distList = mnd.getComponents();
//        for(int i=0;i<distList.size();i++){
//            Pair p = (Pair) distList.get(i);
//            System.out.println(p.getKey());
//            MultivariateNormalDistribution mn = (MultivariateNormalDistribution) p.getValue();
//            double [] means = mn.getMeans();
//            for(int j=0;j<means.length;j++){
//                System.out.println("mean = "+means[j]);
//            }
//            double [] sds = mn.getStandardDeviations();
//            for(int j=0;j<sds.length;j++){
//                System.out.println("sd = "+sds[j]);
//            }
//        }
    }

//    public double getGermlineProbability(DeletionInfo di) {
//        double pval = 0;
//        double N = di.getAvgDepthWindow();
//        double p = 0.5;
//
//        if (N > 30) {
//            NormalDistribution n = new NormalDistribution(N * p, Math.sqrt(N * p * (1 - p)));
//        } else {
//            BinomialDistribution bn = new BinomialDistribution((int) N, p);
//            double observed = di.getDeletionDepth();
//        }
//
//        return pval;
//    }
    public double[] getSubArray(double[] array, double thres, boolean direction) {  // false : <= thres, true : > thres
        int idx = 0;
        double[] subArray;
        for (int i = 0; i < array.length; i++) {
            if (array[i] > thres) {
                break;
            }
            idx++;
        }
        if (direction == false) {
            subArray = Arrays.copyOfRange(array, 0, idx);
        } else {
            subArray = Arrays.copyOfRange(array, idx, array.length);
        }
        return subArray;
    }

    public double[][] getHistogram(double[] sortedData, double start, double end, double binSize) {
        int binNum = (int) ((start + end) / binSize);
        double[][] hist = new double[binNum][2];
        int total = 0;
        for (int i = 0; i < binNum; i++) {
            double binStart = binSize * i;
            double binEnd = binSize * (i + 1);
            int cnt = 0;
            for (int j = 0; j < sortedData.length; j++) {
                if (sortedData[j] > binStart && sortedData[j] <= binEnd) {
                    cnt++;
                }
                if (sortedData[j] > binEnd) {
                    break;
                }
            }
            hist[i][0] = binEnd;
            hist[i][1] = cnt;
            total += cnt;
        }
        return hist;
    }

    public Integer[] getOverlappedArea(int src1, int src2, int tgt1, int tgt2) {
        int[] totalPos = {src1, src2, tgt1, tgt2};
        Arrays.sort(totalPos);
        Integer[] overlappedArea = new Integer[2];
        overlappedArea[0] = totalPos[1];
        overlappedArea[1] = totalPos[2];
        return overlappedArea;
    }
}
TOP

Related Classes of solodel.SoloDelCaller

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.