Package solodel

Source Code of solodel.bak

/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package solodel;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Vector;
import net.sf.picard.util.Interval;
import net.sf.picard.util.IntervalList;
import net.sf.picard.util.SamLocusIterator;
import net.sf.samtools.SAMFileReader;
import org.apache.commons.math3.distribution.BinomialDistribution;
import org.apache.commons.math3.distribution.NormalDistribution;

/**
*
* @author kimjh
*/
public class bak {
    public void depth_test() throws FileNotFoundException, IOException{
        FileReader fr = new FileReader("/data/kimjh/WGS/simulation_test_chr1/seperate_10p/gatk/depth.txt");
        BufferedReader br = new BufferedReader(fr);
        String temp = br.readLine();
        String[] t;

        String bam_file = "/data/kimjh/WGS/simulation_test_chr1/seperate_10p/chr1_sim_70x_sorted_rmdup.bam";
        String bam_index_file = "/data/kimjh/WGS/simulation_test_chr1/seperate_10p/chr1_sim_70x_sorted_rmdup.bam.bai";
        File bam = new File(bam_file);
        File bam_index = new File(bam_index_file);
        SAMFileReader samReader = new SAMFileReader(bam, bam_index);
        String chr_id = "chr1";
        int start = 1;
        int end = 20;
//        int end = 247199724;
        Interval interval = new Interval(chr_id, start, end);
        IntervalList il = new IntervalList(samReader.getFileHeader());
        il.add(interval);
       
        SamLocusIterator sli = new SamLocusIterator(samReader, il, true);

        FileWriter fw = new FileWriter("/data/kimjh/WGS/simulation_test_chr1/seperate_10p/gatk/depth_diff.txt");
        BufferedWriter bw = new BufferedWriter(fw);

        for (Iterator<SamLocusIterator.LocusInfo> iter = sli.iterator(); iter.hasNext();) {
            SamLocusIterator.LocusInfo locusInfo = iter.next();
            List recordList = locusInfo.getRecordAndPositions();
            Iterator it = recordList.iterator();
            String readBase = "";
            String baseQuality = "";
            while(it.hasNext()){
                SamLocusIterator.RecordAndOffset record = (SamLocusIterator.RecordAndOffset) it.next();
                if(baseQuality.equals("")){
//                    readBase += Byte.toString(record.getReadBase());
//                    baseQuality += record.getBaseQuality();
                    readBase += (char)(record.getReadBase()&0xff);
                    baseQuality += (int)((record.getBaseQuality()));
                }else{
                    readBase += ","+(char)(record.getReadBase()&0xff);
                    baseQuality += ","+(int)((record.getBaseQuality()));
                }
            }

            System.out.println("chr1\t"+locusInfo.getPosition()+"\t"+locusInfo.getRecordAndPositions().size()+"\t"+readBase+"\t"+baseQuality+"\n");
//            bw.write("chr1\t"+locusInfo.getPosition()+"\t"+locusInfo.getRecordAndPositions().size()+"\n");
            int pos = locusInfo.getPosition();
            int size = locusInfo.getRecordAndPositions().size();

            if (temp != null) {
                t = temp.split("\t");
                int sam_pos = Integer.parseInt(t[1]);
                int sam_size = Integer.parseInt(t[2]);

                while (sam_pos <= pos && temp != null) {
                    t = temp.split("\t");
                    sam_pos = Integer.parseInt(t[1]);
                    sam_size = Integer.parseInt(t[2]);
                    if (sam_pos == pos) {
                        if (sam_size != size) {
                            if(pos%1000000==0){
                                System.out.println(pos);
                            }                           
                            bw.write(pos + "\t" + size + "\t" + sam_size + "\n");
                        }
                        temp = br.readLine();
                    } else if(sam_pos>pos){                       
                    }else{
                        temp = br.readLine();
                    }
                }
            }
        }
        br.close();
        fr.close();

        bw.flush();
        bw.close();
        fw.close();

        sli.close();
        samReader.close();
    }
   
    public void EMtest(double thres) throws FileNotFoundException, IOException {
        String input = "/data/kimjh/WGS/breakdancer/S49/Cellularity_del_list_S49_raw_BD.txt";
//        String input = "/data/kimjh/WGS/sdd/simulation_2p/breakdancer/Cellularity_del_list_2p_raw_BD.txt";

        FileReader fr = new FileReader(input);
        BufferedReader br = new BufferedReader(fr);
        String temp = br.readLine();    // header
        temp = br.readLine();
        String[] s;
        ArrayList celList = new ArrayList();
        while (temp != null) {
            s = temp.split("\t");
            celList.add(s[9]);
            temp = br.readLine();
        }

        double[] cellularity = new double[celList.size()];
        for (int i = 0; i < cellularity.length; i++) {
            cellularity[i] = Double.parseDouble((String) celList.get(i));
        }

        Arrays.sort(cellularity);
        double[] cellularityOverZero = getSubArray(cellularity, 0, true);
        double[] cellularityOverZeroBelowThres = getSubArray(cellularityOverZero, thres, false);

//        double [][] data = new double[cellularityOverZeroBelowThres.length][2];
//        for(int i=0;i<cellularityOverZeroBelowThres.length;i++){
//            data[i][0] = cellularityOverZeroBelowThres[i];
//            data[i][1] = Math.random();
//        }
//        double start = 0;
//        double end = thres;
//        double binSize = 0.05;
//        double [][] data = getHistogram(cellularityOverZeroBelowThres, start, end, binSize);

        PVector[] points = new PVector[cellularityOverZeroBelowThres.length];
        for (int i = 0; i < points.length; i++) {
            points[i] = new PVector(1);
            points[i].array[0] = cellularityOverZeroBelowThres[i];
        }

        int numComponents = 2;
        MixtureModel mmc;
        int iter = 0;
        do {
            iter++;
            Vector<PVector>[] clusters = KMeans.run(points, numComponents);
            mmc = ExpectationMaximization.initialize(clusters);
            mmc = ExpectationMaximization.run(points, mmc);
        } while (mmc.getMeanDiff() < 0.05 && iter < 10);    // consider meanDiff with MaxIter=10

        for (int i = 0; i < mmc.size; i++) {
            System.out.println("\n" + mmc.getWeight(i));
            System.out.println(mmc.getMean(i));
            System.out.println(mmc.getStd(i) + "\n");
        }

        MixtureModel g = new MixtureModel(1);
        ExponentialFamily ef = new UnivariateGaussian();
        g.setEF(ef);
        double[] weight = new double[1];
        weight[0] = 1;
        g.setWeight(weight);
        PVector gParam = new PVector(2);
        gParam.array[0] = 0.5;
        gParam.array[1] = 0.5;
        Parameter[] p = new Parameter[1];
        p[0] = gParam;
        g.setParam(p);
        g = ExpectationMaximization.runWithFixedMean(points, g, 0.5);

        for (int i = 0; i < g.size; i++) {
            System.out.println("\n" + g.getWeight(i));
            System.out.println(g.getMean(i));
            System.out.println(g.getStd(i));
        }
    }
   
    public double[] getSubArray(double[] array, double thres, boolean direction) {  // false : <= thres, true : > thres
        int idx = 0;
        double[] subArray;
        for (int i = 0; i < array.length; i++) {
            if (array[i] > thres) {
                break;
            }
            idx++;
        }
        if (direction == false) {
            subArray = Arrays.copyOfRange(array, 0, idx);
        } else {
            subArray = Arrays.copyOfRange(array, idx, array.length);
        }
        return subArray;
    }
   
    public double getGermlineProbability(DeletionInfo di) {
        double pval = 0;
        double N = di.getAvgDepthWindow();
        double p = 0.5;

        if (N > 30) {
            NormalDistribution n = new NormalDistribution(N * p, Math.sqrt(N * p * (1 - p)));
        } else {
            BinomialDistribution bn = new BinomialDistribution((int) N, p);
            double observed = di.getDeletionDepth();
        }

        return pval;
    }
   
    public void importDeletionInfo(String breakdancerResult, String inputBam, String inputBamIndex, int insertSize) throws FileNotFoundException, IOException {
        ArrayList deletionList = new ArrayList();

        FileReader fr = new FileReader(breakdancerResult);
//        FileReader fr = new FileReader(InputParameter.getBreakdancerOutput());
        BufferedReader br = new BufferedReader(fr);       
        String temp = br.readLine();
        if (temp.indexOf("#") != -1) {  // header
            temp = br.readLine();
        }
        String[] s;

        File bamFile = new File(inputBam);
//        File bamFile = new File(InputParameter.getBamFile());
        File bamIndexFile = new File(inputBamIndex);
//        File bamIndexFile = new File(InputParameter.getBamIndexFile());
        SAMFileReader samReader = new SAMFileReader(bamFile, bamIndexFile);
        samReader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT);

        int i = 0;
        while (temp != null) {
            if (!temp.equals("")) {
                if (temp.contains("DEL")) {
                    DeletionInfo di = new DeletionInfo(temp);
                    if (di.getSize() > insertSize) {
//                        di.setDeletionInfo(samReader);
                        deletionList.add(di);
                    }
                }
            }
            temp = br.readLine();
            i++;
            if (i % 100 == 0) {
                System.out.println(i + " th deletion is imported ...");
            }
        }

        samReader.close();

        br.close();
        fr.close();
    }
   
        public void homologyFilter(String id, ArrayList deletionList) throws IOException, InterruptedException {
        String queryChr;
        String queryLociStart;
        String queryLociEnd;
        String query;
        String upstreamSeq;
        String downstreamSeq;
        String temp;
        Runtime operator;
        Process process;
        InputStream is;
        InputStreamReader isr;
        BufferedReader br;
        FileReader fr;
        int homologyCnt = 0;

//        String fastaDir = System.getProperty("user.dir") + "/fasta/";
//        String pslDir = System.getProperty("user.dir") + "/psl/";
        String idDir = System.getProperty("user.dir") + "/" + id;
        String fastaDir = idDir + "/fasta/";
        String pslDir = idDir + "/psl/";

        query = "mkdir " + idDir;
        operator = Runtime.getRuntime();
        process = operator.exec(query);
        try {
            process.waitFor();
        } finally {
            process.destroy();
        }

        query = "mkdir " + fastaDir;
        operator = Runtime.getRuntime();
        process = operator.exec(query);
        try {
            process.waitFor();
        } finally {
            process.destroy();
        }

        query = "mkdir " + pslDir;
        operator = Runtime.getRuntime();
        process = operator.exec(query);
        try {
            process.waitFor();
        } finally {
            process.destroy();
        }

        int chrNo = deletionList.size();
        for (int i = 1; i < chrNo; i++) {
            ArrayList chrDel = (ArrayList) deletionList.get(i);
            int chrDelSize = chrDel.size();
            for (int j = 0; j < chrDelSize; j++) {
                DeletionInfo di = (DeletionInfo) chrDel.get(j);
                makeFastaForBlat(di, fastaDir);
                boolean isHomology = isHomology(di, fastaDir, pslDir);
                di.setHomology(isHomology);
                if (isHomology) {
                    homologyCnt++;
                }

                if ((j + 1) % 100 == 0) {
                    System.out.println((i + 1) + " th deletion homology is searched ...");
                }
            }
        }

//        System.out.println("Num of passed deletions : " + (deletionList.size() - homologyCnt));
        System.out.println("Num of filtered deletions by homology search : " + homologyCnt);

    }

    public void makeFastaForBlat(DeletionInfo di, String fastaDir) throws IOException, InterruptedException {
        String query;
        Runtime operator;
        Process process;
        InputStream is;
        InputStreamReader isr;
        BufferedReader br;
        FileWriter fw;
        BufferedWriter bw;

        String upstreamSeq = "";
        String downstreamSeq = "";

        query = InputParameter.getFastahack() + " -r " + di.getChr() + ":" + (di.getPos1() - InputParameter.getInsertSize()) + ".." + (di.getPos1() - 1) + " " + InputParameter.getHumanRefFasta();
        operator = Runtime.getRuntime();
        process = operator.exec(query);
        try {
            process.waitFor();
            is = process.getInputStream();
            isr = new InputStreamReader(is);
            br = new BufferedReader(isr);
            String temp = br.readLine();
            while (temp != null) {
                upstreamSeq += temp;
                temp = br.readLine();
            }
            br.close();
            isr.close();
            is.close();
        } finally {
            process.destroy();
        }

        query = InputParameter.getFastahack() + " -r " + di.getChr() + ":" + (di.getPos2() + 1) + ".." + (di.getPos2() + InputParameter.getInsertSize()) + " " + InputParameter.getHumanRefFasta();
        operator = Runtime.getRuntime();
        process = operator.exec(query);
        try {
            process.waitFor();
            is = process.getInputStream();
            isr = new InputStreamReader(is);
            br = new BufferedReader(isr);
            String temp = br.readLine();
            while (temp != null) {
                downstreamSeq += temp;
                temp = br.readLine();
            }
            br.close();
            isr.close();
            is.close();
        } finally {
            process.destroy();
        }

        String delInfo = di.getChr() + "_" + di.getPos1() + "_" + di.getPos2();
        String outputUpstream = delInfo + "_up.fa";
        String outputDownstream = delInfo + "_down.fa";

        fw = new FileWriter(fastaDir + outputUpstream);
        bw = new BufferedWriter(fw);
        bw.write(">" + delInfo + "\n");
        bw.write(upstreamSeq + "\n");
        bw.flush();
        bw.close();
        fw.close();

        fw = new FileWriter(fastaDir + outputDownstream);
        bw = new BufferedWriter(fw);
        bw.write(">" + delInfo + "\n");
        bw.write(downstreamSeq + "\n");
        bw.flush();
        bw.close();
        fw.close();
    }

    public boolean isHomology(DeletionInfo di, String fastaDir, String pslDir) throws IOException, InterruptedException {
        boolean isRepeat = false;
        String query;
        Runtime operator;
        Process process;
        InputStream is;
        InputStreamReader isr;
        FileReader fr;
        BufferedReader br;
        FileWriter fw;
        BufferedWriter bw;
        String[] s;

        int searchSpace = 2 * InputParameter.getInsertSize();

        String delInfo = di.getChr() + "_" + di.getPos1() + "_" + di.getPos2();

        try {
            fr = new FileReader(pslDir + delInfo + "_up.psl");
        } catch (FileNotFoundException ex) {
            query = InputParameter.getBlat() + " -t=dna -q=dna " + InputParameter.getHumanRef2bit() + " " + fastaDir + delInfo + "_up.fa " + pslDir + delInfo + "_up.psl";
            operator = Runtime.getRuntime();
            process = operator.exec(query);
            try {
                process.waitFor();
            } finally {
                process.destroy();
            }
            fr = new FileReader(pslDir + delInfo + "_up.psl");
        }
        br = new BufferedReader(fr);
        boolean isHeader = true;
        String temp = br.readLine();
        while (temp != null && !isRepeat) {
            if (temp.indexOf("---------------") != -1) {
                isHeader = false;
                temp = br.readLine(); // candidates start
            }
            if (!isHeader) {
                if (temp != null) {
                    s = temp.split("\t");
                    String candidate_chr = s[13];
                    if (candidate_chr.equals(di.getChr())) {
                        int matchLen = Integer.parseInt(s[0]);
                        String strand = s[8];
                        if ((matchLen != InputParameter.getInsertSize())) {
                            if ((matchLen >= InputParameter.getReadLength() * 0.9) && strand.contains("+")) {    // matched read > readLength*0.9 bp, + strand
                                int estimatedHomologyPos = di.getPos2() - (InputParameter.getInsertSize() - InputParameter.getReadLength());
                                int searchSpaceUp = estimatedHomologyPos - searchSpace;
                                int searchSpaceDown = estimatedHomologyPos + searchSpace;
//                                int searchSpaceDown = di.getPos2();
                                String[] block_size = s[18].split(",");
                                String[] block_start = s[20].split(",");
                                for (int i = 0; i < block_size.length; i++) {
                                    if (Integer.parseInt(block_size[i]) >= InputParameter.getReadLength() * 0.9) {
                                        int candidate = Integer.parseInt(block_start[i]);
                                        if (((searchSpaceUp <= candidate) && (searchSpaceDown >= candidate))) {
                                            isRepeat = true;
                                            break;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            temp = br.readLine();
        }
        br.close();
        fr.close();

        if (!isRepeat) {
            try {
                fr = new FileReader(pslDir + delInfo + "_down.psl");
            } catch (FileNotFoundException ex) {
                query = InputParameter.getBlat() + " -t=dna -q=dna " + InputParameter.getHumanRef2bit() + " " + fastaDir + delInfo + "_down.fa " + pslDir + delInfo + "_down.psl";
                operator = Runtime.getRuntime();
                process = operator.exec(query);
                try {
                    process.waitFor();
                } finally {
                    process.destroy();
                }
                fr = new FileReader(pslDir + delInfo + "_down.psl");
            }
            br = new BufferedReader(fr);
            isHeader = true;
            temp = br.readLine();
            while (temp != null && !isRepeat) {
                if (temp.indexOf("---------------") != -1) {
                    isHeader = false;
                    temp = br.readLine(); // candidates start
                }
                if (!isHeader) {
                    if (temp != null) {
                        s = temp.split("\t");
                        String candidate_chr = s[13];
                        if (candidate_chr.equals(di.getChr())) {
                            int matchLen = Integer.parseInt(s[0]);
                            String strand = s[8];
                            if ((matchLen != InputParameter.getInsertSize())) {
                                if ((matchLen >= InputParameter.getReadLength() * 0.9) && strand.contains("+")) {    // matched read > readLength*0.9 bp, + strand
                                    int estimatedHomologyPos = di.getPos1() + (InputParameter.getInsertSize() - InputParameter.getReadLength());
                                    int searchSpaceUp = estimatedHomologyPos - searchSpace;
//                                    int searchSpaceUp = di.getPos1();
                                    int searchSpaceDown = estimatedHomologyPos + searchSpace;
                                    String[] block_size = s[18].split(",");
                                    String[] block_start = s[20].split(",");
                                    for (int i = 0; i < block_size.length; i++) {
                                        if (Integer.parseInt(block_size[i]) >= InputParameter.getReadLength() * 0.9) {
                                            int candidate = Integer.parseInt(block_start[i]);
                                            if (((searchSpaceUp <= candidate) && (searchSpaceDown >= candidate))) {
                                                isRepeat = true;
                                                break;
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
                temp = br.readLine();
            }
            br.close();
            fr.close();
        }
        return isRepeat;
    }
   
    // Estimate the parameters of mixture model (k=2)
    public void estimateMixtureModel(ArrayList deletionList, MixtureModel mm) {
        ArrayList sampleList = new ArrayList();
        int chrNo = deletionList.size();
        for (int i = 1; i < chrNo; i++) {
            ArrayList chrDel = (ArrayList) deletionList.get(i);
            int chrDelSize = chrDel.size();
            for (int j = 0; j < chrDelSize; j++) {
                DeletionInfo di = (DeletionInfo) chrDel.get(j);
                if (!di.isHomology() && !di.isGap()) {
                    if (di.getCellularity() < InputParameter.getCellularityThrd() && di.getCellularity() > 0) {
                        sampleList.add(di);
                    }
                }
            }
        }

//        double[] coefficientOfVariation = new double[sampleList.size()];
//        for (int i = 0; i < coefficientOfVariation.length; i++) {
//            coefficientOfVariation[i] = ((DeletionInfo) sampleList.get(i)).getCoefficientOfvariation();
//        }
//
//        Arrays.sort(coefficientOfVariation);
//        int cutoffIdx = (int) (coefficientOfVariation.length * (1 - InputParameter.getCoefficientOfvariationCutoff())) - 1;
//        double cutoff = coefficientOfVariation[cutoffIdx];
//
//        ArrayList filteredSampleList = new ArrayList();
//        for (int i = 0; i < sampleList.size(); i++) {
//            DeletionInfo di = (DeletionInfo) sampleList.get(i);
//            if (di.getCoefficientOfvariation() <= cutoff) {
//                filteredSampleList.add(di);
//            }
//        }
       
        double[] quartileArray = new double[sampleList.size()];
        for (int i = 0; i < quartileArray.length; i++) {
            quartileArray[i] = ((DeletionInfo) sampleList.get(i)).getCellularity();
        }

        Arrays.sort(quartileArray);
       
        int upperQuartileIdx = (int) (quartileArray.length * 0.75) - 1;
        int lowerQuartileIdx = (int) (quartileArray.length * 0.25) - 1;
        double upperQuartile = quartileArray[upperQuartileIdx];
        double lowerQuartile = quartileArray[lowerQuartileIdx];
        double interQuartileRange = upperQuartile - lowerQuartile;
        double upperCutoff = upperQuartile + interQuartileRange;
//        double lowerCutoff = lowerQuartile - interQuartileRange;
       
        System.out.println("UpperCutoff = "+upperCutoff);
//        System.out.println("LowerCutoff = "+lowerCutoff);
       
        int upperFilteredCnt = 0;
//        int lowerFilteredCnt = 0;

        ArrayList filteredSampleList = new ArrayList();
        for (int i = 0; i < sampleList.size(); i++) {
            DeletionInfo di = (DeletionInfo) sampleList.get(i);
//            if (di.getCellularity() > lowerCutoff && di.getCellularity() < upperCutoff) {
            if (di.getCellularity() < upperCutoff) {
                filteredSampleList.add(di);
            }else{
//                if(di.getCellularity()<=lowerCutoff){
//                    lowerFilteredCnt++;
//                }else{
                    upperFilteredCnt++;
//                }
               
            }
        }
       
//        System.out.println("\nTotal lower filtered count : "+lowerFilteredCnt+"\n");
        System.out.println("\nTotal upper filtered count : "+upperFilteredCnt+"\n");

//        double minCellularity = 0.5;
//        double maxCellularity = 0;
       
        double[] cellularity = new double[filteredSampleList.size()];
        for (int i = 0; i < cellularity.length; i++) {
            cellularity[i] = ((DeletionInfo) filteredSampleList.get(i)).getCellularity();
           
//            if(cellularity[i]<minCellularity) minCellularity = cellularity[i];
//            if(cellularity[i]>maxCellularity) maxCellularity = cellularity[i];
        }

        PVector[] points = new PVector[cellularity.length];       
        for (int i = 0; i < points.length; i++) {
            points[i] = new PVector(1);
            points[i].array[0] = cellularity[i];
        }

        int numComponents = 2;
        int iter = 0;
        do {
            iter++;
            System.out.println(iter);
            Vector<PVector>[] clusters = KMeans.run(points, numComponents);           
          
            mm = ExpectationMaximization.initialize(clusters);
            mm = ExpectationMaximization.run(points, mm);
        } while (mm.getMeanDiff() < InputParameter.getMixedMeanDiffThrd() && iter < 10);

        for (int i = 0; i < numComponents; i++) {
            System.out.println(mm.getWeight(i));
            System.out.println(mm.getMean(i));
            System.out.println(mm.getStd(i) + "\n");
        }
    }
}
TOP

Related Classes of solodel.bak

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.