Package org.broadinstitute.gatk.engine.datasources.reads

Examples of org.broadinstitute.gatk.engine.datasources.reads.IntervalSharder


        else
            intervalSortedSet = GenomeLocSortedSet.createSetFromSequenceDictionary(refReader.getSequenceDictionary());

        logger.info(String.format("PROGRESS: Calculating mean and variance: Contig\tRegion.Start\tRegion.Stop\tSize"));       

        IntervalSharder sharder = IntervalSharder.shardOverIntervals(dataSource,intervalSortedSet,IntervalMergingRule.ALL);
        while(sharder.hasNext()) {
            FilePointer filePointer = sharder.next();

            // Size of the file pointer.
            final long size = filePointer.size();           

            BigInteger bigSize = BigInteger.valueOf(size);
            sumOfSquares = sumOfSquares.add(bigSize.pow(2));
            sum = sum.add(bigSize);
            numberOfShards++;

            if(numberOfShards % 1000 == 0) {
                GenomeLoc boundingRegion = getBoundingRegion(filePointer,genomeLocParser);
                logger.info(String.format("PROGRESS: Calculating mean and variance: %s\t%d\t%d\t%d",boundingRegion.getContig(),boundingRegion.getStart(),boundingRegion.getStop(),size));
            }

        }

        // Print out the mean and stddev, where stddev = sqrt((sum(x^2) - (1/N)*sum(x)^2)/N)
        long mean = sum.divide(BigInteger.valueOf(numberOfShards)).longValue();
        long stddev = (long)(Math.sqrt(sumOfSquares.subtract(sum.pow(2).divide(BigInteger.valueOf(numberOfShards))).divide(BigInteger.valueOf(numberOfShards)).doubleValue()));
        logger.info(String.format("Number of shards: %d; mean uncompressed size = %d; stddev uncompressed size  = %d%n",numberOfShards,mean,stddev));

        // Crank through the shards again, this time reporting on the shards significantly larger than the mean.
        long threshold = mean + stddev*5;
        logger.warn(String.format("PROGRESS: Searching for large shards: Contig\tRegion.Start\tRegion.Stop\tSize"));
        out.printf("Contig\tRegion.Start\tRegion.Stop\tSize%n");

        sharder =  IntervalSharder.shardOverIntervals(dataSource,intervalSortedSet,IntervalMergingRule.ALL);
        while(sharder.hasNext()) {
            FilePointer filePointer = sharder.next();

            // Bounding region.
            GenomeLoc boundingRegion = getBoundingRegion(filePointer,genomeLocParser);

            // Size of the file pointer.
View Full Code Here

TOP

Related Classes of org.broadinstitute.gatk.engine.datasources.reads.IntervalSharder

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.