Examples of org.broadinstitute.gatk.utils.sam.GATKSAMRecord

Package org.broadinstitute.gatk.utils.sam

Examples of org.broadinstitute.gatk.utils.sam.GATKSAMRecord

org.broadinstitute.gatk.utils.sam.GATKSAMRecord
@author ebanks, depristo. GATKSAMRecord: this class extends the samtools BAMRecord class (and SAMRecord) and caches important (and oft-accessed) data that's not already cached by the SAMRecord class. IMPORTANT NOTE: Because ReadGroups are not set through the SAMRecord, if they are ever modified externally then one must also invoke the setReadGroup() method here to ensure that the cache is kept up-to-date. WARNING -- GATKSAMRecords cache several values (that are expensive to compute) that depend on the inferred insert size and the alignment starts and stops of this read and its mate. Changing these values in any way will invalidate the cached values. However, we do not monitor those setter functions, so modifying a GATKSAMRecord in any way may result in stale cached values.

        }


        final int[] hapReadsNotInReference = new int[haplotypes.size()];


        for (int i = 0; i < readCount; i++) {
            final GATKSAMRecord r = as.readList().get(i);


            final int hapNumber = i % haplotypes.size();
            final int offset = i % (haplotypes.get(hapNumber).length() - readLength);
            Assert.assertEquals(r.getReadString(),haplotypes.get(hapNumber).getBaseString().substring(offset,offset+readLength));
            if (as.getReference().indexOf(r.getReadString()) == -1) {
                hapReadsNotInReference[hapNumber]++;
            }
        }


        Assert.assertEquals(hapReadsNotInReference[0],0);

View Full Code Here

            correctedReads.addAll(reads);
        }
        else {
            computeKmerCorrectionMap();
            for (final GATKSAMRecord read: reads) {
                final GATKSAMRecord correctedRead = correctRead(read);
                if (trimLowQualityBases)
                    correctedReads.add(ReadClipper.hardClipLowQualEnds(correctedRead, minTailQuality));
                else
                    correctedReads.add(correctedRead);
            }

View Full Code Here

                inputRead.setReadBases(correctedBases);
                inputRead.setBaseQualities(correctedQuals);
                return inputRead;
            }
            else {
                GATKSAMRecord correctedRead = new GATKSAMRecord(inputRead);


                //  do the actual correction
                // todo - do we need to clone anything else from read?
                correctedRead.setBaseQualities(inputRead.getBaseQualities());
                correctedRead.setIsStrandless(inputRead.isStrandless());
                correctedRead.setReadBases(inputRead.getReadBases());
                correctedRead.setReadString(inputRead.getReadString());
                correctedRead.setReadGroup(inputRead.getReadGroup());
                return correctedRead;
            }
        }
        else {
            readErrorCorrectionStats.numReadsUncorrected++;

View Full Code Here

            return;


        if ( isLeftOverhang(read.loc, splice.loc) ) {
            final int overhang = splice.loc.getStop() - read.loc.getStart() + 1;
            if ( overhangingBasesMismatch(read.read.getReadBases(), 0, splice.reference, splice.reference.length - overhang, overhang) ) {
                final GATKSAMRecord clippedRead = ReadClipper.hardClipByReadCoordinates(read.read, 0, overhang - 1);
                read.setRead(clippedRead);
            }
        }
        else if ( isRightOverhang(read.loc, splice.loc) ) {
            final int overhang = read.loc.getStop() - splice.loc.getStart() + 1;
            if ( overhangingBasesMismatch(read.read.getReadBases(), read.read.getReadLength() - overhang, splice.reference, 0, overhang) ) {
                final GATKSAMRecord clippedRead = ReadClipper.hardClipByReadCoordinates(read.read, read.read.getReadLength() - overhang, read.read.getReadLength() - 1);
                read.setRead(clippedRead);
            }
        }
    }

View Full Code Here


                if (DEBUG) {
                    System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString());
                }


                GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());


                // if the read extends beyond the downstream (right) end of the reference window, clip it
                if ( mustClipDownstream(read, refWindowStop) )
                    read = ReadClipper.hardClipByReadCoordinates(read, refWindowStop - read.getSoftStart() + 1, read.getReadLength() - 1);


                // if the read extends beyond the upstream (left) end of the reference window, clip it
                if ( mustClipUpstream(read, refWindowStart) )
                    read = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, refWindowStart);


                if (read.isEmpty())
                    continue;


                // hard-clip low quality ends - this may introduce extra H elements in CIGAR string
                read = ReadClipper.hardClipLowQualEnds(read, (byte) BASE_QUAL_THRESHOLD );


                if (read.isEmpty())
                    continue;


                // get bases of candidate haplotypes that overlap with reads
                final long readStart = read.getSoftStart();
                final long readEnd = read.getSoftEnd();


                // see if we want to use soft clipped bases. Aligners may soft clip all bases at insertions because they don't match,
                // but they're actually consistent with the insertion!
                // Rule: if a read starts in interval [eventStart-eventLength,eventStart+1] and we are at an insertion, we'll use all soft clipped bases at the beginning.
                // Conversely, if a read ends at [eventStart,eventStart+eventLength] we'll use all soft clipped bases in the end of the read.
                final long eventStartPos = ref.getLocus().getStart();


                // compute total number of clipped bases (soft or hard clipped) and only use them if necessary
                final boolean softClips = useSoftClippedBases(read, eventStartPos, eventLength);
                final int numStartSoftClippedBases = softClips ? read.getAlignmentStart()- read.getSoftStart() : 0;
                final int numEndSoftClippedBases = softClips ? read.getSoftEnd()- read.getAlignmentEnd() : 0 ;
                final byte [] unclippedReadBases = read.getReadBases();
                final byte [] unclippedReadQuals = read.getBaseQualities();


                /**
                 * Compute genomic locations that candidate haplotypes will span.
                 * Read start and stop locations (variables readStart and readEnd) are the original unclipped positions from SAMRecord,
                 * adjusted by hard clips from Cigar string and by qual-based soft-clipping performed above.
                 * We will propose haplotypes that overlap the read with some padding.
                 * True read start = readStart + numStartSoftClippedBases - ReadUtils.getFirstInsertionOffset(read)
                 * Last term is because if a read starts with an insertion then these bases are not accounted for in readStart.
                 * trailingBases is a padding constant(=3) and we additionally add abs(eventLength) to both sides of read to be able to
                 * differentiate context between two haplotypes
                 */
                final int absEventLength = Math.abs(eventLength);
                long startLocationInRefForHaplotypes = Math.max(readStart + numStartSoftClippedBases - trailingBases - ReadUtils.getFirstInsertionOffset(read) - absEventLength, 0);
                long stopLocationInRefForHaplotypes = readEnd - numEndSoftClippedBases + trailingBases + ReadUtils.getLastInsertionOffset(read) + absEventLength;


                if (DEBUG)
                    System.out.format("orig Start:%d orig stop: %d\n", startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes);


                int readLength = read.getReadLength()-numStartSoftClippedBases-numEndSoftClippedBases;


                if (startLocationInRefForHaplotypes < ref.getWindow().getStart()) {
                    startLocationInRefForHaplotypes = ref.getWindow().getStart();                                       // read starts before haplotype: read will have to be cut numStartSoftClippedBases += ref.getWindow().getStart() - startLocationInRefForHaplotypes;
                }
                else if (startLocationInRefForHaplotypes > ref.getWindow().getStop()) {
                    startLocationInRefForHaplotypes = ref.getWindow().getStop();                                        // read starts after haplotype: read will have to be clipped completely;
                }


                // candidate haplotype cannot go beyond reference context
                if (stopLocationInRefForHaplotypes > ref.getWindow().getStop()) {
                    stopLocationInRefForHaplotypes = ref.getWindow().getStop();                                         // check also if end of read will go beyond reference context
                }


                if (stopLocationInRefForHaplotypes <= startLocationInRefForHaplotypes + readLength) {
                    stopLocationInRefForHaplotypes = startLocationInRefForHaplotypes + readLength-1;                    // if there's an insertion in the read, the read stop position will be less than start + read legnth, but we want to compute likelihoods in the whole region that a read might overlap
                }


                // ok, we now figured out the total number of clipped bases on both ends.
                // Figure out where we want to place the haplotype to score read against


                if (DEBUG)
                    System.out.format("numStartSoftClippedBases: %d numEndSoftClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n",
                            numStartSoftClippedBases, numEndSoftClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength());


               // LinkedHashMap<Allele,Double> readEl = new LinkedHashMap<Allele,Double>();


                /**
                 * Check if we'll end up with an empty read once all clipping is done
                 */
                if (numStartSoftClippedBases + numEndSoftClippedBases >= unclippedReadBases.length) {
                    int j=0;
                    for (Allele a: haplotypeMap.keySet()) {
                        perReadAlleleLikelihoodMap.add(p,a,0.0);
                        readLikelihoods[readIdx][j++] = 0.0;
                    }
                }
                else {
                    final int endOfCopy = unclippedReadBases.length - numEndSoftClippedBases;
                    final byte[] readBases = Arrays.copyOfRange(unclippedReadBases, numStartSoftClippedBases, endOfCopy);
                    final byte[] readQuals = Arrays.copyOfRange(unclippedReadQuals, numStartSoftClippedBases, endOfCopy);


                    int j=0;


                    final byte[] contextLogGapOpenProbabilities = new byte[readBases.length];
                    final byte[] contextLogGapContinuationProbabilities  = new byte[readBases.length];


                    // get homopolymer length profile for current haplotype
                    final int[] hrunProfile = new int[readBases.length];
                    getContextHomopolymerLength(readBases,hrunProfile);
                    fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);


                    // get the base insertion and deletion qualities to use
                    final byte[] baseInsertionQualities, baseDeletionQualities;
                    if ( read.hasBaseIndelQualities() ) {
                        baseInsertionQualities = Arrays.copyOfRange(read.getBaseInsertionQualities(), numStartSoftClippedBases, endOfCopy);
                        baseDeletionQualities = Arrays.copyOfRange(read.getBaseDeletionQualities(), numStartSoftClippedBases, endOfCopy);
                    } else {
                        baseInsertionQualities = contextLogGapOpenProbabilities;
                        baseDeletionQualities = contextLogGapOpenProbabilities;
                    }


                    // Create a new read based on the current one, but with trimmed bases/quals, for use in the HMM
                    final GATKSAMRecord processedRead = GATKSAMRecord.createQualityModifiedRead(read, readBases, readQuals, baseInsertionQualities, baseDeletionQualities);


                    // Pack the shortened read and its associated gap-continuation-penalty array into a map, as required by PairHMM
                    final Map<GATKSAMRecord,byte[]> readGCPArrayMap = Collections.singletonMap(processedRead,contextLogGapContinuationProbabilities);


                    // Create a map of alleles to a new set of haplotypes, whose bases have been trimmed to the appropriate genomic locations

View Full Code Here

        }
    }


    @Override
    public GATKSAMRecord map(final ReferenceContext ref, final GATKSAMRecord read, final RefMetaDataTracker metaDataTracker) {
        GATKSAMRecord workingRead = read;


        for ( final RNAReadTransformer transformer : rnaReadTransformers ) {
            workingRead = transformer.apply(workingRead);                    // TODO: when a read transformer can be called directly from the command line we won't need that mechanism any more
        }

View Full Code Here

     * @param isAmongBestHaplotypes true if among the best haplotypes, false if it was just one possible but not so good
     */
    private void writeHaplotype(final Haplotype haplotype,
                                final GenomeLoc paddedRefLoc,
                                final boolean isAmongBestHaplotypes) {
        final GATKSAMRecord record = new GATKSAMRecord(output.getHeader());
        record.setReadBases(haplotype.getBases());
        record.setAlignmentStart(paddedRefLoc.getStart() + haplotype.getAlignmentStartHapwrtRef());
        record.setBaseQualities(Utils.dupBytes((byte) '!', haplotype.getBases().length));
        record.setCigar(AlignmentUtils.consolidateCigar(haplotype.getCigar()));
        record.setMappingQuality(isAmongBestHaplotypes ? 60 : 0);
        record.setReadName("HC" + uniqueNameCounter++);
        record.setAttribute(AlignmentUtils.HAPLOTYPE_TAG,haplotype.hashCode());
        record.setReadUnmappedFlag(false);
        record.setReferenceIndex(paddedRefLoc.getContigIndex());
        record.setAttribute(SAMTag.RG.toString(), READ_GROUP_ID);
        record.setFlags(16);
        output.add(record);
    }

View Full Code Here

    }


    @Test
    public void realignAtContigBorderTest() {
        final int contigEnd = header.getSequence(0).getSequenceLength();
        final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "goodRead", 0, contigEnd - 1, 2);
        read.setCigarString("2M");
        Assert.assertEquals(IndelRealigner.realignmentProducesBadAlignment(read, contigEnd), false);
        read.setCigarString("1M1D1M");
        Assert.assertEquals(IndelRealigner.realignmentProducesBadAlignment(read, contigEnd), true);
    }

View Full Code Here

    }


    @Test(dataProvider = "ClipUpstreamProvider", enabled = true)
    public void clipUpstreamTest(final int readStart, final int readLength, final int delLength) {


        final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, readStart, readLength);
        if ( delLength == 0 )
            read.setCigarString(readLength + "M");
        else
            read.setCigarString((readLength / 2) + "M" + delLength + "D" + (readLength / 2) + "M");


        final boolean result = PairHMMIndelErrorModel.mustClipUpstream(read, refWindowStart);
        Assert.assertEquals(result, read.getSoftStart() < refWindowStart && read.getSoftEnd() > refWindowStart);
    }

View Full Code Here

    }


    @Test(dataProvider = "ClipDownstreamProvider", enabled = true)
    public void clipDownstreamTest(final int readStart, final int readLength, final int delLength) {


        final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, readStart, readLength);
        if ( delLength == 0 )
            read.setCigarString(readLength + "M");
        else
            read.setCigarString((readLength / 2) + "M" + delLength + "D" + (readLength / 2) + "M");


        final boolean result = PairHMMIndelErrorModel.mustClipDownstream(read, refWindowEnd);
        Assert.assertEquals(result, read.getSoftStart() < refWindowEnd && read.getSoftStart() + readLength > refWindowEnd);
    }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.broadinstitute.gatk.utils.sam.GATKSAMRecord

bcbio.gatk.tools.walkers.annotator.DepthOfCoverage

bcbio.gatk.tools.walkers.annotator.FisherStrand

org.broadinstitute.gatk.engine.datasources.providers.LocusViewTemplate

org.broadinstitute.gatk.engine.downsampling.AlleleBiasedDownsamplingUtilsUnitTest

org.broadinstitute.gatk.engine.filters.BadCigarFilterUnitTest

org.broadinstitute.gatk.engine.filters.MalformedReadFilterUnitTest

org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub

org.broadinstitute.gatk.engine.traversals.TraverseDuplicates

org.broadinstitute.gatk.tools.walkers.annotator.DepthPerAlleleBySample

org.broadinstitute.gatk.tools.walkers.annotator.HaplotypeScore

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.