Package org.broadinstitute.gatk.utils.sam

Examples of org.broadinstitute.gatk.utils.sam.GATKSAMRecord


            if ( perReadAlleleLikelihoodMap.size() == 0 )
                return null;

            for (PerReadAlleleLikelihoodMap maps : perReadAlleleLikelihoodMap.values() ) {
                for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
                    final GATKSAMRecord read = el.getKey();
                    depth += 1;
                }
            }
        }
        else
View Full Code Here


                boolean isFW = el.getKey().getReadNegativeStrandFlag();

                int row = matchesRef ? 0 : 1;
                int column = isFW ? 0 : 1;

                final GATKSAMRecord read = el.getKey();
                table[row][column] += 1;
            }
        }

        return table;
View Full Code Here

        final String[] readGroups = {"RG1", "RG2", "RGbla"};
        for (int idx = 0; idx < NUM_READS; idx++) {
            for (final String rgs : readGroups) {
                final int length = 10 + rnd.nextInt(100); // random read length, at least 10 bp long
                final GATKSAMRecord read = ReadUtils.createRandomRead(length, false);
                final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(rgs);
                rg.setPlatform("illumina");
                read.setReadGroup(rg);
                read.setReadNegativeStrandFlag(rnd.nextBoolean());
                final byte[] mQuals = read.getBaseQualities(EventType.BASE_SUBSTITUTION);
                final byte[] iQuals = read.getBaseQualities(EventType.BASE_INSERTION);
                final byte[] dQuals = read.getBaseQualities(EventType.BASE_DELETION);
                ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);

                // check that the length is correct
                Assert.assertEquals(rc.getMismatchesKeySet().length, length);
                Assert.assertEquals(rc.getInsertionsKeySet().length, length);
View Full Code Here

    public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {

        // store the original bases and then write Ns over low quality ones
        final byte[] originalBases = read.getReadBases().clone();
        // Write N's over the low quality tail of the reads to avoid adding them into the context
        final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS);
       
        final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag();
        byte[] bases = clippedRead.getReadBases();
        if (negativeStrand)
            bases = BaseUtils.simpleReverseComplement(bases);

        final ArrayList<Integer> mismatchKeys = contextWith(bases, mismatchesContextSize, mismatchesKeyMask);
        final ArrayList<Integer> indelKeys = contextWith(bases, indelsContextSize, indelsKeyMask);
View Full Code Here

     * Update the recalibration statistics using the information in recalInfo
     * @param recalInfo data structure holding information about the recalibration values for a single read
     */
    @Requires("recalInfo != null")
    public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
        final GATKSAMRecord read = recalInfo.getRead();
        final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
        final RecalibrationTables tables = getUpdatableRecalibrationTables();
        final NestedIntegerArray<RecalDatum> qualityScoreTable = tables.getQualityScoreTable();

        for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
            if( ! recalInfo.skip(offset) ) {

                for (final EventType eventType : EventType.values()) {
                    final int[] keys = readCovariates.getKeySet(offset, eventType);
                    final int eventIndex = eventType.ordinal();
View Full Code Here

     * @param contextSize      the context size to use
     * @param locus            the position
     * @return possibly null Haplotype object constructed from the read
     */
    private Haplotype getHaplotypeFromRead(final PileupElement p, final int contextSize, final int locus) {
        final GATKSAMRecord read = p.getRead();
        if ( read.getCigar() == null )
            return null;

        final byte[] haplotypeBases = new byte[contextSize];
        Arrays.fill(haplotypeBases, (byte) REGEXP_WILDCARD);
        final byte[] baseQualities = new byte[contextSize];
        Arrays.fill(baseQualities, (byte)0);

        byte[] readBases = read.getReadBases();
        readBases = AlignmentUtils.readToAlignmentByteArray(read.getCigar(), readBases); // Adjust the read bases based on the Cigar string
        byte[] readQuals = read.getBaseQualities();
        readQuals = AlignmentUtils.readToAlignmentByteArray(read.getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string

        final int readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), p, read.getAlignmentStart(), locus);
        final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2;

        for (int i = 0; i < contextSize; i++) {
            final int baseOffset = i + baseOffsetStart;
            if (baseOffset < 0) {
View Full Code Here

    private double scoreReadAgainstHaplotype(final PileupElement p, final int contextSize, final Haplotype haplotype, final int locus) {
        double expected = 0.0;
        double mismatches = 0.0;

        final GATKSAMRecord read = p.getRead();
        if ( read.getCigar() == null )
            return 0.0;

        // What's the expected mismatch rate under the model that this read is actually sampled from
        // this haplotype?  Let's assume the consensus base c is a random choice of one of A, C, G, or T, and that
        // the observed base is actually from a c with an error rate e.  Since e is the rate at which we'd
        // see a miscalled c, the expected mismatch rate is really e.  So the expected number of mismatches
        // is just sum_i e_i for i from 1..n for n sites
        //
        // Now, what's the probabilistic sum of mismatches?  Suppose that the base b is equal to c.  Well, it could
        // actually be a miscall in a matching direction, which would happen at an e / 3 rate.  If b != c, then
        // the chance that it is actually a mismatch is 1 - e, since any of the other 3 options would be a mismatch.
        // So the probability-weighted mismatch rate is sum_i ( matched ? e_i / 3 : 1 - e_i ) for i = 1 ... n
        final byte[] haplotypeBases = haplotype.getBases();
        byte[] readBases = read.getReadBases();

        readBases = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readBases); // Adjust the read bases based on the Cigar string
        byte[] readQuals = read.getBaseQualities();
        readQuals = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string
        int readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, read.getAlignmentStart(), locus);
        final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2;

        for (int i = 0; i < contextSize; i++) {
            final int baseOffset = i + baseOffsetStart;
            if (baseOffset < 0) {
View Full Code Here

        ReadCovariates.clearKeysCache();
    }

    @Test(enabled = true)
    public void testSimpleContexts() {
        GATKSAMRecord read = ReadUtils.createRandomRead(1000);
        GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, RAC.LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS);
        ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
        covariate.recordValues(read, readCovariates);

        verifyCovariateArray(readCovariates.getMismatchesKeySet(), RAC.MISMATCHES_CONTEXT_SIZE, clippedRead, covariate);
        verifyCovariateArray(readCovariates.getInsertionsKeySet(), RAC.INDELS_CONTEXT_SIZE, clippedRead, covariate);
View Full Code Here

        for ( final Allele allele : vc.getAlleles() ) { alleleCounts.put(allele, 0); }

        for ( final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
            final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles);
            if (! a.isInformative() ) continue; // read is non-informative
            final GATKSAMRecord read = el.getKey();
            final int prevCount = alleleCounts.get(a.getMostLikelyAllele());
            alleleCounts.put(a.getMostLikelyAllele(), prevCount + 1);
        }

        final int[] counts = new int[alleleCounts.size()];
View Full Code Here

     * For each read at this locus get the various covariate values and increment that location in the map based on
     * whether or not the base matches the reference at this particular location
     */
    public Long map( final ReferenceContext ref, final GATKSAMRecord originalRead, final RefMetaDataTracker metaDataTracker ) {

        final GATKSAMRecord read = ReadClipper.hardClipSoftClippedBases( ReadClipper.hardClipAdaptorSequence(originalRead) );
        if( read.isEmpty() ) { return 0L; } // the whole read was inside the adaptor so skip it

        RecalUtils.parsePlatformForRead(read, RAC);
        if (!RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { // parse the solid color space and check for color no-calls
            return 0L; // skip this read completely
        }
View Full Code Here

TOP

Related Classes of org.broadinstitute.gatk.utils.sam.GATKSAMRecord

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.