Package htsjdk.samtools.util

Examples of htsjdk.samtools.util.ProgressLogger


        ////////////////////////////////////////////////////////////////////////////
        // Loop over the reference and the reads and calculate the basic metrics
        ////////////////////////////////////////////////////////////////////////////
        ReferenceSequence ref = null;
        final ProgressLogger
                progress = new ProgressLogger(log);
        while ((ref = referenceFile.nextSequence()) != null) {
            final byte[] refBases = ref.getBases();
            StringUtil.toUpperCase(refBases);
            final int refLength = refBases.length;
            final int lastWindowStart = refLength - WINDOW_SIZE;

            final byte[] gc = calculateAllGcs(refBases, windowsByGc, lastWindowStart);
            while (iterator.hasNext() && iterator.peek().getReferenceIndex() == ref.getContigIndex()) {
                final SAMRecord rec = iterator.next();

                if (!rec.getReadPairedFlag() || rec.getFirstOfPairFlag()) ++this.totalClusters;

                if (!rec.getReadUnmappedFlag()) {
                    final int pos = rec.getReadNegativeStrandFlag() ? rec.getAlignmentEnd() - WINDOW_SIZE : rec.getAlignmentStart();
                    ++this.totalAlignedReads;

                    if (pos > 0) {
                        final int windowGc = gc[pos];

                        if (windowGc >= 0) {
                            ++readsByGc[windowGc];
                            basesByGc[windowGc+= rec.getReadLength();
                            errorsByGc[windowGc] +=
                                    SequenceUtil.countMismatches(rec, refBases, IS_BISULFITE_SEQUENCED) +
                                            SequenceUtil.countInsertedBases(rec) + SequenceUtil.countDeletedBases(rec);
                        }
                    }
                }

                progress.record(rec);
            }

            log.info("Processed reference sequence: " + ref.getName());
        }
View Full Code Here


            program.setup(in.getFileHeader(), input);
            anyUseNoRefReads = anyUseNoRefReads || program.usesNoRefReads();
        }


        final ProgressLogger progress = new ProgressLogger(log);

        for (final SAMRecord rec : in) {
            final ReferenceSequence ref;
            if (walker == null || rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
                ref = null;
            }
            else {
                ref = walker.get(rec.getReferenceIndex());
            }

            for (final SinglePassSamProgram program : programs) {
                program.acceptRead(rec, ref);
            }

            progress.record(rec);

            // See if we need to terminate early?
            if (stopAfter > 0 && progress.getCount() >= stopAfter) {
                break;
            }

            // And see if we're into the unmapped reads at the end
            if (!anyUseNoRefReads && rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
View Full Code Here

        final FastqReader freader = new FastqReader(FASTQ,ALLOW_AND_IGNORE_EMPTY_LINES);
        final SAMFileHeader header = createFileHeader();
        final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, OUTPUT);

        int readCount = 0;
        final ProgressLogger progress = new ProgressLogger(LOG);
        for ( ; freader.hasNext()  ; readCount++) {
            final FastqRecord frec = freader.next();
            final SAMRecord srec = createSamRecord(header, getReadName(frec.getReadHeader(), false) , frec, false) ;
            srec.setReadPairedFlag(false);
            writer.addAlignment(srec);
            progress.record(srec);
        }

        writer.close();
        return readCount;
    }
View Full Code Here

        final FastqReader freader2 = new FastqReader(FASTQ2,ALLOW_AND_IGNORE_EMPTY_LINES);
        final SAMFileHeader header = createFileHeader() ;
        final SAMFileWriter writer = (new SAMFileWriterFactory()).makeSAMOrBAMWriter(header, false, OUTPUT);

        int readCount = 0;
        final ProgressLogger progress = new ProgressLogger(LOG);
        for ( ; freader1.hasNext() && freader2.hasNext() ; readCount++) {
            final FastqRecord frec1 = freader1.next();
            final FastqRecord frec2 = freader2.next();

            final String frec1Name = getReadName(frec1.getReadHeader(), true);
            final String frec2Name = getReadName(frec2.getReadHeader(), true);
            final String baseName = getBaseName(frec1Name, frec2Name, freader1, freader2);

            final SAMRecord srec1 = createSamRecord(header, baseName, frec1, true) ;
            srec1.setFirstOfPairFlag(true);
            srec1.setSecondOfPairFlag(false);
            writer.addAlignment(srec1);
            progress.record(srec1);

            final SAMRecord srec2 = createSamRecord(header, baseName, frec2, true) ;
            srec2.setFirstOfPairFlag(false);
            srec2.setSecondOfPairFlag(true);
            writer.addAlignment(srec2);
            progress.record(srec2);
        }

        writer.close();

        if (freader1.hasNext() || freader2.hasNext()) {
View Full Code Here

        final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader, false, OUTPUT);

        // Iterate over the records, revert original base qualities, and push them into a SortingCollection by queryname
        final SortingCollection<SAMRecord> sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(outHeader),
                new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM);
        final ProgressLogger revertingProgress = new ProgressLogger(log, 1000000, " reverted OQs");
        int numOriginalQualitiesRestored = 0;
        for (final SAMRecord record : in) {
            // Clean up reads that map off the end of the reference
            AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(record);

            if (RESTORE_ORIGINAL_QUALITIES && null != record.getOriginalBaseQualities()) {
                // revert the original base qualities
                record.setBaseQualities(record.getOriginalBaseQualities());
                record.setOriginalBaseQualities(null);
                numOriginalQualitiesRestored++;
            }
            if (!foundPairedMappedReads && record.getReadPairedFlag() && !record.getReadUnmappedFlag()) foundPairedMappedReads = true;
            revertingProgress.record(record);
            sorter.add(record);
        }
        CloserUtil.close(in);
        log.info("Reverted the original base qualities for " + numOriginalQualitiesRestored + " records");

        /**
         * Iterator through sorting collection output
         * 1. Set mate cigar string and mate information
         * 2. push record into SAMFileWriter to the output
         */
        final SamPairUtil.SetMateInfoIterator sorterIterator = new SamPairUtil.SetMateInfoIterator(sorter.iterator(), true);
        final ProgressLogger sorterProgress = new ProgressLogger(log, 1000000, " mate cigars added");
        while (sorterIterator.hasNext()) {
            final SAMRecord record = sorterIterator.next();
            out.addAlignment(record);
            sorterProgress.record(record);
        }
        sorterIterator.close();
        CloserUtil.close(out);
        log.info("Updated " + sorterIterator.getNumMateCigarsAdded() + " records with mate cigar");
        if (!foundPairedMappedReads) log.info("Did not find any paired mapped reads.");
View Full Code Here

             * NB: BED is zero-based, but a BEDCodec by default (since it is returns tribble Features) has an offset of one,
             * so it returns 1-based starts.  Ugh.  Set to zero.
             */
            final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(BEDCodec.StartOffset.ZERO), false);
            final CloseableTribbleIterator<BEDFeature> iterator = bedReader.iterator();
            final ProgressLogger progressLogger = new ProgressLogger(LOG, (int) 1e6);

            while (iterator.hasNext()) {
                final BEDFeature bedFeature = iterator.next();
                final String sequenceName = bedFeature.getChr();
                /**
                 * NB: BED is zero-based, so we need to add one here to make it one-based.  Please observe we set the start
                 * offset to zero when creating the BEDCodec.
                 */
                final int start = bedFeature.getStart() + 1;
                /**
                 * NB: BED is 0-based OPEN (which, for the end is equivalent to 1-based closed).
                 */
                final int end = bedFeature.getEnd();
                // NB: do not use an empty name within an interval
                String name = bedFeature.getName();
                if (name.isEmpty()) name = null;

                final SAMSequenceRecord sequenceRecord = header.getSequenceDictionary().getSequence(sequenceName);

                // Do some validation
                if (null == sequenceRecord) {
                    throw new PicardException(String.format("Sequence '%s' was not found in the sequence dictionary", sequenceName));
                }
                else if (start < 1) {
                    throw new PicardException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start));
                }
                else if (sequenceRecord.getSequenceLength() < start) {
                    throw new PicardException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start));
                }
                else if (end < 1) {
                    throw new PicardException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end));
                }
                else if (sequenceRecord.getSequenceLength() < end) {
                    throw new PicardException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end));
                }
                else if (end < start - 1) {
                    throw new PicardException(String.format("On sequence '%s', end < start-1: %d <= %d", sequenceName, end, start));
                }

                final Interval interval = new Interval(sequenceName, start, end, bedFeature.getStrand() == Strand.POSITIVE, name);
                intervalList.add(interval);

                progressLogger.record(sequenceName, start);
            }
            CloserUtil.close(bedReader);

            // Sort and write the output
            intervalList.uniqued().write(OUTPUT);
View Full Code Here

        }

        ////////////////////////////////////////////////////////////////////////
        // Main loop that consumes reads, clips them and writes them to the output
        ////////////////////////////////////////////////////////////////////////
        final ProgressLogger progress = new ProgressLogger(log, 1000000, "Read");
        final SAMRecordIterator iterator = in.iterator();

        final AdapterMarker adapterMarker = new AdapterMarker(ADAPTER_TRUNCATION_LENGTH, adapters).
                setMaxPairErrorRate(MAX_ERROR_RATE_PE).setMinPairMatchBases(MIN_MATCH_BASES_PE).
                setMaxSingleEndErrorRate(MAX_ERROR_RATE_SE).setMinSingleEndMatchBases(MIN_MATCH_BASES_SE).
                setNumAdaptersToKeep(NUM_ADAPTERS_TO_KEEP).
                setThresholdForSelectingAdaptersToKeep(PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN);

        while (iterator.hasNext()) {
            final SAMRecord rec = iterator.next();
            final SAMRecord rec2 = rec.getReadPairedFlag() && iterator.hasNext() ? iterator.next() : null;
            rec.setAttribute(ReservedTagConstants.XT, null);

            // Do the clipping one way for PE and another for SE reads
            if (rec.getReadPairedFlag()) {
                // Assert that the input file is in query name order only if we see some PE reads
                if (order != SAMFileHeader.SortOrder.queryname) {
                    throw new PicardException("Input BAM file must be sorted by queryname");
                }

                if (rec2 == null) throw new PicardException("Missing mate pair for paired read: " + rec.getReadName());
                rec2.setAttribute(ReservedTagConstants.XT, null);

                // Assert that we did in fact just get two mate pairs
                if (!rec.getReadName().equals(rec2.getReadName())){
                    throw new PicardException("Adjacent reads expected to be mate-pairs have different names: " +
                            rec.getReadName() + ", " + rec2.getReadName());
                }

                // establish which of pair is first and which second
                final SAMRecord first, second;

                if (rec.getFirstOfPairFlag() && rec2.getSecondOfPairFlag()){
                    first = rec;
                    second = rec2;
                }
                else if (rec.getSecondOfPairFlag() && rec2.getFirstOfPairFlag()) {
                    first = rec2;
                    second = rec;
                }
                else {
                    throw new PicardException("Two reads with same name but not correctly marked as 1st/2nd of pair: " + rec.getReadName());
                }

                adapterMarker.adapterTrimIlluminaPairedReads(first, second);
            }
            else {
                adapterMarker.adapterTrimIlluminaSingleRead(rec);
            }

            // Then output the records, update progress and metrics
            for (final SAMRecord r : new SAMRecord[] {rec, rec2}) {
                if (r != null) {
                    progress.record(r);
                    if (out != null) out.addAlignment(r);

                    final Integer clip = rec.getIntegerAttribute(ReservedTagConstants.XT);
                    if (clip != null) histo.increment(rec.getReadLength() - clip + 1);
                }
View Full Code Here

                this.MAX_RECORDS_IN_RAM,
                this.BLOCK_SIZE,
                this.TMP_DIR);

        // progress logger!
        final ProgressLogger progress = new ProgressLogger(log, (int) 1e6, "Read");

        // Go through the records
        for (final SAMRecord record : new IterableAdapter<SAMRecord>(iterator)) {
            if (progress.record(record)) {
                iterator.logMemoryStats(log);
            }

            // Update the program record if necessary
            updateProgramRecord(record, chainedPgIds);

            // Write the alignment
            out.addAlignment(record);
        }

        // remember to close the inputs
        iterator.close();

        out.close();

        // For convenience to reference
        final Histogram<Short> opticalDupesByLibraryId = iterator.getOpticalDupesByLibraryId();

        // Log info
        log.info("Processed " + progress.getCount() + " records");
        log.info("Found " + iterator.getNumRecordsWithNoMateCigar() + " records with no mate cigar optional tag.");
        log.info("Marking " + iterator.getNumDuplicates() + " records as duplicates.");
        log.info("Found " + ((long) opticalDupesByLibraryId.getSumOfValues()) + " optical duplicate clusters."); // cast as long due to returning a double

        // Write out the metrics
View Full Code Here

    if (!ASSUME_SORTED && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
      throw new PicardException("The input file " + INPUT.getAbsolutePath() + " does not appear to be coordinate sorted");
    }

    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE);
    final ProgressLogger progressLogger = new ProgressLogger(log);

    final RrbsMetricsCollector metricsCollector = new RrbsMetricsCollector(METRIC_ACCUMULATION_LEVEL, samReader.getFileHeader().getReadGroups(),
        C_QUALITY_THRESHOLD, NEXT_BASE_QUALITY_THRESHOLD, MINIMUM_READ_LENGTH, MAX_MISMATCH_RATE);

    for (final SAMRecord samRecord : samReader) {
      progressLogger.record(samRecord);
      if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) {
        final ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex());
        metricsCollector.acceptRecord(samRecord, referenceSequence);
      }
    }
View Full Code Here

        final SAMFileWriter outWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader,
                                                                                      outHeader.getSortOrder() == inHeader.getSortOrder(),
                                                                                      OUTPUT);

        final ProgressLogger progress = new ProgressLogger(log);
        for (final SAMRecord read : in) {
            read.setAttribute(SAMTag.RG.name(), RGID);
            outWriter.addAlignment(read);
            progress.record(read);
        }

        // cleanup
        in.close();
        outWriter.close();
View Full Code Here

TOP

Related Classes of htsjdk.samtools.util.ProgressLogger

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.