Package htsjdk.samtools.reference

Examples of htsjdk.samtools.reference.ReferenceSequenceFileWalker


        IOUtil.assertFileIsWritable(OUTPUT);
        IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);

        // Setup all the inputs
        final ProgressLogger progress = new ProgressLogger(log, 10000000, "Processed", "loci");
        final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
        final SAMFileReader in        = new SAMFileReader(INPUT);

        final SamLocusIterator iterator = new SamLocusIterator(in);
        final List<SamRecordFilter> filters   = new ArrayList<SamRecordFilter>();
        final CountingFilter dupeFilter       = new CountingDuplicateFilter();
        final CountingFilter mapqFilter       = new CountingMapQFilter(MINIMUM_MAPPING_QUALITY);
        final CountingPairedFilter pairFilter = new CountingPairedFilter();
        filters.add(mapqFilter);
        filters.add(dupeFilter);
        filters.add(pairFilter);
        filters.add(new SecondaryAlignmentFilter()); // Not a counting filter because we never want to count reads twice
        iterator.setSamFilters(filters);
        iterator.setEmitUncoveredLoci(true);
        iterator.setMappingQualityScoreCutoff(0); // Handled separately because we want to count bases
        iterator.setQualityScoreCutoff(0);        // Handled separately because we want to count bases
        iterator.setIncludeNonPfReads(false);

        final int max = COVERAGE_CAP;
        final long[] HistogramArray = new long[max + 1];
        final boolean usingStopAfter = STOP_AFTER > 0;
        final long stopAfter = STOP_AFTER-1;
        long counter = 0;

        long basesExcludedByBaseq   = 0;
        long basesExcludedByOverlap = 0;
        long basesExcludedByCapping = 0;

        // Loop through all the loci
        while (iterator.hasNext()) {
            final SamLocusIterator.LocusInfo info = iterator.next();

            // Check that the reference is not N
            final ReferenceSequence ref = refWalker.get(info.getSequenceIndex());
            final byte base = ref.getBases()[info.getPosition()-1];
            if (base == 'N') continue;

            // Figure out the coverage while not counting overlapping reads twice, and excluding various things
            final HashSet<String> readNames = new HashSet<String>(info.getRecordAndPositions().size());
View Full Code Here


        this.unmappedBamFile = unmappedBamFile;
        this.targetBamFile = targetBamFile;
        this.referenceFasta = referenceFasta;

        this.refSeq = new ReferenceSequenceFileWalker(referenceFasta);
        this.sequenceDictionary = refSeq.getSequenceDictionary();
        if (this.sequenceDictionary == null) {
            throw new PicardException("No sequence dictionary found for " + referenceFasta.getAbsolutePath() +
                    ".  Use CreateSequenceDictionary.jar to create a sequence dictionary.");
        }
View Full Code Here

    }

    protected SAMFileHeader getHeader() { return this.header; }

    protected void resetRefSeqFileWalker() {
        this.refSeq = new ReferenceSequenceFileWalker(referenceFasta);
    }
View Full Code Here

        // Setup the standard inputs
        IOUtil.assertFileIsReadable(input);
        final SAMFileReader in = new SAMFileReader(input);

        // Optionally load up the reference sequence and double check sequence dictionaries
        final ReferenceSequenceFileWalker walker;
        if (referenceSequence == null) {
            walker = null;
        }
        else {
            IOUtil.assertFileIsReadable(referenceSequence);
            walker = new ReferenceSequenceFileWalker(referenceSequence);

            if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
                SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(),
                                                             walker.getSequenceDictionary());
            }
        }

        // Check on the sort order of the BAM file
        {
            final SortOrder sort = in.getFileHeader().getSortOrder();
            if (sort != SortOrder.coordinate) {
                if (assumeSorted) {
                    log.warn("File reports sort order '" + sort + "', assuming it's coordinate sorted anyway.");
                }
                else {
                    throw new PicardException("File " + input.getAbsolutePath() + " should be coordinate sorted but " +
                                              "the header says the sort order is " + sort + ". If you believe the file " +
                                              "to be coordinate sorted you may pass ASSUME_SORTED=true");
                }
            }
        }

        // Call the abstract setup method!
        boolean anyUseNoRefReads = false;
        for (final SinglePassSamProgram program : programs) {
            program.setup(in.getFileHeader(), input);
            anyUseNoRefReads = anyUseNoRefReads || program.usesNoRefReads();
        }


        final ProgressLogger progress = new ProgressLogger(log);

        for (final SAMRecord rec : in) {
            final ReferenceSequence ref;
            if (walker == null || rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
                ref = null;
            }
            else {
                ref = walker.get(rec.getReferenceIndex());
            }

            for (final SinglePassSamProgram program : programs) {
                program.acceptRead(rec, ref);
            }
View Full Code Here

    final SAMFileReader samReader = new SAMFileReader(INPUT);
    if (!ASSUME_SORTED && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
      throw new PicardException("The input file " + INPUT.getAbsolutePath() + " does not appear to be coordinate sorted");
    }

    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE);
    final ProgressLogger progressLogger = new ProgressLogger(log);

    final RrbsMetricsCollector metricsCollector = new RrbsMetricsCollector(METRIC_ACCUMULATION_LEVEL, samReader.getFileHeader().getReadGroups(),
        C_QUALITY_THRESHOLD, NEXT_BASE_QUALITY_THRESHOLD, MINIMUM_READ_LENGTH, MAX_MISMATCH_RATE);

    for (final SAMRecord samRecord : samReader) {
      progressLogger.record(samRecord);
      if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) {
        final ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex());
        metricsCollector.acceptRecord(samRecord, referenceSequence);
      }
    }
    metricsCollector.finish();
    final MetricsFile<RrbsMetrics, Comparable<?>> rrbsMetrics = getMetricsFile();
View Full Code Here

            else {
                targets = padded;
            }
        }

        final ReferenceSequenceFileWalker referenceWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);

        // Check that the reference and the target list have matching headers
        SequenceUtil.assertSequenceDictionariesEqual(referenceWalker.getSequenceDictionary(),
                targets.getHeader().getSequenceDictionary());

        // Design the baits!
        int discardedBaits = 0;
        final IntervalList baits = new IntervalList(targets.getHeader());
        for (final Interval target : targets) {
            final int sequenceIndex = targets.getHeader().getSequenceIndex(target.getSequence());
            final ReferenceSequence reference = referenceWalker.get(sequenceIndex);

            for (final Bait bait : DESIGN_STRATEGY.design(this, target, reference)) {
                if (bait.length() != BAIT_SIZE) {
                    throw new PicardException("Bait designed at wrong length: " + bait);
                }
View Full Code Here

        IOUtil.assertFileIsReadable(INPUT);
        IOUtil.assertFileIsWritable(OUTPUT);
        if (INTERVALS != null) IOUtil.assertFileIsReadable(INTERVALS);
        IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
       
        final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
        final SAMFileReader in = new SAMFileReader(INPUT);
       
        final Set<String> samples   = new HashSet<String>();
        final Set<String> libraries = new HashSet<String>();
        for (final SAMReadGroupRecord rec : in.getFileHeader().getReadGroups()) {
            samples.add(nvl(rec.getSample(), UNKNOWN_SAMPLE));
            libraries.add(nvl(rec.getLibrary(), UNKNOWN_LIBRARY));
        }
       
        // Setup the calculators
        final Set<String> contexts = CONTEXTS.isEmpty() ? makeContextStrings(CONTEXT_SIZE) : CONTEXTS;
        final ListMap<String, Calculator> calculators = new ListMap<String, Calculator>();
        for (final String context : contexts) {
            for (final String library : libraries) {
                calculators.add(context, new Calculator(library, context));
            }
        }
       
        // Load up dbSNP if available
        log.info("Loading dbSNP File: " + DB_SNP);
        final DbSnpBitSetUtil dbSnp;
        if (DB_SNP != null) dbSnp = new DbSnpBitSetUtil(DB_SNP, in.getFileHeader().getSequenceDictionary());
        else dbSnp = null;

        // Make an iterator that will filter out funny looking things
        final SamLocusIterator iterator;
        if (INTERVALS != null) {
            final IntervalList intervals = IntervalList.fromFile(INTERVALS);
            intervals.unique();
            iterator = new SamLocusIterator(in, intervals, false);
        }
        else {
            iterator = new SamLocusIterator(in);
        }
        iterator.setEmitUncoveredLoci(false);
        iterator.setMappingQualityScoreCutoff(MINIMUM_MAPPING_QUALITY);
        iterator.setSamFilters(Arrays.asList(
                new NotPrimaryAlignmentFilter(),
                new DuplicateReadFilter(),
                new InsertSizeFilter(MINIMUM_INSERT_SIZE, MAXIMUM_INSERT_SIZE)
        ));

        log.info("Starting iteration.");
        long nextLogTime = 0;
        int sites = 0;
       
        for (final SamLocusIterator.LocusInfo info : iterator) {
            // Skip dbSNP sites
            final String chrom = info.getSequenceName();
            final int pos      = info.getPosition();
            final int index    = pos-1;
            if (dbSnp != null && dbSnp.isDbSnpSite(chrom, pos)) continue;
           
            // Skip sites at the end of chromosomes
            final byte[] bases = refWalker.get(info.getSequenceIndex()).getBases();
            if (pos < 3 || pos > bases.length-3) continue;

            // Skip non C-G bases
            final byte base = StringUtil.toUpperCase(bases[index]);
            if (base != 'C' && base != 'G') continue;
View Full Code Here

TOP

Related Classes of htsjdk.samtools.reference.ReferenceSequenceFileWalker

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.