Package htsjdk.variant.vcf

Examples of htsjdk.variant.vcf.VCFFileReader


    /** Private helper method to read through the VCF and create one or more bit sets. */
    private static void loadVcf(final File dbSnpFile,
                                final SAMSequenceDictionary sequenceDictionary,
                                final Map<DbSnpBitSetUtil, Set<VariantType>> bitSetsToVariantTypes) {

        final VCFFileReader variantReader = new VCFFileReader(dbSnpFile);
        final CloseableIterator<VariantContext> variantIterator = variantReader.iterator();

        while (variantIterator.hasNext()) {
            final VariantContext kv = variantIterator.next();

            for (final Map.Entry<DbSnpBitSetUtil, Set<VariantType>> tuple : bitSetsToVariantTypes.entrySet()) {
View Full Code Here


        IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
        IOUtil.assertFileIsWritable(OUTPUT);

        final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY);

        final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
        final VCFHeader fileHeader = fileReader.getFileHeader();

        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                .setReferenceDictionary(samSequenceDictionary)
                .clearOptions();
        if (CREATE_INDEX)
            builder.setOption(Options.INDEX_ON_THE_FLY);

        final VariantContextWriter vcfWriter = builder.setOutputFile(OUTPUT).build();
        fileHeader.setSequenceDictionary(samSequenceDictionary);
        vcfWriter.writeHeader(fileHeader);

        final ProgressLogger progress = new ProgressLogger(log, 10000);
        final CloseableIterator<VariantContext> iterator = fileReader.iterator();
        while (iterator.hasNext()) {
            final VariantContext context = iterator.next();
            vcfWriter.add(context);
            progress.record(context.getChr(), context.getStart());
        }
View Full Code Here

        return true;
    }

    /** Validates that all headers contain the same set of genotyped samples and that files are in order by position of first record. */
    private static void assertSameSamplesAndValidOrdering(final List<File> inputFiles) {
        final VCFHeader header = new VCFFileReader(inputFiles.get(0), false).getFileHeader();
        final SAMSequenceDictionary dict = header.getSequenceDictionary();
        final VariantContextComparator comparator = new VariantContextComparator(header.getSequenceDictionary());
        final List<String> samples = header.getGenotypeSamples();

        File lastFile = null;
        VariantContext lastContext = null;

        for (final File f : inputFiles) {
            final VCFFileReader in = new VCFFileReader(f, false);
            dict.assertSameDictionary(in.getFileHeader().getSequenceDictionary());
            final List<String> theseSamples = in.getFileHeader().getGenotypeSamples();

            if (!samples.equals(theseSamples)) {
                final SortedSet<String> s1 = new TreeSet<String>(samples);
                final SortedSet<String> s2 = new TreeSet<String>(theseSamples);
                s1.removeAll(theseSamples);
                s2.removeAll(samples);

                throw new IllegalArgumentException("VCFs do not have identical sample lists." +
                        " Samples unique to first file: " + s1 + ". Samples unique to " + f.getAbsolutePath() + ": " + s2 + ".");
            }

            final CloseableIterator<VariantContext> variantIterator = in.iterator();
            if (variantIterator.hasNext()) {
                final VariantContext currentContext = variantIterator.next();
                if (lastContext != null) {
                    if (comparator.compare(lastContext, currentContext) >= 0) {
                        throw new IllegalArgumentException("First record in file " + f.getAbsolutePath() + " is not after first record in " +
View Full Code Here

        VCFHeader firstHeader = null;
        VariantContextComparator comparator = null;

        for (final File f : inputFiles) {
            log.debug("Gathering from file: ", f.getAbsolutePath());
            final VCFFileReader variantReader = new VCFFileReader(f, false);
            final PeekableIterator<VariantContext> variantIterator = new PeekableIterator<VariantContext>(variantReader.iterator());
            final VCFHeader header = variantReader.getFileHeader();

            if (firstHeader == null) {
                firstHeader = header;
                out.writeHeader(firstHeader);
                comparator = new VariantContextComparator(firstHeader.getContigLines());
View Full Code Here

    @Override
    protected int doWork() {
        IOUtil.assertFileIsReadable(INPUT);
        IOUtil.assertFileIsWritable(OUTPUT);

      final VCFFileReader reader = new VCFFileReader(INPUT, false);
      final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
      final SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();

      if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
      }

        final ProgressLogger progress = new ProgressLogger(Log.getInstance(MakeSitesOnlyVcf.class), 10000);

        // Setup the site-only file writer
        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                .setOutputFile(OUTPUT)
                .setReferenceDictionary(sequenceDictionary);
        if (CREATE_INDEX)
            builder.setOption(Options.INDEX_ON_THE_FLY);
        else
            builder.unsetOption(Options.INDEX_ON_THE_FLY);
        final VariantContextWriter writer = builder.build();

        final VCFHeader header = new VCFHeader(inputVcfHeader.getMetaDataInInputOrder(), SAMPLE);
        writer.writeHeader(header);

        // Go through the input, strip the records and write them to the output
        final CloseableIterator<VariantContext> iterator = reader.iterator();
      while (iterator.hasNext()) {
        final VariantContext full = iterator.next();
            final VariantContext site = subsetToSamplesWithOriginalAnnotations(full, SAMPLE);
            writer.add(site);
            progress.record(site.getChr(), site.getStart());
View Full Code Here

    @Override
    protected int doWork() {
        IOUtil.assertFileIsReadable(INPUT);
        IOUtil.assertFileIsWritable(OUTPUT);

        final VCFFileReader in = new VCFFileReader(INPUT);
        final VCFHeader header = in.getFileHeader();

        if (header.getGenotypeSamples().size() > 1) {
            throw new IllegalArgumentException("Input VCF must be single-sample.");
        }

        if (OLD_SAMPLE_NAME != null && !OLD_SAMPLE_NAME.equals(header.getGenotypeSamples().get(0))) {
            throw new IllegalArgumentException("Input VCF did not contain expected sample. Contained: " + header.getGenotypeSamples().get(0));
        }

        final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterFactory.DEFAULT_OPTIONS);
        if (CREATE_INDEX) options.add(Options.INDEX_ON_THE_FLY); else options.remove(Options.INDEX_ON_THE_FLY);

        final VCFHeader outHeader = new VCFHeader(header.getMetaDataInInputOrder(), CollectionUtil.makeList(NEW_SAMPLE_NAME));
        final VariantContextWriter out = VariantContextWriterFactory.create(OUTPUT, outHeader.getSequenceDictionary(), options);
        out.writeHeader(outHeader);

        for (final VariantContext ctx : in) {
            out.add(ctx);
        }

        out.close();
        in.close();

        return 0;
    }
View Full Code Here

        IOUtil.assertFileIsReadable(INPUT);
        IOUtil.assertFileIsWritable(OUTPUT);

        final List<VariantFilter>  variantFilters = CollectionUtil.makeList(new AlleleBalanceFilter(MIN_AB), new FisherStrandFilter(MAX_FS), new QdFilter(MIN_QD));
        final List<GenotypeFilter> genotypeFilters = CollectionUtil.makeList(new GenotypeQualityFilter(MIN_GQ), new DepthFilter(MIN_DP));
        final VCFFileReader in = new VCFFileReader(INPUT, false);
        final FilterApplyingVariantIterator iterator = new FilterApplyingVariantIterator(in.iterator(), variantFilters, genotypeFilters);

        final VariantContextWriter out = new VariantContextWriterBuilder().setOutputFile(OUTPUT).build();
        final VCFHeader header = in.getFileHeader();
        header.addMetaDataLine(new VCFFilterHeaderLine("AllGtsFiltered", "Site filtered out because all genotypes are filtered out."));
        header.addMetaDataLine(new VCFFormatHeaderLine("FT", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Genotype filters."));
        for (final VariantFilter filter : variantFilters) {
            for (final VCFFilterHeaderLine line : filter.headerLines()) {
                header.addMetaDataLine(line);
            }
        }

        out.writeHeader(in.getFileHeader());

        while (iterator.hasNext()) {
            out.add(iterator.next());
        }

        out.close();
        in.close();
        return 0;
    }
View Full Code Here

                intervals = intervals.uniqued();
            }
            log.info("Finished loading up region lists.");
        }

        final VCFFileReader truthReader = new VCFFileReader(TRUTH_VCF, USE_VCF_INDEX);
        final VCFFileReader callReader = new VCFFileReader(CALL_VCF, USE_VCF_INDEX);

        // Check that the samples actually exist in the files!
        if (!truthReader.getFileHeader().getGenotypeSamples().contains(TRUTH_SAMPLE)) {
            throw new PicardException("File " + TRUTH_VCF.getAbsolutePath() + " does not contain genotypes for sample " + TRUTH_SAMPLE);
        }
        if (!callReader.getFileHeader().getGenotypeSamples().contains(CALL_SAMPLE)) {
            throw new PicardException("File " + CALL_VCF.getAbsolutePath() + " does not contain genotypes for sample " + CALL_SAMPLE);
        }

        // Verify that both VCFs have the same Sequence Dictionary
        SequenceUtil.assertSequenceDictionariesEqual(truthReader.getFileHeader().getSequenceDictionary(), callReader.getFileHeader().getSequenceDictionary());

        if (usingIntervals) {
            // If using intervals, verify that the sequence dictionaries agree with those of the VCFs
            SequenceUtil.assertSequenceDictionariesEqual(intervalsSamSequenceDictionary, truthReader.getFileHeader().getSequenceDictionary());
        }

        // Build the pair of iterators over the regions of interest
        final Iterator<VariantContext> truthIterator, callIterator;
        if (usingIntervals) {
            truthIterator = new ByIntervalListVariantContextIterator(truthReader, intervals);
            callIterator = new ByIntervalListVariantContextIterator(callReader, intervals);
        }
        else {
            truthIterator = truthReader.iterator();
            callIterator = callReader.iterator();
        }

        // Now do the iteration and count things up
        final PairedVariantSubContextIterator pairedIterator = new PairedVariantSubContextIterator(truthIterator, TRUTH_SAMPLE, callIterator, CALL_SAMPLE, truthReader.getFileHeader().getSequenceDictionary());
        snpCounter   = new GenotypeConcordanceCounts();
View Full Code Here

  @Override
  protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final ProgressLogger progress = new ProgressLogger(log, 10000);

    final VCFFileReader fileReader = new VCFFileReader(INPUT);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final SAMSequenceDictionary sequenceDictionary =
        SEQUENCE_DICTIONARY != null
            ? SAMFileReader.getSequenceDictionary(SEQUENCE_DICTIONARY)
            : fileHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
      throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }

        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                .setReferenceDictionary(sequenceDictionary)
                .clearOptions();
        if (CREATE_INDEX)
            builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter snpWriter = builder.setOutputFile(SNP_OUTPUT).build();
    final VariantContextWriter indelWriter = builder.setOutputFile(INDEL_OUTPUT).build();
    snpWriter.writeHeader(fileHeader);
    indelWriter.writeHeader(fileHeader);

        int incorrectVariantCount = 0;

    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
      final VariantContext context = iterator.next();
      if (context.isIndel()) indelWriter.add(context);
      else if (context.isSNP()) snpWriter.add(context);
      else {
View Full Code Here

    private final File INPUT = new File("testdata/picard/vcf/filter/testFiltering.vcf");

    /** Tests that all records get PASS set as their filter when extreme values are used for filtering. */
    @Test public void testNoFiltering() throws Exception {
        final File out = testFiltering(INPUT, 0, 0, 0, Double.MAX_VALUE);
        final VCFFileReader in = new VCFFileReader(out, false);
        for (final VariantContext ctx : in) {
            if (!ctx.filtersWereApplied() || ctx.isFiltered()) {
                Assert.fail("Context should not have been filtered: " + ctx.toString());
            }
        }
View Full Code Here

TOP

Related Classes of htsjdk.variant.vcf.VCFFileReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.