Package org.broad.igv.variant.vcf

Source Code of org.broad.igv.variant.vcf.VCFVariant$ZygosityCount

/*
* Copyright (c) 2007-2012 The Broad Institute, Inc.
* SOFTWARE COPYRIGHT NOTICE
* This software and its documentation are the copyright of the Broad Institute, Inc. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. The Broad Institute is not responsible for its use, misuse, or functionality.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
* Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*/

package org.broad.igv.variant.vcf;

import org.apache.log4j.Logger;
import org.broad.igv.variant.Allele;
import org.broad.igv.variant.Genotype;
import org.broad.igv.variant.Variant;
import org.broad.igv.variant.VariantTrack;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;

import java.util.*;

/**
* @author Jim Robinson, jacob
* @date Aug 1, 2011
*/
public class VCFVariant implements Variant {

    private static Logger log = Logger.getLogger(Variant.class);

    VariantContext variantContext;
    List<Allele> alternateAlleles;
    private ZygosityCount zygosityCount;

    String chr;
    private double[] alleleFreqs;
    private double methylationRate = Double.NaN;  // <= signals unknown / not applicable
    private double coveredSampleFraction = Double.NaN;

    Map<String, VCFGenotype> genotypeMap;

    private int start = -1;

    public VCFVariant(VariantContext variantContext, String chr) {
        this.variantContext = variantContext;
        this.chr = chr;
        init();
    }

    private void init() {

        // Copy the genotype map.  Calls to variantContext.getGenotype() are expensive
        genotypeMap = new HashMap<String, VCFGenotype>();
        for (String sample : getSampleNames()) {
            htsjdk.variant.variantcontext.Genotype genotype = variantContext.getGenotype(sample);
            VCFGenotype vcfGenotype = genotype == null ? null : new VCFGenotype(genotype);
            genotypeMap.put(sample, vcfGenotype);
        }

        zygosityCount = new ZygosityCount();
        for (String sample : getSampleNames()) {
            Genotype genotype = getGenotype(sample);
            zygosityCount.incrementCount(genotype);
        }

        String afString = null;
        String[] alleleFreqKeys = {"AF", "GMAF"};
        try {
            for (String alleleFreqKey : alleleFreqKeys) {
                afString = variantContext.getAttributeAsString(alleleFreqKey, "-1");
                alleleFreqs = parseAFString(afString);
                if (alleleFreqs[0] >= 0) break;
            }
        } catch (NumberFormatException e) {
            alleleFreqs = new double[]{-1};
            log.error("Error parsing allele frequency: " + afString);
        }

    }

    /**
     * Allele frequency is a comma separated list of doubles
     * We strip away brackets and parentheses
     *
     * @param afString
     * @return
     */
    private double[] parseAFString(String afString) {
        afString = afString.replaceAll("[\\[\\]\\(\\)]", "");
        String[] tokens = afString.split(",");
        double[] result = new double[tokens.length];
        for (int ii = 0; ii < tokens.length; ii++) {
            result[ii] = Double.parseDouble(tokens[ii]);
        }
        return result;
    }

    /**
     * Compute the average methylation rate for those samples with data (i.e. with methylation rate recorded).
     */
    private void computeMethylationRate() {

        double methTotal = 0;
        int samplesWithData = 0;
        final int size = getSampleNames().size();
        if (size > 0) {
            for (String sample : getSampleNames()) {
                Genotype genotype = getGenotype(sample);
                double mr = genotype.getAttributeAsDouble("MR");
                double goodBaseCount = genotype.getAttributeAsDouble("MR");
                if (!Double.isNaN(mr) && !Double.isNaN(goodBaseCount) && goodBaseCount > VariantTrack.METHYLATION_MIN_BASE_COUNT) {
                    methTotal += mr;
                    samplesWithData++;
                }
            }
            methylationRate = samplesWithData == 0 ? 0 : methTotal / samplesWithData;
            coveredSampleFraction = ((double) samplesWithData) / size;
        }
    }


    public String getID() {
        return variantContext.getID();
    }

    public boolean isFiltered() {
        return variantContext.isFiltered();
    }

    public String getAttributeAsString(String key) {
        return variantContext.getAttributeAsString(key, null);
    }

    public String getReference() {
        return variantContext.getReference().toString();
    }

    public List<Allele> getAlternateAlleles() {
        if (alternateAlleles == null) {
            List<htsjdk.variant.variantcontext.Allele> tmp = variantContext.getAlternateAlleles();
            alternateAlleles = new ArrayList<Allele>(tmp.size());
            for (htsjdk.variant.variantcontext.Allele a : tmp) {
                alternateAlleles.add(new VCFAllele(a.getBases()));
            }
        }
        return alternateAlleles;
    }

    public double getPhredScaledQual() {
        return variantContext.getPhredScaledQual();
    }

    public String getType() {
        return variantContext.getType().toString();
    }


    /**
     * Return the allele frequency as annotated with an AF or GMAF attribute.  A value of -1 indicates
     * no annotation (unknown allele frequency).
     */
    public double[] getAlleleFreqs() {
        return alleleFreqs;
    }

    /**
     * Return the allele fraction for this variant.  The allele fraction is similiar to allele frequency, but is based
     * on the samples in this VCF as opposed to an AF or GMAF annotation.
     * <p/>
     * A value of -1 indicates unknown
     */
    public double getAlleleFraction() {

        int total = getHomVarCount() + getHetCount() + getHomRefCount();
        return total == 0 ? -1 : (((double) getHomVarCount() + ((double) getHetCount()) / 2) / total);
    }

    /**
     * Return the methylation rate as annoted with a MR attribute.  A value of -1 indicates
     * no annotation (unknown methylation rate).  This option is only applicable for dna methylation data.
     */
    public double getMethlationRate() {
        if (Double.isNaN(methylationRate)) {
            computeMethylationRate();
        }
        return methylationRate;
    }

    public double getCoveredSampleFraction() {
        if (Double.isNaN(coveredSampleFraction)) {
            computeMethylationRate();
        }
        return coveredSampleFraction;
    }

    public Collection<String> getSampleNames() {
        return variantContext.getSampleNames();
    }

    public Map<String, Object> getAttributes() {
        return variantContext.getAttributes();
    }

    @Override
    public Genotype getGenotype(String sample) {
        return genotypeMap.get(sample);
    }

    public Collection<String> getFilters() {
        return variantContext.getFilters();
    }

    @Override
    public int getHomVarCount() {
        return zygosityCount.getHomVar();
    }

    @Override
    public int getHetCount() {
        return zygosityCount.getHet();
    }

    @Override
    public int getHomRefCount() {
        return zygosityCount.getHomRef();
    }

    @Override
    public int getNoCallCount() {
        return zygosityCount.getNoCall();
    }

    @Override
    public String getChr() {
        return chr;
    }

    @Override
    public int getStart() {
        if(this.start < 0){
            calcStart();
        }
        return this.start;
    }

    @Override
    public int getEnd() {
        return variantContext.getEnd();
    }

    @Override
    public String toString() {
        return String.format("VCFVariant[%s:%d-%d]", getChr(), getStart(), getEnd());
    }

    @Override
    public String getPositionString() {
        if (variantContext.getStart() == variantContext.getEnd()) {
            return String.valueOf(variantContext.getStart());
        } else {
            return String.format("%d-%d", variantContext.getStart(), variantContext.getEnd());
        }

    }

    public String getSource(){
        return variantContext.getSource();
    }

    public VariantContext getVariantContext() {
        return variantContext;
    }

    public static VariantContext getVariantContext(Variant variant) {
        if (variant instanceof VCFVariant) {
            return ((VCFVariant) variant).getVariantContext();
        } else {
            List<htsjdk.variant.variantcontext.Allele> alleleList = new ArrayList<htsjdk.variant.variantcontext.Allele>(variant.getAlternateAlleles().size() + 1);
            alleleList.add(htsjdk.variant.variantcontext.Allele.create(variant.getReference(), true));
            for (Allele all : variant.getAlternateAlleles()) {
                alleleList.add(htsjdk.variant.variantcontext.Allele.create(all.getBases(), false));
            }
            VariantContextBuilder vcb = new VariantContextBuilder(variant.getID(), variant.getChr(), variant.getStart(), variant.getEnd(), alleleList);
            return vcb.make();
        }
    }

    /**
     * @author Jim Robinson
     * @date Aug 1, 2011
     */
    public static class ZygosityCount {
        private int homVar = 0;
        private int het = 0;
        private int homRef = 0;
        private int noCall = 0;

        public void incrementCount(Genotype genotype) {
            if (genotype != null) {
                if (genotype.isHomVar()) {
                    homVar++;
                } else if (genotype.isHet()) {
                    het++;
                } else if (genotype.isHomRef()) {
                    homRef++;
                } else {
                    noCall++;
                }
            }
        }

        public int getHomVar() {
            return homVar;
        }

        public int getHet() {
            return het;
        }

        public int getHomRef() {
            return homRef;
        }

        public int getNoCall() {
            return noCall;
        }

    }

    /**
     * VCFs specify padding bases at the beginning of indels so they can be positioned properly.
     * We display the variant only where it actually differs from the reference. So we find the longest
     * common prefix between the reference and variants
     */
    private void calcStart(){
        int prefixLength = 0;

        if(variantContext.getType() == VariantContext.Type.INDEL || variantContext.getType() == VariantContext.Type.MIXED){
            prefixLength = findCommonPrefixLength();
        }
        this.start = (variantContext.getStart() - 1) + prefixLength;
    }

    /**
     * Find the length of the common prefix between the reference and ALL
     * variant alleles
     * @return
     */
    private int findCommonPrefixLength(){
        String ref = variantContext.getReference().getDisplayString();
        int prefixLength = 0;
        boolean foundmisMatch = false;
        for(int refPos = 0; refPos < ref.length() ; refPos++){
            char refChar = ref.charAt(refPos);
            for(Allele var: getAlternateAlleles()){
                byte[] varBases = var.getBases();
                if(refPos >= varBases.length || varBases[refPos] != refChar){
                    foundmisMatch = true;
                    break;
                }
            }
            if(foundmisMatch){
                break;
            }else{
                prefixLength++;
            }
        }
        return prefixLength;
    }
}
TOP

Related Classes of org.broad.igv.variant.vcf.VCFVariant$ZygosityCount

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.