Package abra.cadabra

Source Code of abra.cadabra.SimpleCaller$BaseInfo

package abra.cadabra;

import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;

import abra.CompareToReference2;

public class SimpleCaller {
 
  private ReadLocusReader sample;
  private CompareToReference2 c2r;
  private DecimalFormat df = new DecimalFormat("0.000");
 
  private int minAltObs;
  private float minAltFraction;
  private int minMapq;
  private int minDistanceFromIndel;
 
  private List<CachedCall> callCache = new ArrayList<CachedCall>();
 
  private static int MIN_BASE_QUALITY = 20;

  public void call(String reference, String bam, float minAltFraction, int minAltObs, int minMapq, int minDistanceFromIndel) throws Exception {
    this.minAltFraction = minAltFraction;
    this.minAltObs = minAltObs;
    this.minMapq = minMapq;
    this.minDistanceFromIndel = minDistanceFromIndel;
   
    c2r = new CompareToReference2();
    c2r.init(reference);
   
    outputHeader();
   
    this.sample = new ReadLocusReader(bam);
   
    String lastChromosome = "";
   
    int lastIndelPos = -1;
   
    Iterator<ReadsAtLocus> sampleIter = sample.iterator();
   
    while (sampleIter.hasNext()) {
      ReadsAtLocus reads = sampleIter.next();
     
      if (!reads.getChromosome().equals(lastChromosome)) {
        System.err.println("Processing chromosome: " + reads.getChromosome());
        lastChromosome = reads.getChromosome();
        lastIndelPos = -1;
        flushCache();
      }
     
      int a = 0;
      int c = 0;
      int t = 0;
      int g = 0;
      int n = 0;
     
      int aAtEdge = 0;
      int cAtEdge = 0;
      int tAtEdge = 0;
      int gAtEdge = 0;
     
      if (!c2r.containsChromosome(reads.getChromosome())) {
        System.err.println("Chromosome: [" + reads.getChromosome() + "] not in reference.  Assuming we've reached unaligned pile and stopping.");
        break;
      }
     
      char ref = c2r.containsChromosome(reads.getChromosome()) ? Character.toUpperCase(c2r.getSequence(reads.getChromosome(), reads.getPosition(), 1).charAt(0)) : 'N';
     
      if (ref != 'N') {
     
//        if (reads.getPosition() == 3193842) {
//          System.out.println("yo.");
//        }
       
        int numIndels = 0;
       
        for (SAMRecord read : reads.getReads()) {
         
          if (read.getMappingQuality() >= this.minMapq && !read.getReadUnmappedFlag()) {
         
            BaseInfo baseInfo = getBaseAtReferencePosition(read, reads.getPosition());
           
            switch(baseInfo.base) {
              case 'A':
                a++;
                if (baseInfo.isNearEdge) {
                  aAtEdge++;
                }
                break;
              case 'C':
                c++;
                if (baseInfo.isNearEdge) {
                  cAtEdge++;
                }
                break;
              case 'T':
                t++;
                if (baseInfo.isNearEdge) {
                  tAtEdge++;
                }
                break;
              case 'G':
                g++;
                if (baseInfo.isNearEdge) {
                  gAtEdge++;
                }
                break;
              default:
                n++;
                break;
            }
           
            if (baseInfo.isIndel) {
              numIndels++;
            }
          }
        }
       
        if ((float) numIndels / (float) reads.getReads().size() > this.minAltFraction) {
          // There is indel support at this locus.  Track it so we can filter nearby SNPs.
          lastIndelPos = reads.getPosition();
        }
       
        char[] bases = { 'A','C','T','G'};
        int[] counts = {a,c,t,g};
        int[] edgeCounts = { aAtEdge, cAtEdge, tAtEdge, gAtEdge };
       
        CallInfo callInfo = getAltBaseAndCounts(bases, counts, edgeCounts, ref, reads.getReads().size());
       
        // Require N number of alt obs not near edge of M block
        if ((callInfo.altCount - callInfo.altEdgeCount) > this.minAltObs && callInfo.altCount > 0) {
          output(reads.getChromosome(), reads.getPosition(), callInfo, lastIndelPos);
        }
      }
     
      checkCache(reads.getPosition(), lastIndelPos);
    }
   
    flushCache();
    System.err.println("Done.");
  }
 
  private void outputHeader() {
    System.out.println("##fileformat=VCFv4.1");
    System.out.println("##FORMAT=<ID=DP1,Number=1,Type=String,Description=\"Total read depth\">");
    System.out.println("##FORMAT=<ID=DP2,Number=1,Type=String,Description=\"Ref obs + Alt obs\">");
    System.out.println("##FORMAT=<ID=AO,Number=1,Type=String,Description=\"Alt observations\">");
    System.out.println("##FORMAT=<ID=RO,Number=1,Type=String,Description=\"Ref observations\">");
    System.out.println("##FORMAT=<ID=AF1,Number=1,Type=String,Description=\"Allele Frequency based upon DP1\">");
    System.out.println("##FORMAT=<ID=AF2,Number=1,Type=String,Description=\"Allele Frequency based upon DP2\">");
    System.out.println("#CHROM  POS  ID  REF  ALT  QUAL  FILTER  INFO  FORMAT  SAMPLE");
  }
 
  private void output(String chromosome, int position, CallInfo callInfo, int lastIndelPos) {
    if (position >= lastIndelPos+minDistanceFromIndel) {
      this.callCache.add(new CachedCall(chromosome, position, callInfo));
    }
  }
 
  private void checkCache(int currPosition, int lastIndelPos) {
    Iterator<CachedCall> iter = this.callCache.iterator();
    while (iter.hasNext()) {
      CachedCall call = iter.next();
      if (Math.abs(call.position - lastIndelPos) < minDistanceFromIndel) {
        // Too close to indel.  Discard call.
        iter.remove();
      } else if (currPosition >= call.position + minDistanceFromIndel) {
        // We have progressed minDistanceFromIndel positions away from the call.  Safe to output
        // Output the call and remove from cache
        write(call.chromosome, call.position, call.callInfo);
        iter.remove();
      }
    }
  }
 
  private void flushCache() {
    for (CachedCall call : this.callCache) {
      write(call.chromosome, call.position, call.callInfo);
    }
   
    this.callCache.clear();
  }
   
  private void write(String chromosome, int position, CallInfo callInfo) {
   
    StringBuffer call = new StringBuffer();
    call.append(chromosome);
    call.append('\t');
    call.append(position);
    call.append("\t.\t");
    call.append(callInfo.ref);
    call.append('\t');
    call.append(callInfo.alt);
    call.append("\t.\t.\tFOO=BAR;\tDP1:DP2:RO:AO:AF1:AF2\t");
   
    int depth1 = callInfo.totalDepth;
    int depth2 = callInfo.altCount + callInfo.refCount;
    float vaf1 = (float) callInfo.altCount / (float) callInfo.totalDepth;
    float vaf2 = (float) callInfo.altCount / (float) depth2;
   
    if (vaf2 > minAltFraction) {
      String vaf1Str = df.format(vaf1);
      String vaf2Str = df.format(vaf2);
     
      call.append(depth1);
      call.append(':');
      call.append(depth2);
      call.append(':');
      call.append(callInfo.refCount);
      call.append(':');
      call.append(callInfo.altCount);
      call.append(':');
      call.append(vaf1Str);
      call.append(':');
      call.append(vaf2Str);
     
      System.out.println(call.toString());
    }
  }
 
  private CallInfo getAltBaseAndCounts(char[] bases, int[] counts, int[] edgeCounts, char ref, int totalDepth) {
    int refCount = 0;
    int altCount = 0;
    int altEdgeCount = 0;
    char alt = 'N';
   
    for (int i=0; i<bases.length; i++) {
      if (bases[i] == ref) {
        refCount = counts[i];
      } else {
        if (counts[i] > altCount) {
          alt = bases[i];
          altCount = counts[i];
          altEdgeCount = edgeCounts[i];
        }
      }
    }
   
    return new CallInfo(ref, refCount, alt, altCount, altEdgeCount, totalDepth);
  }
 
  private BaseInfo getBaseAtReferencePosition(SAMRecord read, int refPos) {
    boolean isNearEdge = false;
    boolean isIndel = false;
    int alignmentStart = read.getAlignmentStart();
    Cigar cigar = read.getCigar();
   
    char base = 'N';
   
    int readIdx = 0;
    int currRefPos = alignmentStart;
   
    for (CigarElement element : cigar.getCigarElements()) {
           
      if (element.getOperator() == CigarOperator.M) {
        readIdx += element.getLength();
        currRefPos += element.getLength();
       
        if (currRefPos > refPos) {  // TODO: double check end of read base here...
         
          int offset = currRefPos - refPos;
         
          if ((offset < 3) || (offset+3 >= element.getLength())) {
            // This position is within 3 bases of start/end of alignment or clipping or indel.
            // Tag this base for evaluation downstream.
            isNearEdge = true;
          }
         
          readIdx -= offset;
          if ((readIdx < 0) || (readIdx >= read.getReadBases().length)) {
            System.err.println("Read index out of bounds for read: " + read.getSAMString());
            break;
          }
         
          if (read.getBaseQualities()[readIdx] >= MIN_BASE_QUALITY) {
            base = (char) read.getReadBases()[readIdx];
          }
          break;
        }
      } else if (element.getOperator() == CigarOperator.I) {
        if (currRefPos == refPos) {
          //TODO: Handle insertions
          isIndel = true;
          break;
        }
        readIdx += element.getLength();
      } else if (element.getOperator() == CigarOperator.D) {
        if (refPos >= currRefPos && refPos <= currRefPos+element.getLength()) {
          //TODO: Handle deletions
          isIndel = true;
          break;
        }       
        currRefPos += element.getLength();
      } else if (element.getOperator() == CigarOperator.S) {
        readIdx += element.getLength();
      }     
    }
   
    return new BaseInfo(Character.toUpperCase(base), isNearEdge, isIndel);
  }
 
  static class CallInfo {
    char ref;
    int refCount;
    char alt;
    int altCount;
    int totalDepth;
    int altEdgeCount;
   
    CallInfo(char ref, int refCount, char alt, int altCount, int altEdgeCount, int totalDepth) {
      this.ref = ref;
      this.refCount = refCount;
      this.alt = alt;
      this.altCount = altCount;
      this.altEdgeCount = altEdgeCount;
      this.totalDepth = totalDepth;
    }
  }
 
  static class BaseInfo {
    char base;
    boolean isNearEdge;
    boolean isIndel;
   
    BaseInfo(char base, boolean isNearEdge, boolean isIndel) {
      this.base = base;
      this.isNearEdge = isNearEdge;
      this.isIndel = isIndel;
    }
  }
 
  static class CachedCall {
    String chromosome;
    int position;
    CallInfo callInfo;
   
    CachedCall(String chromosome, int position, CallInfo callInfo) {
      this.chromosome = chromosome;
      this.position = position;
      this.callInfo = callInfo;
    }
  }
 
  public static void main(String[] args) throws Exception {
    SimpleCaller c = new SimpleCaller();
   
    String reference = args[0];
    String bam = args[1];
    float minAllelicFraction = Float.parseFloat(args[2]);
    int minAltObs = Integer.parseInt(args[3]);
    int minMapq = Integer.parseInt(args[4]);
    int minDistanceFromIndel = Integer.parseInt(args[5]);
 
    c.call(reference, bam, minAllelicFraction, minAltObs, minMapq, minDistanceFromIndel);
   
//    c.call("/home/lmose/reference/chr20/chr20.fa", "/home/lmose/dev/efseq/piotr_test1/calling/k101.sscs.chr20.bam", .003F, 2, 40, 50);
//    c.call("/home/lmose/reference/chr21/chr21.fa", "/home/lmose/dev/efseq/piotr_test1/calling/tiny21.bam", .003F, 2, 40, 50);
   
//    c.call("/home/lmose/reference/chr20/chr20.fa", "/home/lmose/dev/efseq/piotr_test1/calling/k101.chr20.bam", .003F, 2, 40, 50);
   
  }
}
TOP

Related Classes of abra.cadabra.SimpleCaller$BaseInfo

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.