Package abra.cadabra

Source Code of abra.cadabra.SomaticLocusCaller

package abra.cadabra;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import net.sf.samtools.CigarElement;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileReader.ValidationStringency;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;

import abra.CompareToReference2;

/**
* Given a "VCF-like" file of variants to inspect, produces an output file with normal / tumor counts of each variant.
* SNVs require the alt value to match to be counted.  Indels merely require the existence of an indel at the start postion.
*
* @author Lisle E. Mose (lmose at unc dot edu)
*/
public class SomaticLocusCaller {
 
  private List<LocusInfo> loci = new ArrayList<LocusInfo>();
  private CompareToReference2 c2r;
  private int minBaseQual;

  public void call(String normal, String tumor, String vcf, String reference, int minBaseQual) throws IOException {
    loadLoci(vcf);
    c2r = new CompareToReference2();
    c2r.init(reference);
    this.minBaseQual = minBaseQual;
   
    System.err.println("Processing positions");

    SAMFileReader normalReader = new SAMFileReader(new File(normal));
    normalReader.setValidationStringency(ValidationStringency.SILENT);
   
    SAMFileReader tumorReader = new SAMFileReader(new File(tumor));
    normalReader.setValidationStringency(ValidationStringency.SILENT);
       
    for (LocusInfo locus : loci) {
      locus.normalCounts = getCounts(normalReader, locus);
      locus.tumorCounts = getCounts(tumorReader, locus);
    }
   
    normalReader.close();
    tumorReader.close();
   
    System.err.println("Writing results");
   
    outputResults();
  }
 
  private void outputResults() {
    System.out.println("##fileformat=VCFv4.1");
    System.out.println("#CHROM  POS  ID  REF  ALT  QUAL  FILTER  INFO  FORMAT  NORMAL  TUMOR");
   
    for (LocusInfo locus : loci) {
      String[] fields = new String[] {
        locus.chromosome,
        locus.isIndel() ? String.valueOf(locus.posStart+1) : String.valueOf(locus.posStart),
        locus.id,
        locus.ref,
        locus.alt,
        "0",
        getFilter(locus),
        "SOMATIC;",
        "DP:RO:AO",
        getCountsStr(locus.normalCounts),
        getCountsStr(locus.tumorCounts)
      };
     
      StringBuffer buf = new StringBuffer();
       
      for (String field : fields) {
        buf.append(field);
        buf.append('\t');
      }

      buf.deleteCharAt(buf.length()-1);
     
      System.out.println(buf.toString());
    }
  }
 
  private String getCountsStr(Counts counts) {
    return "" + counts.depth + ":" + counts.refCount + ":" + counts.altCount;
  }
 
  private String getFilter(LocusInfo locus) {
    String filter = "";
    if (locus.tumorCounts.altCount == 0) {
      filter = "NO_TUMOR_OBS;";
    }
   
    if (locus.normalCounts.altCount > 0) {
      filter += "NORMAL_OBS;";
    }
   
    if (locus.tumorCounts.depth == 0) {
      filter += "NO_TUMOR_COV;";
    }
   
    if (locus.normalCounts.depth == 0) {
      filter += "NO_NORMAL_COV;";
    }
   
    if (filter.equals("")) {
      filter = "PASS;";
    }
   
    return filter;
  }
 
  private boolean isWithin(int i, int start, int stop) {
    return i >= start && i <= stop;
  }
 
  private boolean hasIndel(SAMRecord read, LocusInfo locus) {
    int readPos = 0;
    int refPosInRead = read.getAlignmentStart();
    int cigarElementIdx = 0;
   
    while (refPosInRead <= locus.posStop && cigarElementIdx < read.getCigar().numCigarElements() && readPos < read.getReadLength()) {
      CigarElement elem = read.getCigar().getCigarElement(cigarElementIdx++);
     
      switch(elem.getOperator()) {
        case H: //NOOP
          break;
        case I:
          if (isWithin(refPosInRead, locus.posStart, locus.posStop)) {
            return true;
          }
          // Fall through to next case
        case S:
          readPos += elem.getLength();
          break;
        case D:
          if (isWithin(refPosInRead, locus.posStart, locus.posStop)) {
            return true;
          }
          // Fall through to next case
        case N:
          refPosInRead += elem.getLength();
          break;
        case M:
          readPos += elem.getLength();
          refPosInRead += elem.getLength();
          break;
        default:
          throw new IllegalArgumentException("Invalid Cigar Operator: " + elem.getOperator() + " for read: " + read.getSAMString());         
      }
    }
   
    return false;
  }
 
  private Object[] getBaseAndQualAtPosition(SAMRecord read, int refPos) {
    int readPos = 0;
    int refPosInRead = read.getAlignmentStart();
    int cigarElementIdx = 0;
   
    while (refPosInRead <= refPos && cigarElementIdx < read.getCigar().numCigarElements() && readPos < read.getReadLength()) {
      CigarElement elem = read.getCigar().getCigarElement(cigarElementIdx++);
     
      switch(elem.getOperator()) {
        case H: //NOOP
          break;
        case S:
        case I:
          readPos += elem.getLength();
          break;
        case D:
        case N:
          refPosInRead += elem.getLength();
          break;
        case M:
          if (refPos < (refPosInRead + elem.getLength())) {
            readPos += refPos - refPosInRead;
            if (readPos < read.getReadLength()) {
              // Found the base.  Return it
              return new Object[] { read.getReadString().charAt(readPos) , (int) read.getBaseQualities()[readPos] };
            }
          } else {
            readPos += elem.getLength();
            refPosInRead += elem.getLength();
          }
          break;
        default:
          throw new IllegalArgumentException("Invalid Cigar Operator: " + elem.getOperator() + " for read: " + read.getSAMString());         
      }
    }
   
    return new Object[] { 'N', (int) 0 };
  }

  private Counts getCounts(SAMFileReader reader, LocusInfo locus) {
   
    int depth = 0;
    int altCount = 0;
    int refCount = 0;
   
    CloseableIterator<SAMRecord> iter = reader.queryOverlapping(locus.chromosome, locus.posStart, locus.posStop);
    while (iter.hasNext()) {
      SAMRecord read = iter.next();
      if (!read.getDuplicateReadFlag()) {
        depth += 1;
       
        Object[] baseAndQual = getBaseAndQualAtPosition(read, locus.posStart);
        Character base = (Character) baseAndQual[0];
        int baseQual = (Integer) baseAndQual[1];
        Character refBase = c2r.getSequence(locus.chromosome, locus.posStart, 1).charAt(0);
       
        // Override input with actual reference
        locus.ref = new String(new char[] { refBase });
       
        if (locus.isIndel()) {
          if (hasIndel(read, locus)) {
            altCount += 1;
          } else if (!base.equals('N') && base.equals(refBase)) {
            refCount += 1;
          }
        } else if (baseQual >= minBaseQual) {
         
          if (!base.equals('N') && base.equals(refBase)) {
            refCount += 1;
          } else if (base.equals(locus.alt.charAt(0))) {
            altCount += 1;
          }
        }
      }
    }
   
    iter.close();
   
    return new Counts(refCount, altCount, depth);
  }
 
  private void loadLoci(String vcf) throws IOException {
    BufferedReader reader = new BufferedReader(new FileReader(vcf));
   
    String line = reader.readLine();
    int count = 0;
    while (line != null) {
      if (!line.startsWith("#")) {
        LocusInfo locus = new LocusInfo(line);
        loci.add(locus);
        count += 1;
      }
     
      line = reader.readLine();
    }
   
    System.err.println("Loaded [" + count + "] loci for inspection");
   
    reader.close();
  }
 
  static class LocusInfo {
    String id;
    String chromosome;
    int posStart;
    int posStop;
    String ref;
    String alt;
    Counts normalCounts;
    Counts tumorCounts;
   
    LocusInfo(String vcfLine) {
      String[] fields = vcfLine.split("\\s+");
      chromosome = fields[0];
     
      id = fields[2];
      ref = fields[3];
      alt = fields[4];
     
      if (fields[1].contains("-")) {
        String[] positions = fields[1].split("-");
        posStart = Integer.parseInt(positions[0]);
        posStop = Integer.parseInt(positions[1]);
      } else {
        posStart = Integer.parseInt(fields[1]);
        posStop = posStart;
        if (isIndel()) {
          posStop += 1;
        }
      }
     
      if (isIndel()) {
        posStart -= 1;
      }
    }
   
    boolean isIndel() {
      return alt.length() != ref.length();
    }
  }
 
  static class Counts {
    int refCount;
    int altCount;
    int depth;
   
    Counts(int refCount, int altCount, int depth) {
      this.refCount = refCount;
      this.altCount = altCount;
      this.depth = depth;
    }
  }
 
  public static void main(String[] args) throws Exception {
   
   
    String normal = args[0];
    String tumor = args[1];
    String vcf = args[2];
    String reference = args[3];
    int minBaseQual = Integer.parseInt(args[4]);
   
   
    /*
    String normal = "/home/lmose/dev/uncseq/oncomap/normal_test.bam";
    String tumor = "/home/lmose/dev/uncseq/oncomap/tumor_test.bam";
    String vcf = "/home/lmose/dev/uncseq/oncomap/test.vcf";
    String reference = "/home/lmose/reference/chr7/chr7.fa";
    int minBaseQual = 20;
    */

    SomaticLocusCaller caller = new SomaticLocusCaller();
    caller.call(normal, tumor, vcf, reference, minBaseQual);
  }
}
TOP

Related Classes of abra.cadabra.SomaticLocusCaller

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.