Package net.sf.cram

Source Code of net.sf.cram.Cram2Bam$Params

package net.sf.cram;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import net.sf.cram.CramTools.LevelConverter;
import net.sf.cram.ReadWrite.CramHeader;
import net.sf.cram.index.CramIndex;
import net.sf.cram.index.CramIndex.Entry;
import net.sf.cram.io.CountingInputStream;
import net.sf.cram.structure.Container;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.ReferenceSequenceFileFactory;
import net.sf.picard.util.Log;
import net.sf.picard.util.Log.LogLevel;
import net.sf.samtools.BAMFileWriter;
import net.sf.samtools.BAMIndexFactory;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMFileWriterFactory;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.SAMTextWriter;
import net.sf.samtools.util.SeekableFileStream;
import net.sf.samtools.util.SeekableStream;
import uk.ac.ebi.embl.ega_cipher.CipherInputStream_256;
import uk.ac.ebi.embl.ega_cipher.SeekableCipherStream_256;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import com.beust.jcommander.converters.FileConverter;

public class Cram2Bam {
  private static Log log = Log.getInstance(Cram2Bam.class);

  private static void printUsage(JCommander jc) {
    StringBuilder sb = new StringBuilder();
    sb.append("\n");
    jc.usage(sb);

    System.out.println("Version "
        + Cram2Bam.class.getPackage().getImplementationVersion());
    System.out.println(sb.toString());
  }

  public static void main(String[] args) throws IOException,
      IllegalArgumentException, IllegalAccessException {
    Params params = new Params();
    JCommander jc = new JCommander(params);
    try {
      jc.parse(args);
    } catch (Exception e) {
      System.out
          .println("Failed to parse parameteres, detailed message below: ");
      System.out.println(e.getMessage());
      System.out.println();
      System.out.println("See usage: -h");
      System.exit(1);
    }

    if (args.length == 0 || params.help) {
      printUsage(jc);
      System.exit(1);
    }

    if (params.reference == null) {
      System.out.println("A reference fasta file is required.");
      System.exit(1);
    }

    Log.setGlobalLogLevel(params.logLevel);

    if (params.locations == null)
      params.locations = new ArrayList<String>();

    char[] pass = null;
    if (params.decrypt) {
      if (System.console() == null)
        throw new RuntimeException("Cannot access console.");
      pass = System.console().readPassword();
    }

    ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory
        .getReferenceSequenceFile(params.reference);

    InputStream is;
    if (params.cramFile != null) {
      FileInputStream fis = new FileInputStream(params.cramFile);
      if (params.locations == null || params.locations.isEmpty())
        is = new BufferedInputStream(fis);
      else
        is = new SeekableFileStream(params.cramFile);
    } else
      is = System.in;

    if (params.decrypt) {
      CipherInputStream_256 cipherInputStream_256 = new CipherInputStream_256(
          is, pass, 128);
      is = cipherInputStream_256.getCipherInputStream();
      if (params.locations != null && !params.locations.isEmpty()) {
        is = new SeekableCipherStream_256(new SeekableFileStream(
            params.cramFile), pass, 1, 128);
      }
    }

    long offset = 0;
    CountingInputStream cis = new CountingInputStream(is);
    CramHeader cramHeader = ReadWrite.readCramHeader(cis);
    offset = cis.getCount();

    SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
    samFileWriterFactory.setAsyncOutputBufferSize(10000);
    samFileWriterFactory.setCreateIndex(false);
    samFileWriterFactory.setCreateMd5File(false);
    samFileWriterFactory.setUseAsyncIo(true);

    SAMFileWriter writer = createSAMFileWriter(params, cramHeader,
        samFileWriterFactory);

    Container c = null;
    AlignmentSliceQuery location = null;
    if (!params.locations.isEmpty() && params.cramFile != null
        && is instanceof SeekableStream) {
      if (params.locations.size() > 1)
        throw new RuntimeException("Only one location is supported.");

      location = new AlignmentSliceQuery(params.locations.get(0));

      c = skipToContainer(params.cramFile, cramHeader,
          (SeekableStream) is, location);

      if (c == null) {
        log.error("Index file not found. ");
        return;
      }
    }

    long recordCount = 0;
    long readTime = 0;
    long parseTime = 0;
    long normTime = 0;
    long samTime = 0;
    long writeTime = 0;
    long time = 0;
    ArrayList<CramRecord> cramRecords = new ArrayList<CramRecord>(10000);

    CramNormalizer n = new CramNormalizer(cramHeader.samFileHeader);

    byte[] ref = null;
    int prevSeqId = -1;
    while (true) {
//      try {
        time = System.nanoTime();
        // cis = new CountingInputStream(is);
        c = ReadWrite.readContainer(cramHeader.samFileHeader, is);
        if (c == null) break ;
        // c.offset = offset;
        // offset += cis.getCount();
        readTime += System.nanoTime() - time;
//      } catch (EOFException e) {
//        break;
//      }

      // for random access check if the sequence is the one look for:
      if (location != null
          && cramHeader.samFileHeader.getSequence(location.sequence)
              .getSequenceIndex() != c.sequenceId)
        break;

      if (params.countOnly && params.requiredFlags == 0
          && params.filteringFlags == 0) {
        recordCount += c.nofRecords;
        continue;
      }

      try {
        time = System.nanoTime();
        cramRecords.clear();
        BLOCK_PROTO.getRecords(c.h, c, cramHeader.samFileHeader,
            cramRecords);
        parseTime += System.nanoTime() - time;
      } catch (EOFException e) {
        throw e;
      }

      if (c.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
        ref = new byte[] {};
      } else if (prevSeqId < 0 || prevSeqId != c.sequenceId) {
        SAMSequenceRecord sequence = cramHeader.samFileHeader
            .getSequence(c.sequenceId);
        ReferenceSequence referenceSequence = Utils
            .trySequenceNameVariants(referenceSequenceFile,
                sequence.getSequenceName());
        ref = referenceSequence.getBases();
        {
          // hack:
          int newLines = 0;
          for (byte b : ref)
            if (b == 10)
              newLines++;
          byte[] ref2 = new byte[ref.length - newLines];
          int j = 0;
          for (int i = 0; i < ref.length; i++)
            if (ref[i] == 10)
              continue;
            else
              ref2[j++] = ref[i];
          ref = ref2;
        }
        prevSeqId = c.sequenceId;
      }

      long time1 = System.nanoTime();
      n.normalize(cramRecords, true, ref, c.alignmentStart, c.h.substitutionMatrix, c.h.AP_seriesDelta);
      long time2 = System.nanoTime();
      normTime += time2 - time1;

      Cram2BamRecordFactory c2sFactory = new Cram2BamRecordFactory(
          cramHeader.samFileHeader);

      long c2sTime = 0;
      long sWriteTime = 0;

      boolean enough = false;
      for (CramRecord r : cramRecords) {
        // check if the record ends before the query start:
        if (location != null && r.getAlignmentStart() < location.start)
          continue;

        time = System.nanoTime();
        SAMRecord s = c2sFactory.create(r);

        if (params.requiredFlags != 0
            && ((params.requiredFlags & s.getFlags()) == 0))
          continue;
        if (params.filteringFlags != 0
            && ((params.filteringFlags & s.getFlags()) != 0))
          continue;
        if (params.countOnly) {
          recordCount++;
          continue;
        }

        if (ref != null)
          Utils.calculateMdAndNmTags(s, ref, params.calculateMdTag,
              params.calculateNmTag);
        c2sTime += System.nanoTime() - time;
        samTime += System.nanoTime() - time;

        time = System.nanoTime();
        writer.addAlignment(s);
        sWriteTime += System.nanoTime() - time;
        writeTime += System.nanoTime() - time;
        if (params.outputFile == null && System.out.checkError())
          break;

        // we got all the reads for random access:
        if (location != null && location.end < s.getAlignmentStart()) {
          enough = true;
          break;
        }
      }

      log.info(String
          .format("CONTAINER READ: io %dms, parse %dms, norm %dms, convert %dms, BAM write %dms",
              c.readTime / 1000000, c.parseTime / 1000000,
              (time2 - time1) / 1000000, c2sTime / 1000000,
              sWriteTime / 1000000));

      if (enough
          || (params.outputFile == null && System.out.checkError()))
        break;
    }

    if (params.countOnly)
      System.out.println(recordCount);

    // if (writer instanceof SAMTextWriter)
    // ((SAMTextWriter)writer).getWriter().flush() ;
    writer.close();

    log.warn(String
        .format("TIMES: io %ds, parse %ds, norm %ds, convert %ds, BAM write %ds",
            readTime / 1000000000, parseTime / 1000000000,
            normTime / 1000000000, samTime / 1000000000,
            writeTime / 1000000000));
  }

  private static Container skipToContainer(File cramFile,
      CramHeader cramHeader, SeekableStream cramFileInputStream,
      AlignmentSliceQuery location) throws IOException {
    Container c = null;

    { // try crai:
      List<CramIndex.Entry> entries = getCraiEntries(cramFile,
          cramHeader, cramFileInputStream, location);
      if (entries != null) {
        try {
          Entry leftmost = CramIndex.getLeftmost(entries);
          cramFileInputStream.seek(leftmost.containerStartOffset);
          c = ReadWrite.readContainerHeader(cramFileInputStream);
          if (c == null) return null ;
          if (c.alignmentStart + c.alignmentSpan > location.start) {
            cramFileInputStream.seek(leftmost.containerStartOffset);
            return c;
          }
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    }

    { // try bai:
      long[] filePointers = getBaiFilePointers(cramFile, cramHeader,
          cramFileInputStream, location);
      if (filePointers.length == 0) {
        return null;
      }

      for (int i = 0; i < filePointers.length; i += 2) {
        long offset = filePointers[i] >>> 16;
        int sliceIndex = (int) ((filePointers[i] << 48) >>> 48);
        try {
          cramFileInputStream.seek(offset);
          c = ReadWrite.readContainerHeader(cramFileInputStream);
          if (c.alignmentStart + c.alignmentSpan > location.start) {
            cramFileInputStream.seek(offset);
            return c;
          }
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    }

    return c;
  }

  private static long[] getBaiFilePointers(File cramFile,
      CramHeader cramHeader, SeekableStream cramFileInputStream,
      AlignmentSliceQuery location) throws IOException {
    long[] filePointers = new long[0];

    File indexFile = new File(cramFile.getAbsolutePath() + ".bai");
    if (indexFile.exists())
      filePointers = BAMIndexFactory.SHARED_INSTANCE.getBAMIndexPointers(
          indexFile,
          cramHeader.samFileHeader.getSequenceDictionary(),
          location.sequence, location.start, location.end);
    return filePointers;
  }

  private static List<CramIndex.Entry> getCraiEntries(File cramFile,
      CramHeader cramHeader, SeekableStream cramFileInputStream,
      AlignmentSliceQuery location) throws IOException {
    File indexFile = new File(cramFile.getAbsolutePath() + ".crai");
    if (indexFile.exists()) {
      FileInputStream fis = new FileInputStream(indexFile);
      GZIPInputStream gis = new GZIPInputStream(new BufferedInputStream(
          fis));
      BufferedInputStream bis = new BufferedInputStream(gis);
      List<CramIndex.Entry> full = CramIndex.readIndex(gis);

      List<CramIndex.Entry> entries = new LinkedList<CramIndex.Entry>();
      SAMSequenceRecord sequence = cramHeader.samFileHeader
          .getSequence(location.sequence);
      if (sequence == null)
        throw new RuntimeException("Sequence not found: "
            + location.sequence);

      entries.addAll(CramIndex.find(full, sequence.getSequenceIndex(),
          location.start, location.end - location.start));

      bis.close();

      return entries;
    }
    return null;
  }

  // private static long[] getFilePointers(File cramFile, CramHeader
  // cramHeader,
  // SeekableStream cramFileInputStream, AlignmentSliceQuery location,
  // boolean bai) throws IOException {
  // long[] filePointers = new long[0];
  //
  // if (bai) {
  // File indexFile = new File(cramFile.getAbsolutePath() + ".bai");
  // if (indexFile.exists())
  // filePointers = BAMIndexFactory.SHARED_INSTANCE
  // .getBAMIndexPointers(indexFile,
  // cramHeader.samFileHeader
  // .getSequenceDictionary(),
  // location.sequence, location.start, location.end);
  // } else {
  // File indexFile = new File(cramFile.getAbsolutePath() + ".crai");
  // if (indexFile.exists()) {
  // FileInputStream fis = new FileInputStream(indexFile);
  // GZIPInputStream gis = new GZIPInputStream(
  // new BufferedInputStream(fis));
  // BufferedInputStream bis = new BufferedInputStream(gis);
  // List<CramIndex.Entry> full = CramIndex.readIndex(gis);
  //
  // List<CramIndex.Entry> entries = new LinkedList<CramIndex.Entry>();
  // SAMSequenceRecord sequence = cramHeader.samFileHeader
  // .getSequence(location.sequence);
  // if (sequence == null)
  // throw new RuntimeException("Sequence not found: "
  // + location.sequence);
  //
  // entries.addAll(CramIndex.find(full, sequence.getSequenceIndex(),
  // location.start, location.end - location.start));
  //
  // bis.close();
  //
  // filePointers = new long[entries.size() * 2];
  // int i = 0;
  // for (CramIndex.Entry entry : entries) {
  // filePointers[i++] = (entry.containerStartOffset << 16) | entry.;
  // filePointers[i++] = 0;
  // }
  // }
  // }
  //
  // return filePointers;
  // }

  private static SAMFileWriter createSAMFileWriter(Params params,
      CramHeader cramHeader, SAMFileWriterFactory samFileWriterFactory)
      throws IOException {
    /*
     * building sam writer, sometimes we have to go deeper to get to the
     * required functionality:
     */
    SAMFileWriter writer = null;
    if (params.outputFastq) {
      if (params.cramFile == null) {
        writer = new FastqSAMFileWriter(System.out, null,
            cramHeader.samFileHeader);
      } else {
        writer = new FastqSAMFileWriter(
            params.cramFile.getAbsolutePath(), false,
            cramHeader.samFileHeader);

      }
    } else if (params.outputFastqGz) {
      if (params.cramFile == null) {
        GZIPOutputStream gos = new GZIPOutputStream(System.out);
        PrintStream ps = new PrintStream(gos);
        writer = new FastqSAMFileWriter(ps, null,
            cramHeader.samFileHeader);
      } else {
        writer = new FastqSAMFileWriter(
            params.cramFile.getAbsolutePath(), true,
            cramHeader.samFileHeader);

      }
    } else if (params.outputFile == null) {
      OutputStream os = new BufferedOutputStream(System.out);
      if (params.outputBAM) {
        BAMFileWriter ret = new BAMFileWriter(os, null);
        ret.setSortOrder(cramHeader.samFileHeader.getSortOrder(), true);
        ret.setHeader(cramHeader.samFileHeader);
        writer = ret;
      } else {
        writer = Utils.createSAMTextWriter(samFileWriterFactory, os,
            cramHeader.samFileHeader, params.printSAMHeader);
      }
    } else {
      writer = samFileWriterFactory.makeSAMOrBAMWriter(
          cramHeader.samFileHeader, true, params.outputFile);
    }
    return writer;
  }

  @Parameters(commandDescription = "CRAM to BAM conversion. ")
  static class Params {
    @Parameter(names = { "-l", "--log-level" }, description = "Change log level: DEBUG, INFO, WARNING, ERROR.", converter = LevelConverter.class)
    LogLevel logLevel = LogLevel.ERROR;

    @Parameter(names = { "--input-cram-file", "-I" }, converter = FileConverter.class, description = "The path to the CRAM file to uncompress. Omit if standard input (pipe).")
    File cramFile;

    @Parameter(names = { "--reference-fasta-file", "-R" }, converter = FileConverter.class, description = "Path to the reference fasta file, it must be uncompressed and indexed (use 'samtools faidx' for example).")
    File reference;

    @Parameter(names = { "--output-bam-file", "-O" }, converter = FileConverter.class, description = "The path to the output BAM file.")
    File outputFile;

    @Parameter(names = { "-b", "--output-bam-format" }, description = "Output in BAM format.")
    boolean outputBAM = false;

    @Parameter(names = { "-q", "--output-fastq-format" }, hidden = true, description = "Output in fastq format.")
    boolean outputFastq = false;

    @Parameter(names = { "-z", "--output-fastq-gz-format" }, hidden = true, description = "Output in gzipped fastq format.")
    boolean outputFastqGz = false;

    @Parameter(names = { "--print-sam-header" }, description = "Print SAM header when writing SAM format.")
    boolean printSAMHeader = false;

    @Parameter(names = { "-h", "--help" }, description = "Print help and quit")
    boolean help = false;

    @Parameter(names = { "--default-quality-score" }, description = "Use this quality score (decimal representation of ASCII symbol) as a default value when the original quality score was lost due to compression. Minimum is 33.")
    int defaultQS = '?';

    @Parameter(names = { "--calculate-md-tag" }, description = "Calculate MD tag.")
    boolean calculateMdTag = false;

    @Parameter(names = { "--calculate-nm-tag" }, description = "Calculate NM tag.")
    boolean calculateNmTag = false;

    @Parameter()
    List<String> locations;

    @Parameter(names = { "--decrypt" }, description = "Decrypt the file.")
    boolean decrypt = false;

    @Parameter(names = { "--count-only", "-c" }, description = "Count number of records.")
    boolean countOnly = false;

    @Parameter(names = { "--required-flags", "-f" }, description = "Required flags. ")
    int requiredFlags = 0;

    @Parameter(names = { "--filter-flags", "-F" }, description = "Filtering flags. ")
    int filteringFlags = 0;

  }

}
TOP

Related Classes of net.sf.cram.Cram2Bam$Params

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.