Package net.sf.samtools

Source Code of net.sf.samtools.SAMIterator$CramFileIterable

package net.sf.samtools;

import java.io.BufferedInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import net.sf.cram.BLOCK_PROTO;
import net.sf.cram.Cram2BamRecordFactory;
import net.sf.cram.CramNormalizer;
import net.sf.cram.CramRecord;
import net.sf.cram.ReadWrite;
import net.sf.cram.ReadWrite.CramHeader;
import net.sf.cram.Utils;
import net.sf.cram.structure.Container;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.util.Log;
import net.sf.samtools.SAMFileHeader.SortOrder;
import net.sf.samtools.SAMFileReader.ValidationStringency;
import net.sf.samtools.util.RuntimeEOFException;

public class SAMIterator implements SAMRecordIterator {
  private static Log log = Log.getInstance(SAMIterator.class);
  private InputStream is;
  private CramHeader cramHeader;
  private ArrayList<SAMRecord> records;
  private int recordCounter = 0;
  private SAMRecord nextRecord = null;
  private ReferenceSequenceFile referenceSequenceFile;
  private boolean restoreNMTag = true;
  private boolean restoreMDTag = false;
  private CramNormalizer normalizer;
  private byte[] refs;
  private int prevSeqId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
  private Container container;

  private ValidationStringency validationStringency = ValidationStringency.SILENT;

  public ValidationStringency getValidationStringency() {
    return validationStringency;
  }

  public void setValidationStringency(
      ValidationStringency validationStringency) {
    this.validationStringency = validationStringency;
  }

  private long samRecordIndex;

  public SAMIterator(InputStream is,
      ReferenceSequenceFile referenceSequenceFile) throws IOException {
    this.is = is;
    this.referenceSequenceFile = referenceSequenceFile;
    cramHeader = ReadWrite.readCramHeader(is);
    records = new ArrayList<SAMRecord>(100000);
    normalizer = new CramNormalizer(cramHeader.samFileHeader);
  }

  public CramHeader getCramHeader() {
    return cramHeader;
  }

  private void nextContainer() throws IOException, IllegalArgumentException,
      IllegalAccessException {
    if (records == null)
      records = new ArrayList<SAMRecord>(100000);
    records.clear();
    recordCounter = 0;

    container = null;
    container = ReadWrite.readContainer(cramHeader.samFileHeader, is);
    if (container == null)
      return;

    ArrayList<CramRecord> cramRecords = new ArrayList<CramRecord>();
    try {
      cramRecords.clear();
      BLOCK_PROTO.getRecords(container.h, container,
          cramHeader.samFileHeader, cramRecords);
    } catch (EOFException e) {
      throw e;
    }

    if (container.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
      refs = new byte[] {};
    } else if (prevSeqId < 0 || prevSeqId != container.sequenceId) {
      SAMSequenceRecord sequence = cramHeader.samFileHeader
          .getSequence(container.sequenceId);
      ReferenceSequence referenceSequence = Utils
          .trySequenceNameVariants(referenceSequenceFile,
              sequence.getSequenceName());
      refs = referenceSequence.getBases();
      prevSeqId = container.sequenceId;
    }

    long time1 = System.nanoTime();

    normalizer.normalize(cramRecords, true, refs, container.alignmentStart,
        container.h.substitutionMatrix, container.h.AP_seriesDelta);
    long time2 = System.nanoTime();

    Cram2BamRecordFactory c2sFactory = new Cram2BamRecordFactory(
        cramHeader.samFileHeader);

    long c2sTime = 0;

    for (CramRecord r : cramRecords) {
      long time = System.nanoTime();
      SAMRecord s = c2sFactory.create(r);
      c2sTime += System.nanoTime() - time;
      if (!r.segmentUnmapped)
        Utils.calculateMdAndNmTags(s, refs, restoreMDTag, restoreNMTag);

      s.setValidationStringency(validationStringency);

      if (validationStringency != ValidationStringency.SILENT) {
        final List<SAMValidationError> validationErrors = s.isValid();
        SAMUtils.processValidationErrors(validationErrors,
            samRecordIndex, validationStringency);
      }

      records.add(s);
    }

    log.info(String.format(
        "CONTAINER READ: io %dms, parse %dms, norm %dms, convert %dms",
        container.readTime / 1000000, container.parseTime / 1000000,
        c2sTime / 1000000, (time2 - time1) / 1000000));
  }

  @Override
  public boolean hasNext() {
    if (recordCounter >= records.size()) {
      try {
        nextContainer();
        if (records.isEmpty())
          return false;
      } catch (Exception e) {
        throw new RuntimeEOFException(e);
      }
    }

    nextRecord = records.get(recordCounter++);
    return true;
  }

  @Override
  public SAMRecord next() {
    return nextRecord;
  }

  @Override
  public void remove() {
    throw new RuntimeException("Removal of records not implemented.");
  }

  @Override
  public void close() {
    records.clear();
    try {
      is.close();
    } catch (IOException e) {
    }
  }

  public static class CramFileIterable implements Iterable<SAMRecord> {
    private ReferenceSequenceFile referenceSequenceFile;
    private File cramFile;

    public CramFileIterable(File cramFile,
        ReferenceSequenceFile referenceSequenceFile) {
      this.referenceSequenceFile = referenceSequenceFile;
      this.cramFile = cramFile;
    }

    @Override
    public Iterator<SAMRecord> iterator() {
      try {
        FileInputStream fis = new FileInputStream(cramFile);
        BufferedInputStream bis = new BufferedInputStream(fis);
        SAMIterator iterator = new SAMIterator(bis,
            referenceSequenceFile);
        return iterator;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

  }

  @Override
  public SAMRecordIterator assertSorted(SortOrder sortOrder) {
    throw new RuntimeException("Not implemented.");
  }

}
TOP

Related Classes of net.sf.samtools.SAMIterator$CramFileIterable

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.