Package net.sf.samtools

Examples of net.sf.samtools.SAMRecordIterator


    SAMFileReader reader = new SAMFileReader(new File(inputSam));
    reader.setValidationStringency(ValidationStringency.SILENT);
   
    for (Feature region : regions) {
      int count = 0;
      SAMRecordIterator iter = reader.queryOverlapping(region.getSeqname(), (int) region.getStart(), (int) region.getEnd());
      while (iter.hasNext()) {
        iter.next();
        count += 1;
      }
      iter.close();
      System.out.print(count + ",");
    }
   
    reader.close();
  }
View Full Code Here


  @Test
  public void test2() throws IOException {
    SAMFileReader r = new SAMFileReader(
        new File(
            "c:/temp/HG00096.mapped.illumina.mosaik.GBR.exome.20110411.chr20.bam"));
    SAMRecordIterator iterator = r.iterator();

    CompressionHeaderFactory.HuffmanParamsCalculator c = new HuffmanParamsCalculator();

    String[] names = new String [100000] ;
    for (int i = 0; i < names.length && iterator.hasNext(); i++) {
      names[i] = iterator.next().getReadName() ;
      c.add(names[i].length());
    }
    iterator.close();
    r.close();
    c.calculate();

    int[] values = c.values();
    int[] lens = c.bitLens();
View Full Code Here

  @Test
  public void test1() throws IOException {
    SAMFileReader reader = new SAMFileReader(
        new File(
            "c:/temp/HG00096.mapped.illumina.mosaik.GBR.exome.20110411.chr20.bam"));
    SAMRecordIterator iterator = reader.iterator();

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    GZIPOutputStream gos = new GZIPOutputStream(baos);
    BufferedOutputStream bos = new BufferedOutputStream(gos) ;
    ByteArrayStopEncoding.ByteArrayStopCodec c = new ByteArrayStopEncoding.ByteArrayStopCodec(
        (byte) 0, null, bos);
    int maxRecords = 100000;
    int counter = 0;

    long writeNanos = 0;
    while (iterator.hasNext() && counter++ < maxRecords) {
      SAMRecord r = iterator.next();
      byte[] scores = r.getBaseQualities();
      long time1 = System.nanoTime();
      c.write(null, scores);
      writeNanos += System.nanoTime() - time1;
    }
View Full Code Here

  @Test
  public void test2() throws IOException {
    SAMFileReader reader = new SAMFileReader(
        new File(
            "c:/temp/HG00096.mapped.illumina.mosaik.GBR.exome.20110411.chr20.bam"));
    SAMRecordIterator iterator = reader.iterator();

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    GZIPOutputStream gos = new GZIPOutputStream(baos);
    BufferedOutputStream bos = new BufferedOutputStream(gos) ;

    ExternalByteArrayCodec c = new ExternalByteArrayCodec(
        bos, null);
    int maxRecords = 100000;
    int counter = 0;

    long writeNanos = 0;
    java.util.List<SAMRecord> records = new ArrayList<SAMRecord>(2*maxRecords) ;
    while (iterator.hasNext() && counter++ <= maxRecords) {
      SAMRecord r = iterator.next();
      records.add(r) ;
      byte[] scores = r.getBaseQualities();
      long time1 = System.nanoTime();
      c.write(null, scores);
      writeNanos += System.nanoTime() - time1;
View Full Code Here

    SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT) ;
    SAMFileReader r1 = new SAMFileReader(params.file1);
    SAMFileReader r2 = new SAMFileReader(params.file2);

    SAMRecordIterator it1, it2;
    if (params.location != null) {
      AlignmentSliceQuery query = new AlignmentSliceQuery(params.location);
      if (SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(query.sequence)) {
        it1 = r1.queryUnmapped();
        it2 = r2.queryUnmapped();
View Full Code Here

    }

    Log.setGlobalLogLevel(params.logLevel);

    SAMFileReader reader = new SAMFileReader(params.bamFile);
    SAMRecordIterator iterator = reader.iterator();

    Map<String, Out> map = new TreeMap<String, DecomposeBAM.Out>();
    long bases = 0;
    while (iterator.hasNext() && params.maxRecords-- > 0) {
      SAMRecord record = iterator.next();
      bases += record.getReadLength();

      put(map, "QNAME", record.getReadName());
      put(map, "FLAG", String.valueOf(record.getFlags()));
      put(map, "RNAME", record.getReferenceName());
      put(map, "POS", String.valueOf(record.getAlignmentStart()));
      put(map, "MAPQ", String.valueOf(record.getMappingQuality()));
      put(map, "CIGAR", record.getCigarString());
      put(map, "RNEXT", record.getMateReferenceName());
      put(map, "PNEXT", String.valueOf(record.getMateAlignmentStart()));
      put(map, "TLEN", String.valueOf(record.getInferredInsertSize()));
      put(map, "SEQ", record.getReadString());
      put(map, "QUAL", record.getBaseQualityString());

      for (SAMTagAndValue tv : record.getAttributes()) {
        writeTagValue(map, tv);
      }
    }

    close(map);
    iterator.close();
    reader.close();

    long totalBytes = 0;
    for (Out out : map.values())
      totalBytes += out.file.length();
View Full Code Here

    if (params.file != null)
      reader = new SAMFileReader(params.file);
    else
      reader = new SAMFileReader(System.in);

    SAMRecordIterator it = null;
    if (params.location == null)
      it = reader.iterator();
    else {
      AlignmentSliceQuery query = new AlignmentSliceQuery(params.location);
      it = reader.query(query.sequence, query.start, query.end, false);
    }

    it = new BoundSAMRecordIterator(it, params.skipRecords,
        params.skipRecords + params.maxRecords);
    SAMFieldSelector s = new SAMFieldSelector(params.fields);
    SAMSubRecordStreamWriter writer = new SAMSubRecordStreamWriter();
    writer.selector = s;
    writer.ps = System.out;
    writer.enumerate = params.enumerate;
    long counter = params.skipRecords + 1;

    while (it.hasNext()) {
      SAMRecord record = it.next();
      writer.write(record, counter++);
    }

    it.close();
    reader.close();
  }
View Full Code Here

    BLOCK_PROTO.recordsPerSlice = params.maxSliceSize;
    ReferenceSequence sequence = null;
    List<SAMRecord> samRecords = new ArrayList<SAMRecord>(
        params.maxContainerSize);
    int prevSeqId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
    SAMRecordIterator iterator = samFileReader.iterator();
    {
      String seqName = null;
      SAMRecord samRecord = iterator.next();
      if (samRecord == null)
        throw new RuntimeException("No records found.");
      seqName = samRecord.getReferenceName();
      prevSeqId = samRecord.getReferenceIndex();
      samRecords.add(samRecord);

      // if (samFileReader.getFileHeader().getReadGroups().isEmpty()
      // || samFileReader.getFileHeader().getReadGroup(
      // Sam2CramRecordFactory.UNKNOWN_READ_GROUP_ID) == null) {
      // log.info("Adding default read group.");
      // SAMReadGroupRecord readGroup = new SAMReadGroupRecord(
      // Sam2CramRecordFactory.UNKNOWN_READ_GROUP_ID);
      //
      // readGroup
      // .setSample(Sam2CramRecordFactory.UNKNOWN_READ_GROUP_SAMPLE);
      // samFileReader.getFileHeader().addReadGroup(readGroup);
      // }

      if (SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(seqName))
        sequence = null;
      else
        sequence = Utils.trySequenceNameVariants(referenceSequenceFile,
            seqName);

    }

    QualityScorePreservation preservation;
    if (params.losslessQS)
      preservation = new QualityScorePreservation("*40");
    else
      preservation = new QualityScorePreservation(params.qsSpec);

    byte[] ref = sequence == null ? new byte[0] : sequence.getBases();

    {
      // hack:
      int newLines = 0;
      for (byte b : ref)
        if (b == 10)
          newLines++;
      byte[] ref2 = new byte[ref.length - newLines];
      int j = 0;
      for (int i = 0; i < ref.length; i++)
        if (ref[i] == 10)
          continue;
        else
          ref2[j++] = ref[i];
      ref = ref2;
    }

    OutputStream os;
    if (params.outputCramFile != null) {
      FileOutputStream fos = new FileOutputStream(params.outputCramFile);
      os = new BufferedOutputStream(fos);
    } else {
      log.warn("No output file, writint to STDOUT.");
      os = System.out;
    }

    if (params.encrypt) {
      CipherOutputStream_256 cos = new CipherOutputStream_256(os, pass,
          128);
      os = cos.getCipherOutputStream();
    }

    CramHeader h = new CramHeader(2, 0, params.bamFile == null ? "STDIN"
        : params.bamFile.getName(), samFileReader.getFileHeader());
    long offset = ReadWrite.writeCramHeader(h, os);

    long bases = 0;
    long coreBytes = 0;
    long[] externalBytes = new long[10];
    BLOCK_PROTO.recordsPerSlice = params.maxSliceSize;
    long globalRecordCounter = 0;
    MessageDigest md5_MessageDigest = MessageDigest.getInstance("MD5");

    do {
      if (params.outputCramFile == null && System.out.checkError())
        return;

      SAMRecord samRecord = iterator.next();
      if (samRecord == null)
        // no more records
        break;
      if (samRecord.getReferenceIndex() != prevSeqId
          || samRecords.size() >= params.maxContainerSize) {
        if (!samRecords.isEmpty()) {
          List<CramRecord> records = convert(samRecords,
              samFileReader.getFileHeader(), ref, preservation,
              params.captureAllTags, params.captureTags,
              params.ignoreTags);
          samRecords.clear();

          Container container = BLOCK_PROTO.buildContainer(records,
              samFileReader.getFileHeader(),
              params.preserveReadNames, globalRecordCounter,
              null, true);
          for (Slice s : container.slices) {
            if (s.alignmentStart < 1) {
              s.refMD5 = new byte[16];
              Arrays.fill(s.refMD5, (byte) 0);
              continue;
            }

            md5_MessageDigest.reset();

            int span = Math.min(s.alignmentSpan, ref.length
                - s.alignmentStart);
            if (s.alignmentStart == 0)
              System.out.println("gotcha");

            md5_MessageDigest.update(ref, s.alignmentStart - 1,
                span);

            String sliceRef = new String(ref, s.alignmentStart - 1,
                Math.min(span, 30));
            s.refMD5 = md5_MessageDigest.digest();
            log.debug("Slice ref starts with: " + sliceRef);
            log.debug("Slice ref md5: "
                + (String.format("%032x", new BigInteger(1,
                    s.refMD5))));
          }
          globalRecordCounter += records.size();
          records.clear();
          long len = ReadWrite.writeContainer(container, os);
          container.offset = offset;
          offset += len;

          log.info(String
              .format("CONTAINER WRITE TIMES: header build time %dms, slices build time %dms, io time %dms.",
                  container.buildHeaderTime / 1000000,
                  container.buildSlicesTime / 1000000,
                  container.writeTime / 1000000));

          for (Slice s : container.slices) {
            coreBytes += s.coreBlock.getCompressedContent().length;
            for (Integer i : s.external.keySet())
              externalBytes[i] += s.external.get(i)
                  .getCompressedContent().length;
          }
        }
      }

      if (prevSeqId != samRecord.getReferenceIndex()) {
        if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
          sequence = Utils
              .trySequenceNameVariants(referenceSequenceFile,
                  samRecord.getReferenceName());
          ref = sequence.getBases();
          {
            // hack:
            int newLines = 0;
            for (byte b : ref)
              if (b == 10)
                newLines++;
            byte[] ref2 = new byte[ref.length - newLines];
            int j = 0;
            for (int i = 0; i < ref.length; i++)
              if (ref[i] == 10)
                continue;
              else
                ref2[j++] = ref[i];
            ref = ref2;
          }
        } else
          ref = new byte[] {};
        prevSeqId = samRecord.getReferenceIndex();
      }

      samRecords.add(samRecord);
      bases += samRecord.getReadLength();

      if (params.maxRecords-- < 1)
        break;
    } while (iterator.hasNext());

    { // copied for now, should be a subroutine:
      if (!samRecords.isEmpty()) {
        List<CramRecord> records = convert(samRecords,
            samFileReader.getFileHeader(), ref, preservation,
            params.captureAllTags, params.captureTags,
            params.ignoreTags);
        samRecords.clear();
        Container container = BLOCK_PROTO.buildContainer(records,
            samFileReader.getFileHeader(),
            params.preserveReadNames, globalRecordCounter, null,
            true);
        for (Slice s : container.slices) {
          if (s.alignmentStart < 1) {
            s.refMD5 = new byte[16];
            Arrays.fill(s.refMD5, (byte) 0);
            continue;
          }

          md5_MessageDigest.reset();

          int span = Math.min(s.alignmentSpan, ref.length
              - s.alignmentStart);
          if (s.alignmentStart == 0)
            System.out.println("gotcha");

          md5_MessageDigest.update(ref, s.alignmentStart - 1,
              span);

          String sliceRef = new String(ref, s.alignmentStart - 1,
              Math.min(span, 30));
          s.refMD5 = md5_MessageDigest.digest();
          log.debug("Slice ref starts with: " + sliceRef);
          log.debug("Slice ref md5: "
              + (String.format("%032x", new BigInteger(1,
                  s.refMD5))));
        }
        records.clear();
        ReadWrite.writeContainer(container, os);
        log.info(String
            .format("CONTAINER WRITE TIMES: header build time %dms, slices build time %dms, io time %dms.",
                container.buildHeaderTime / 1000000,
                container.buildSlicesTime / 1000000,
                container.writeTime / 1000000));

        for (Slice s : container.slices) {
          coreBytes += s.coreBlock.getCompressedContent().length;
          for (Integer i : s.external.keySet())
            externalBytes[i] += s.external.get(i)
                .getCompressedContent().length;
        }
      }
    }

    iterator.close();
    samFileReader.close();
    os.close();

    StringBuilder sb = new StringBuilder();
    sb.append(String.format("STATS: core %.2f b/b", 8f * coreBytes / bases));
View Full Code Here

            "c:/temp/human_g1k_v37.fasta"));

    ReferenceSequence sequence = null;
    {
      String seqName = null;
      SAMRecordIterator iterator = samFileReader.iterator();
      SAMRecord samRecord = iterator.next();
      seqName = samRecord.getReferenceName();
      iterator.close();
      sequence = referenceSequenceFile.getSequence(seqName);
    }

    int maxRecords = 100000;
    List<SAMRecord> samRecords = new ArrayList<SAMRecord>(maxRecords);

    int alStart = Integer.MAX_VALUE;
    int alEnd = 0;
    long baseCount = 0;
    SAMRecordIterator iterator = samFileReader.iterator();

    do {
      SAMRecord samRecord = iterator.next();
      if (!samRecord.getReferenceName().equals(sequence.getName()))
        break;

      baseCount += samRecord.getReadLength();
      samRecords.add(samRecord);
      if (samRecord.getAlignmentStart() > 0
          && alStart > samRecord.getAlignmentStart())
        alStart = samRecord.getAlignmentStart();
      if (alEnd < samRecord.getAlignmentEnd())
        alEnd = samRecord.getAlignmentEnd();

      if (samRecords.size() >= maxRecords)
        break;
    } while (iterator.hasNext());

    ReferenceTracks tracks = new ReferenceTracks(sequence.getContigIndex(),
        sequence.getName(), sequence.getBases(), alEnd - alStart + 100);
    tracks.moveForwardTo(alStart);
View Full Code Here

  public static void main(String[] args) {
    File bamFile = new File(args[0]);
    SAMFileReader reader = new SAMFileReader(bamFile);

    ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(args[1]));
    SAMRecordIterator iterator = reader.iterator();
    String refName = iterator.next().getReferenceName();
    iterator.close();
    ReferenceSequence sequence = ref.getSequence(refName);

    iterator = reader.iterator();
    Sam2CramRecordFactory factory = new Sam2CramRecordFactory(sequence.getBases());
    factory.preserveReadNames = false;
    factory.captureAllTags = false;
    factory.captureUnmappedBases = true;
    factory.captureUnmappedScores = true;
    factory.preserveReadNames = true;

    List<CramRecord> cramRecords = new ArrayList<CramRecord>();
    List<SAMRecord> samRecords = new ArrayList<SAMRecord>();
    for (int i = 0; i < 100000; i++) {
      SAMRecord samRecord = iterator.next();
      samRecords.add(samRecord);
      CramRecord cramRecord = factory.createCramRecord(samRecord);
      // cramRecord.setReadName(samRecord.getReadName()) ;
      cramRecord.counter = i;
      cramRecords.add(cramRecord);
View Full Code Here

TOP

Related Classes of net.sf.samtools.SAMRecordIterator

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.