Package net.sf.samtools

Examples of net.sf.samtools.SAMFileReader


    }
  }

  @Test
  public void test2() throws IOException {
    SAMFileReader r = new SAMFileReader(
        new File(
            "c:/temp/HG00096.mapped.illumina.mosaik.GBR.exome.20110411.chr20.bam"));
    SAMRecordIterator iterator = r.iterator();

    CompressionHeaderFactory.HuffmanParamsCalculator c = new HuffmanParamsCalculator();

    String[] names = new String [100000] ;
    for (int i = 0; i < names.length && iterator.hasNext(); i++) {
      names[i] = iterator.next().getReadName() ;
      c.add(names[i].length());
    }
    iterator.close();
    r.close();
    c.calculate();

    int[] values = c.values();
    int[] lens = c.bitLens();
    System.out.println(Arrays.toString(values));
View Full Code Here


public class TestByteArrayStopCodec {

  @Test
  public void test1() throws IOException {
    SAMFileReader reader = new SAMFileReader(
        new File(
            "c:/temp/HG00096.mapped.illumina.mosaik.GBR.exome.20110411.chr20.bam"));
    SAMRecordIterator iterator = reader.iterator();

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    GZIPOutputStream gos = new GZIPOutputStream(baos);
    BufferedOutputStream bos = new BufferedOutputStream(gos) ;
    ByteArrayStopEncoding.ByteArrayStopCodec c = new ByteArrayStopEncoding.ByteArrayStopCodec(
        (byte) 0, null, bos);
    int maxRecords = 100000;
    int counter = 0;

    long writeNanos = 0;
    while (iterator.hasNext() && counter++ < maxRecords) {
      SAMRecord r = iterator.next();
      byte[] scores = r.getBaseQualities();
      long time1 = System.nanoTime();
      c.write(null, scores);
      writeNanos += System.nanoTime() - time1;
    }
    bos.close();

    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    GZIPInputStream gis = new GZIPInputStream(bais);
    c = new ByteArrayStopEncoding.ByteArrayStopCodec((byte) 0,
        new BufferedInputStream(gis), null);

    long bases = 0;
    long time3 = System.nanoTime();
    for (counter = 0; counter < maxRecords; counter++)
      bases += c.read(null).length;
    long time4 = System.nanoTime();

    System.out.printf("ByteArrayStopCodec: bases %d, %d ms, %d ms.\n", bases,
        (writeNanos) / 1000000, (time4 - time3) / 1000000);
    reader.close();
  }
View Full Code Here

    reader.close();
  }

  @Test
  public void test2() throws IOException {
    SAMFileReader reader = new SAMFileReader(
        new File(
            "c:/temp/HG00096.mapped.illumina.mosaik.GBR.exome.20110411.chr20.bam"));
    SAMRecordIterator iterator = reader.iterator();

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    GZIPOutputStream gos = new GZIPOutputStream(baos);
    BufferedOutputStream bos = new BufferedOutputStream(gos) ;

    ExternalByteArrayCodec c = new ExternalByteArrayCodec(
        bos, null);
    int maxRecords = 100000;
    int counter = 0;

    long writeNanos = 0;
    java.util.List<SAMRecord> records = new ArrayList<SAMRecord>(2*maxRecords) ;
    while (iterator.hasNext() && counter++ <= maxRecords) {
      SAMRecord r = iterator.next();
      records.add(r) ;
      byte[] scores = r.getBaseQualities();
      long time1 = System.nanoTime();
      c.write(null, scores);
      writeNanos += System.nanoTime() - time1;
    }
    bos.close();

    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    GZIPInputStream gis = new GZIPInputStream(bais);
    c = new ExternalByteArrayCodec(null, new BufferedInputStream(gis));

    long bases = 0;
    long readNanos = 0 ;
    for (counter = 0; counter < records.size(); counter++) {
      SAMRecord record = records.get(counter) ;
       
      long time4 = System.nanoTime();
      bases += c.read(null, record.getReadLength()).length;
      readNanos += System.nanoTime() - time4 ;
    }

    System.out.printf("ExternalByteArrayCodec: bases %d, %d ms, %d ms.\n", bases,
        (writeNanos) / 1000000, (readNanos) / 1000000);
    reader.close();
  }
View Full Code Here

      System.exit(1);
    }

    Log.setGlobalLogLevel(params.logLevel);

    SAMFileReader reader = new SAMFileReader(params.bamFile);
    SAMRecordIterator iterator = reader.iterator();

    Map<String, Out> map = new TreeMap<String, DecomposeBAM.Out>();
    long bases = 0;
    while (iterator.hasNext() && params.maxRecords-- > 0) {
      SAMRecord record = iterator.next();
      bases += record.getReadLength();

      put(map, "QNAME", record.getReadName());
      put(map, "FLAG", String.valueOf(record.getFlags()));
      put(map, "RNAME", record.getReferenceName());
      put(map, "POS", String.valueOf(record.getAlignmentStart()));
      put(map, "MAPQ", String.valueOf(record.getMappingQuality()));
      put(map, "CIGAR", record.getCigarString());
      put(map, "RNEXT", record.getMateReferenceName());
      put(map, "PNEXT", String.valueOf(record.getMateAlignmentStart()));
      put(map, "TLEN", String.valueOf(record.getInferredInsertSize()));
      put(map, "SEQ", record.getReadString());
      put(map, "QUAL", record.getBaseQualityString());

      for (SAMTagAndValue tv : record.getAttributes()) {
        writeTagValue(map, tv);
      }
    }

    close(map);
    iterator.close();
    reader.close();

    long totalBytes = 0;
    for (Out out : map.values())
      totalBytes += out.file.length();
View Full Code Here

    if (params.referenceFasta != null)
      System.setProperty("reference",
          params.referenceFasta.getAbsolutePath());

    SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT) ;
    SAMFileReader r1 = new SAMFileReader(params.file1);
    SAMFileReader r2 = new SAMFileReader(params.file2);

    SAMRecordIterator it1, it2;
    if (params.location != null) {
      AlignmentSliceQuery query = new AlignmentSliceQuery(params.location);
      if (SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(query.sequence)) {
        it1 = r1.queryUnmapped();
        it2 = r2.queryUnmapped();
      } else {
        it1 = r1.queryContained(query.sequence, query.start, query.end);
        it2 = r2.queryContained(query.sequence, query.start, query.end);
      }
    } else {
      it1 = r1.iterator();
      it2 = r2.iterator();
    }

    SamRecordComparision c = new SamRecordComparision();
    c.maxValueLen = params.maxValueLength;
    c.compareTags = params.compareTags;
    c.ignoreFlags = params.ignoreFalgs;
    c.ignoreTLENDiff = params.ignoreTLENDiff;
    c.maxValueLen = params.maxValueLength ;

    if (params.ignoreTags != null) {
      String chunks[] = params.ignoreTags.split(":");
      for (String tagId : chunks) {
        if (!tagId.matches("^[A-Z]{2}$"))
          throw new RuntimeException(
              "Expecting tag id to match ^[A-Z]{2}$ but got this: "
                  + tagId);
        c.tagsToIgnore.add(tagId);
      }
    }

    if (params.ignoreFields != null) {
      String chunks[] = params.ignoreFields.split(":");
      for (String fieldName : chunks) {
        FIELD_TYPE type = FIELD_TYPE.valueOf(fieldName);
        c.fields.remove(type);
      }
    }

    List<SamRecordDiscrepancy> discrepancies = c.compareRecords(it1, it2,
        params.maxDiscrepancies);

    if (params.countOnly)
      System.out.println(discrepancies.size());
    else if (params.dbDumpFile == null) {
      if (discrepancies.isEmpty())
        System.out.println("No discrepancies found");
      else
        System.out.println("Found discrepansies: "
            + discrepancies.size());
      if (params.dumpDiscrepancies)
        for (SamRecordDiscrepancy d : discrepancies)
          c.log(d, params.dumpRecords, System.out);

      c.summary(discrepancies, System.out);
    } else {
      db(params.dbDumpFile, "discrepancy".toUpperCase(),
          discrepancies.iterator());
    }

    r1.close();
    r2.close();

  }
View Full Code Here

      source.id = file.getName();
      source.path = file.getAbsolutePath();

      File index = new File(file.getAbsolutePath() + ".bai");
      if (index.exists()) {
        SAMFileReader reader = new SAMFileReader(file, index);
        source.reader = reader;
        if (query == null)
          source.it = reader.iterator();
        else
          source.it = reader.query(query.sequence, query.start,
              query.end, true);
      } else {
        index = new File(file.getAbsolutePath() + ".crai");
        if (index.exists()) {
          SAMFileReader reader = new SAMFileReader(file);
          source.reader = reader;
          if (query == null)
            source.it = reader.iterator();
          else {
            SeekableFileStream is = new SeekableFileStream(file);

            FileInputStream fis = new FileInputStream(index);
            GZIPInputStream gis = new GZIPInputStream(
                new BufferedInputStream(fis));
            BufferedInputStream bis = new BufferedInputStream(gis);
            List<CramIndex.Entry> full = CramIndex.readIndex(gis);

            List<CramIndex.Entry> entries = new LinkedList<CramIndex.Entry>();
            SAMSequenceRecord sequence = reader.getFileHeader()
                .getSequence(query.sequence);
            if (sequence == null)
              throw new RuntimeException("Sequence not found: "
                  + query.sequence);

            entries.addAll(CramIndex.find(full,
                sequence.getSequenceIndex(), query.start,
                query.end - query.start));

            bis.close();

            SAMIterator it = new SAMIterator(is, refFile);
            is.seek(entries.get(0).containerStartOffset);
            BAMQueryFilteringIterator bit = new BAMQueryFilteringIterator(
                it, query.sequence, query.start, query.end,
                BAMQueryFilteringIterator.QueryType.CONTAINED,
                reader.getFileHeader());
            source.it = bit;
          }
        } else {
          SAMFileReader reader = new SAMFileReader(file);
          source.reader = reader;
          source.it = reader.iterator();
        }
      }
    }

    return sources;
View Full Code Here

    } else {
      if (!params.bai)
        throw new RuntimeException(
            "CRAM index is not compatible with BAM files.");

      SAMFileReader reader = new SAMFileReader(params.inputFile);
      if (!reader.isBinary()) {
        reader.close();
        throw new SAMException(
            "Input file must be bam file, not sam file.");
      }

      if (!reader.getFileHeader().getSortOrder()
          .equals(SAMFileHeader.SortOrder.coordinate)) {
        reader.close();
        throw new SAMException(
            "Input bam file must be sorted by coordinates");
      }

      File indexFile = new File(params.inputFile.getAbsolutePath()
          + ".bai");
      BAMIndexer indexer = new BAMIndexer(indexFile,
          reader.getFileHeader());
      for (SAMRecord record : reader) {
        indexer.processAlignment(record);
      }
      indexer.finish();
      reader.close();
    }

  }
View Full Code Here

      System.setProperty("reference",
          params.referenceFasta.getAbsolutePath());

    Log.setGlobalLogLevel(params.logLevel);

    SAMFileReader reader = null;
    if (params.file != null)
      reader = new SAMFileReader(params.file);
    else
      reader = new SAMFileReader(System.in);

    SAMRecordIterator it = null;
    if (params.location == null)
      it = reader.iterator();
    else {
      AlignmentSliceQuery query = new AlignmentSliceQuery(params.location);
      it = reader.query(query.sequence, query.start, query.end, false);
    }

    it = new BoundSAMRecordIterator(it, params.skipRecords,
        params.skipRecords + params.maxRecords);
    SAMFieldSelector s = new SAMFieldSelector(params.fields);
    SAMSubRecordStreamWriter writer = new SAMSubRecordStreamWriter();
    writer.selector = s;
    writer.ps = System.out;
    writer.enumerate = params.enumerate;
    long counter = params.skipRecords + 1;

    while (it.hasNext()) {
      SAMRecord record = it.next();
      writer.write(record, counter++);
    }

    it.close();
    reader.close();
  }
View Full Code Here

    File bamFile = params.bamFile;
    SAMFileReader
        .setDefaultValidationStringency(ValidationStringency.SILENT);

    SAMFileReader samFileReader = null;
    if (params.bamFile == null) {
      log.warn("No input file, reading from input...");
      samFileReader = new SAMFileReader(System.in);
    } else
      samFileReader = new SAMFileReader(bamFile);

    ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory
        .getReferenceSequenceFile(params.referenceFasta);

    BLOCK_PROTO.recordsPerSlice = params.maxSliceSize;
    ReferenceSequence sequence = null;
    List<SAMRecord> samRecords = new ArrayList<SAMRecord>(
        params.maxContainerSize);
    int prevSeqId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
    SAMRecordIterator iterator = samFileReader.iterator();
    {
      String seqName = null;
      SAMRecord samRecord = iterator.next();
      if (samRecord == null)
        throw new RuntimeException("No records found.");
      seqName = samRecord.getReferenceName();
      prevSeqId = samRecord.getReferenceIndex();
      samRecords.add(samRecord);

      // if (samFileReader.getFileHeader().getReadGroups().isEmpty()
      // || samFileReader.getFileHeader().getReadGroup(
      // Sam2CramRecordFactory.UNKNOWN_READ_GROUP_ID) == null) {
      // log.info("Adding default read group.");
      // SAMReadGroupRecord readGroup = new SAMReadGroupRecord(
      // Sam2CramRecordFactory.UNKNOWN_READ_GROUP_ID);
      //
      // readGroup
      // .setSample(Sam2CramRecordFactory.UNKNOWN_READ_GROUP_SAMPLE);
      // samFileReader.getFileHeader().addReadGroup(readGroup);
      // }

      if (SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(seqName))
        sequence = null;
      else
        sequence = Utils.trySequenceNameVariants(referenceSequenceFile,
            seqName);

    }

    QualityScorePreservation preservation;
    if (params.losslessQS)
      preservation = new QualityScorePreservation("*40");
    else
      preservation = new QualityScorePreservation(params.qsSpec);

    byte[] ref = sequence == null ? new byte[0] : sequence.getBases();

    {
      // hack:
      int newLines = 0;
      for (byte b : ref)
        if (b == 10)
          newLines++;
      byte[] ref2 = new byte[ref.length - newLines];
      int j = 0;
      for (int i = 0; i < ref.length; i++)
        if (ref[i] == 10)
          continue;
        else
          ref2[j++] = ref[i];
      ref = ref2;
    }

    OutputStream os;
    if (params.outputCramFile != null) {
      FileOutputStream fos = new FileOutputStream(params.outputCramFile);
      os = new BufferedOutputStream(fos);
    } else {
      log.warn("No output file, writint to STDOUT.");
      os = System.out;
    }

    if (params.encrypt) {
      CipherOutputStream_256 cos = new CipherOutputStream_256(os, pass,
          128);
      os = cos.getCipherOutputStream();
    }

    CramHeader h = new CramHeader(2, 0, params.bamFile == null ? "STDIN"
        : params.bamFile.getName(), samFileReader.getFileHeader());
    long offset = ReadWrite.writeCramHeader(h, os);

    long bases = 0;
    long coreBytes = 0;
    long[] externalBytes = new long[10];
    BLOCK_PROTO.recordsPerSlice = params.maxSliceSize;
    long globalRecordCounter = 0;
    MessageDigest md5_MessageDigest = MessageDigest.getInstance("MD5");

    do {
      if (params.outputCramFile == null && System.out.checkError())
        return;

      SAMRecord samRecord = iterator.next();
      if (samRecord == null)
        // no more records
        break;
      if (samRecord.getReferenceIndex() != prevSeqId
          || samRecords.size() >= params.maxContainerSize) {
        if (!samRecords.isEmpty()) {
          List<CramRecord> records = convert(samRecords,
              samFileReader.getFileHeader(), ref, preservation,
              params.captureAllTags, params.captureTags,
              params.ignoreTags);
          samRecords.clear();

          Container container = BLOCK_PROTO.buildContainer(records,
              samFileReader.getFileHeader(),
              params.preserveReadNames, globalRecordCounter,
              null, true);
          for (Slice s : container.slices) {
            if (s.alignmentStart < 1) {
              s.refMD5 = new byte[16];
              Arrays.fill(s.refMD5, (byte) 0);
              continue;
            }

            md5_MessageDigest.reset();

            int span = Math.min(s.alignmentSpan, ref.length
                - s.alignmentStart);
            if (s.alignmentStart == 0)
              System.out.println("gotcha");

            md5_MessageDigest.update(ref, s.alignmentStart - 1,
                span);

            String sliceRef = new String(ref, s.alignmentStart - 1,
                Math.min(span, 30));
            s.refMD5 = md5_MessageDigest.digest();
            log.debug("Slice ref starts with: " + sliceRef);
            log.debug("Slice ref md5: "
                + (String.format("%032x", new BigInteger(1,
                    s.refMD5))));
          }
          globalRecordCounter += records.size();
          records.clear();
          long len = ReadWrite.writeContainer(container, os);
          container.offset = offset;
          offset += len;

          log.info(String
              .format("CONTAINER WRITE TIMES: header build time %dms, slices build time %dms, io time %dms.",
                  container.buildHeaderTime / 1000000,
                  container.buildSlicesTime / 1000000,
                  container.writeTime / 1000000));

          for (Slice s : container.slices) {
            coreBytes += s.coreBlock.getCompressedContent().length;
            for (Integer i : s.external.keySet())
              externalBytes[i] += s.external.get(i)
                  .getCompressedContent().length;
          }
        }
      }

      if (prevSeqId != samRecord.getReferenceIndex()) {
        if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
          sequence = Utils
              .trySequenceNameVariants(referenceSequenceFile,
                  samRecord.getReferenceName());
          ref = sequence.getBases();
          {
            // hack:
            int newLines = 0;
            for (byte b : ref)
              if (b == 10)
                newLines++;
            byte[] ref2 = new byte[ref.length - newLines];
            int j = 0;
            for (int i = 0; i < ref.length; i++)
              if (ref[i] == 10)
                continue;
              else
                ref2[j++] = ref[i];
            ref = ref2;
          }
        } else
          ref = new byte[] {};
        prevSeqId = samRecord.getReferenceIndex();
      }

      samRecords.add(samRecord);
      bases += samRecord.getReadLength();

      if (params.maxRecords-- < 1)
        break;
    } while (iterator.hasNext());

    { // copied for now, should be a subroutine:
      if (!samRecords.isEmpty()) {
        List<CramRecord> records = convert(samRecords,
            samFileReader.getFileHeader(), ref, preservation,
            params.captureAllTags, params.captureTags,
            params.ignoreTags);
        samRecords.clear();
        Container container = BLOCK_PROTO.buildContainer(records,
            samFileReader.getFileHeader(),
            params.preserveReadNames, globalRecordCounter, null,
            true);
        for (Slice s : container.slices) {
          if (s.alignmentStart < 1) {
            s.refMD5 = new byte[16];
            Arrays.fill(s.refMD5, (byte) 0);
            continue;
          }

          md5_MessageDigest.reset();

          int span = Math.min(s.alignmentSpan, ref.length
              - s.alignmentStart);
          if (s.alignmentStart == 0)
            System.out.println("gotcha");

          md5_MessageDigest.update(ref, s.alignmentStart - 1,
              span);

          String sliceRef = new String(ref, s.alignmentStart - 1,
              Math.min(span, 30));
          s.refMD5 = md5_MessageDigest.digest();
          log.debug("Slice ref starts with: " + sliceRef);
          log.debug("Slice ref md5: "
              + (String.format("%032x", new BigInteger(1,
                  s.refMD5))));
        }
        records.clear();
        ReadWrite.writeContainer(container, os);
        log.info(String
            .format("CONTAINER WRITE TIMES: header build time %dms, slices build time %dms, io time %dms.",
                container.buildHeaderTime / 1000000,
                container.buildSlicesTime / 1000000,
                container.writeTime / 1000000));

        for (Slice s : container.slices) {
          coreBytes += s.coreBlock.getCompressedContent().length;
          for (Integer i : s.external.keySet())
            externalBytes[i] += s.external.get(i)
                .getCompressedContent().length;
        }
      }
    }

    iterator.close();
    samFileReader.close();
    os.close();

    StringBuilder sb = new StringBuilder();
    sb.append(String.format("STATS: core %.2f b/b", 8f * coreBytes / bases));
    for (int i = 0; i < externalBytes.length; i++)
View Full Code Here

  public static void main(String[] args, boolean preserveReadNames)
      throws IllegalArgumentException, IllegalAccessException,
      IOException {
    File bamFile = new File(
        "c:/temp/HG00096.mapped.illumina.mosaik.GBR.exome.20110411.chr20.bam");
    SAMFileReader samFileReader = new SAMFileReader(bamFile);
    ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory
        .getReferenceSequenceFile(new File(
            "c:/temp/human_g1k_v37.fasta"));

    ReferenceSequence sequence = null;
    {
      String seqName = null;
      SAMRecordIterator iterator = samFileReader.iterator();
      SAMRecord samRecord = iterator.next();
      seqName = samRecord.getReferenceName();
      iterator.close();
      sequence = referenceSequenceFile.getSequence(seqName);
    }

    int maxRecords = 100000;
    List<SAMRecord> samRecords = new ArrayList<SAMRecord>(maxRecords);

    int alStart = Integer.MAX_VALUE;
    int alEnd = 0;
    long baseCount = 0;
    SAMRecordIterator iterator = samFileReader.iterator();

    do {
      SAMRecord samRecord = iterator.next();
      if (!samRecord.getReferenceName().equals(sequence.getName()))
        break;

      baseCount += samRecord.getReadLength();
      samRecords.add(samRecord);
      if (samRecord.getAlignmentStart() > 0
          && alStart > samRecord.getAlignmentStart())
        alStart = samRecord.getAlignmentStart();
      if (alEnd < samRecord.getAlignmentEnd())
        alEnd = samRecord.getAlignmentEnd();

      if (samRecords.size() >= maxRecords)
        break;
    } while (iterator.hasNext());

    ReferenceTracks tracks = new ReferenceTracks(sequence.getContigIndex(),
        sequence.getName(), sequence.getBases(), alEnd - alStart + 100);
    tracks.moveForwardTo(alStart);

    Sam2CramRecordFactory f = new Sam2CramRecordFactory(
        sequence.getBases(), samFileReader.getFileHeader());
    f.captureUnmappedBases = true;
    f.captureUnmappedScores = true;
    List<CramRecord> cramRecords = new ArrayList<CramRecord>(maxRecords);
    int prevAlStart = samRecords.get(0).getAlignmentStart();
    int index = 0;
    QualityScorePreservation preservation = new QualityScorePreservation(
        "R8X10-R40X5-N40-U40");
    for (SAMRecord samRecord : samRecords) {
      CramRecord cramRecord = f.createCramRecord(samRecord);
      cramRecord.index = index++;
      cramRecord.alignmentStartOffsetFromPreviousRecord = samRecord
          .getAlignmentStart() - prevAlStart;
      prevAlStart = samRecord.getAlignmentStart();

      cramRecords.add(cramRecord);
      int refPos = samRecord.getAlignmentStart();
      int readPos = 0;
      for (CigarElement ce : samRecord.getCigar().getCigarElements()) {
        if (ce.getOperator().consumesReferenceBases()) {
          for (int i = 0; i < ce.getLength(); i++)
            tracks.addCoverage(refPos + i, 1);
        }
        switch (ce.getOperator()) {
        case M:
        case X:
        case EQ:
          for (int i = readPos; i < ce.getLength(); i++) {
            byte readBase = samRecord.getReadBases()[readPos + i];
            byte refBase = tracks.baseAt(refPos + i);
            if (readBase != refBase)
              tracks.addMismatches(refPos + i, 1);
          }
          break;

        default:
          break;
        }

        readPos += ce.getOperator().consumesReadBases() ? ce
            .getLength() : 0;
        refPos += ce.getOperator().consumesReferenceBases() ? ce
            .getLength() : 0;
      }

      preservation.addQualityScores(samRecord, cramRecord, tracks);
    }

    // mating:
    Map<String, CramRecord> mateMap = new TreeMap<String, CramRecord>();
    for (CramRecord r : cramRecords) {
      if (r.lastSegment) {
        r.recordsToNextFragment = -1;
        if (r.firstSegment)
          continue;
      }

      String name = r.getReadName();
      CramRecord mate = mateMap.get(name);
      if (mate == null) {
        mateMap.put(name, r);
        continue;
      }

      mate.recordsToNextFragment = r.index - mate.index - 1;
      mate.next = r;
      r.previous = mate;
    }
    for (CramRecord r : cramRecords) {
      if (!r.lastSegment && r.next == null)
        r.detached = true;
    }

    for (int i = 0; i < Math.min(cramRecords.size(), 10); i++)
      System.out.println(cramRecords.get(i).toString());

    System.out.println();

    long time1 = System.nanoTime();
    Container c = buildContainer(cramRecords,
        samFileReader.getFileHeader(), preserveReadNames, 0, null, true);
    long time2 = System.nanoTime();
    System.out.println("Container written in " + (time2 - time1) / 1000000
        + " milli seconds");

    ArrayList<CramRecord> newRecords = new ArrayList<CramRecord>();
    time1 = System.nanoTime();
    getRecords(c.h, c, samFileReader.getFileHeader(), newRecords);

    mateMap.clear();
    {
      int i = 0;
      for (CramRecord r : newRecords)
        r.index = i++;
    }
    for (CramRecord r : newRecords) {
      if (r.lastSegment) {
        r.recordsToNextFragment = -1;
        if (r.firstSegment)
          continue;
      }

      String name = r.getReadName();
      CramRecord mate = mateMap.get(name);
      if (mate == null) {
        mateMap.put(name, r);
        continue;
      }

      mate.recordsToNextFragment = r.index - mate.index - 1;
      mate.next = r;
      r.previous = mate;
    }
    for (CramRecord r : newRecords) {
      if (!r.lastSegment && r.next == null)
        r.detached = true;
    }

    time2 = System.nanoTime();
    System.out.println("Container read in " + (time2 - time1) / 1000000
        + " milli seconds");

    for (int i = 0; i < Math.min(newRecords.size(), 10); i++)
      System.out.println(newRecords.get(i).toString());

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    long size = 0;
    for (Slice s : c.slices) {
      size += s.coreBlock.getCompressedContent().length;
      for (Block b : s.external.values()) {
        size += b.getCompressedContent().length;
      }
    }
    System.out.println("Bases: " + baseCount);
    System.out.printf("Uncompressed container size: %d; %.2f\n", size, size
        * 8f / baseCount);
    System.out.printf("Compressed container size: %d; %.2f\n", baos.size(),
        baos.size() * 8f / baseCount);

    File cramFile = new File("c:/temp/test1.cram1");
    FileOutputStream fos = new FileOutputStream(cramFile);
    BufferedOutputStream bos = new BufferedOutputStream(fos);
    CramHeader cramHeader = new CramHeader(1, 0, bamFile.getName(),
        samFileReader.getFileHeader());

    ReadWrite.writeCramHeader(cramHeader, bos);
    ReadWrite.writeContainer(c, bos);
    bos.close();
    fos.close();
View Full Code Here

TOP

Related Classes of net.sf.samtools.SAMFileReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.