Package net.sf.picard.reference

Examples of net.sf.picard.reference.ReferenceSequence


    int coverage;
    int previous_position = 0;

    final Map<SNPCallPair, TreeMap<Integer, Long>> calltable_validation = new HashMap<SNPCallPair, TreeMap<Integer, Long>>();

    ReferenceSequence reference = null;

    while ((reference = reference_file.nextSequence()) != null) {
      for (SamPositionIterator position_iterator : positionIterators) {
        position_iterator.nextSequence();
        String ref_name = reference.getName();

        if (!position_iterator.getCurrentSequence().equals(ref_name))
          continue;

        System.out.println("Processing reads on sequence " + ref_name);

        byte[] ref_array = reference.getBases();

        // Iterate through loci with enough coverage
        while (position_iterator.hasMoreElements()) {

          PositionInfo p = position_iterator.nextElement();
View Full Code Here


      if (tagNucleotideDiffs == null) {
        addError(new SAMValidationError(Type.MISSING_TAG_NM,
            "NM tag (nucleotide differences) is missing",
            record.getReadName(), recordNumber));
      } else if (refFileWalker != null) {
        final ReferenceSequence refSequence = refFileWalker.get(record
            .getReferenceIndex());
        final int actualNucleotideDiffs = SequenceUtil
            .calculateSamNmTag(record, refSequence.getBases(), 0,
                isBisulfiteSequenced());

        if (!tagNucleotideDiffs.equals(actualNucleotideDiffs)) {
          addError(new SAMValidationError(Type.INVALID_TAG_NM,
              "NM tag (nucleotide differences) in file ["
View Full Code Here

  public static byte[] getBasesFromReferenceFile(String referenceFilePath,
      String seqName, int from, int length) {
    ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory
        .getReferenceSequenceFile(new File(referenceFilePath));
    ReferenceSequence sequence = referenceSequenceFile.getSequence(seqName);
    byte[] bases = referenceSequenceFile.getSubsequenceAt(
        sequence.getName(), from, from + length).getBases();
    return bases;
  }
View Full Code Here

              + file.getAbsolutePath() + "'");
  }

  public static ReferenceSequence getReferenceSequenceOrNull(
      ReferenceSequenceFile rsFile, String name) {
    ReferenceSequence rs = null;
    try {
      return rsFile.getSequence(name);
    } catch (PicardException e) {
      return null;
    }
View Full Code Here

  private static final Pattern chrPattern = Pattern.compile("chr.*",
      Pattern.CASE_INSENSITIVE);

  public static ReferenceSequence trySequenceNameVariants(
      ReferenceSequenceFile rsFile, String name) {
    ReferenceSequence rs = getReferenceSequenceOrNull(rsFile, name);

    if (rs == null && name.equals("M")) {
      rs = getReferenceSequenceOrNull(rsFile, "MT");
    }

View Full Code Here

  }

  public static byte[] getBasesOrNull(ReferenceSequenceFile rsFile,
      String name, int start, int len) {

    ReferenceSequence rs = trySequenceNameVariants(rsFile, name);
    if (rs == null)
      return null;

    if (len < 1)
      return rs.getBases();
    else
      return rsFile.getSubsequenceAt(rs.getName(), 1, len).getBases();
  }
View Full Code Here

  public static void checkRefMD5(SAMSequenceDictionary d,
      ReferenceSequenceFile refFile, boolean checkExistingMD5,
      boolean failIfMD5Mismatch) throws NoSuchAlgorithmException {

    for (SAMSequenceRecord r : d.getSequences()) {
      ReferenceSequence sequence = refFile.getSequence(r
          .getSequenceName());
      if (!r.getAttributes().contains(SAMSequenceRecord.MD5_TAG)) {
        String md5 = calculateMD5(sequence.getBases());
        r.setAttribute(SAMSequenceRecord.MD5_TAG, md5);
      } else {
        if (checkExistingMD5) {
          String existingMD5 = r
              .getAttribute(SAMSequenceRecord.MD5_TAG);
          String md5 = calculateMD5(sequence.getBases());
          if (!md5.equals(existingMD5)) {

            String message = String
                .format("For sequence %s the md5 %s does not match the actual md5 %s.",
                    r.getSequenceName(), existingMD5, md5);
View Full Code Here

      if (c.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
        ref = new byte[] {};
      } else if (prevSeqId < 0 || prevSeqId != c.sequenceId) {
        SAMSequenceRecord sequence = cramHeader.samFileHeader
            .getSequence(c.sequenceId);
        ReferenceSequence referenceSequence = Utils
            .trySequenceNameVariants(referenceSequenceFile,
                sequence.getSequenceName());
        ref = referenceSequence.getBases();
        {
          // hack:
          int newLines = 0;
          for (byte b : ref)
            if (b == 10)
View Full Code Here

    ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory
        .getReferenceSequenceFile(params.referenceFasta);

    BLOCK_PROTO.recordsPerSlice = params.maxSliceSize;
    ReferenceSequence sequence = null;
    List<SAMRecord> samRecords = new ArrayList<SAMRecord>(
        params.maxContainerSize);
    int prevSeqId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
    SAMRecordIterator iterator = samFileReader.iterator();
    {
      String seqName = null;
      SAMRecord samRecord = iterator.next();
      if (samRecord == null)
        throw new RuntimeException("No records found.");
      seqName = samRecord.getReferenceName();
      prevSeqId = samRecord.getReferenceIndex();
      samRecords.add(samRecord);

      // if (samFileReader.getFileHeader().getReadGroups().isEmpty()
      // || samFileReader.getFileHeader().getReadGroup(
      // Sam2CramRecordFactory.UNKNOWN_READ_GROUP_ID) == null) {
      // log.info("Adding default read group.");
      // SAMReadGroupRecord readGroup = new SAMReadGroupRecord(
      // Sam2CramRecordFactory.UNKNOWN_READ_GROUP_ID);
      //
      // readGroup
      // .setSample(Sam2CramRecordFactory.UNKNOWN_READ_GROUP_SAMPLE);
      // samFileReader.getFileHeader().addReadGroup(readGroup);
      // }

      if (SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(seqName))
        sequence = null;
      else
        sequence = Utils.trySequenceNameVariants(referenceSequenceFile,
            seqName);

    }

    QualityScorePreservation preservation;
    if (params.losslessQS)
      preservation = new QualityScorePreservation("*40");
    else
      preservation = new QualityScorePreservation(params.qsSpec);

    byte[] ref = sequence == null ? new byte[0] : sequence.getBases();

    {
      // hack:
      int newLines = 0;
      for (byte b : ref)
        if (b == 10)
          newLines++;
      byte[] ref2 = new byte[ref.length - newLines];
      int j = 0;
      for (int i = 0; i < ref.length; i++)
        if (ref[i] == 10)
          continue;
        else
          ref2[j++] = ref[i];
      ref = ref2;
    }

    OutputStream os;
    if (params.outputCramFile != null) {
      FileOutputStream fos = new FileOutputStream(params.outputCramFile);
      os = new BufferedOutputStream(fos);
    } else {
      log.warn("No output file, writint to STDOUT.");
      os = System.out;
    }

    if (params.encrypt) {
      CipherOutputStream_256 cos = new CipherOutputStream_256(os, pass,
          128);
      os = cos.getCipherOutputStream();
    }

    CramHeader h = new CramHeader(2, 0, params.bamFile == null ? "STDIN"
        : params.bamFile.getName(), samFileReader.getFileHeader());
    long offset = ReadWrite.writeCramHeader(h, os);

    long bases = 0;
    long coreBytes = 0;
    long[] externalBytes = new long[10];
    BLOCK_PROTO.recordsPerSlice = params.maxSliceSize;
    long globalRecordCounter = 0;
    MessageDigest md5_MessageDigest = MessageDigest.getInstance("MD5");

    do {
      if (params.outputCramFile == null && System.out.checkError())
        return;

      SAMRecord samRecord = iterator.next();
      if (samRecord == null)
        // no more records
        break;
      if (samRecord.getReferenceIndex() != prevSeqId
          || samRecords.size() >= params.maxContainerSize) {
        if (!samRecords.isEmpty()) {
          List<CramRecord> records = convert(samRecords,
              samFileReader.getFileHeader(), ref, preservation,
              params.captureAllTags, params.captureTags,
              params.ignoreTags);
          samRecords.clear();

          Container container = BLOCK_PROTO.buildContainer(records,
              samFileReader.getFileHeader(),
              params.preserveReadNames, globalRecordCounter,
              null, true);
          for (Slice s : container.slices) {
            if (s.alignmentStart < 1) {
              s.refMD5 = new byte[16];
              Arrays.fill(s.refMD5, (byte) 0);
              continue;
            }

            md5_MessageDigest.reset();

            int span = Math.min(s.alignmentSpan, ref.length
                - s.alignmentStart);
            if (s.alignmentStart == 0)
              System.out.println("gotcha");

            md5_MessageDigest.update(ref, s.alignmentStart - 1,
                span);

            String sliceRef = new String(ref, s.alignmentStart - 1,
                Math.min(span, 30));
            s.refMD5 = md5_MessageDigest.digest();
            log.debug("Slice ref starts with: " + sliceRef);
            log.debug("Slice ref md5: "
                + (String.format("%032x", new BigInteger(1,
                    s.refMD5))));
          }
          globalRecordCounter += records.size();
          records.clear();
          long len = ReadWrite.writeContainer(container, os);
          container.offset = offset;
          offset += len;

          log.info(String
              .format("CONTAINER WRITE TIMES: header build time %dms, slices build time %dms, io time %dms.",
                  container.buildHeaderTime / 1000000,
                  container.buildSlicesTime / 1000000,
                  container.writeTime / 1000000));

          for (Slice s : container.slices) {
            coreBytes += s.coreBlock.getCompressedContent().length;
            for (Integer i : s.external.keySet())
              externalBytes[i] += s.external.get(i)
                  .getCompressedContent().length;
          }
        }
      }

      if (prevSeqId != samRecord.getReferenceIndex()) {
        if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
          sequence = Utils
              .trySequenceNameVariants(referenceSequenceFile,
                  samRecord.getReferenceName());
          ref = sequence.getBases();
          {
            // hack:
            int newLines = 0;
            for (byte b : ref)
              if (b == 10)
View Full Code Here

    SAMFileReader samFileReader = new SAMFileReader(bamFile);
    ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory
        .getReferenceSequenceFile(new File(
            "c:/temp/human_g1k_v37.fasta"));

    ReferenceSequence sequence = null;
    {
      String seqName = null;
      SAMRecordIterator iterator = samFileReader.iterator();
      SAMRecord samRecord = iterator.next();
      seqName = samRecord.getReferenceName();
      iterator.close();
      sequence = referenceSequenceFile.getSequence(seqName);
    }

    int maxRecords = 100000;
    List<SAMRecord> samRecords = new ArrayList<SAMRecord>(maxRecords);

    int alStart = Integer.MAX_VALUE;
    int alEnd = 0;
    long baseCount = 0;
    SAMRecordIterator iterator = samFileReader.iterator();

    do {
      SAMRecord samRecord = iterator.next();
      if (!samRecord.getReferenceName().equals(sequence.getName()))
        break;

      baseCount += samRecord.getReadLength();
      samRecords.add(samRecord);
      if (samRecord.getAlignmentStart() > 0
          && alStart > samRecord.getAlignmentStart())
        alStart = samRecord.getAlignmentStart();
      if (alEnd < samRecord.getAlignmentEnd())
        alEnd = samRecord.getAlignmentEnd();

      if (samRecords.size() >= maxRecords)
        break;
    } while (iterator.hasNext());

    ReferenceTracks tracks = new ReferenceTracks(sequence.getContigIndex(),
        sequence.getName(), sequence.getBases(), alEnd - alStart + 100);
    tracks.moveForwardTo(alStart);

    Sam2CramRecordFactory f = new Sam2CramRecordFactory(
        sequence.getBases(), samFileReader.getFileHeader());
    f.captureUnmappedBases = true;
    f.captureUnmappedScores = true;
    List<CramRecord> cramRecords = new ArrayList<CramRecord>(maxRecords);
    int prevAlStart = samRecords.get(0).getAlignmentStart();
    int index = 0;
View Full Code Here

TOP

Related Classes of net.sf.picard.reference.ReferenceSequence

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.