Package net.sf.samtools

Examples of net.sf.samtools.SAMFileReader


  }
 
  private void downsampleSam(String sam, String downsampledSam, double keepProbability) {
    System.out.println("keepProbability: " + keepProbability);
   
    SAMFileReader reader = new SAMFileReader(new File(sam));
    reader.setValidationStringency(ValidationStringency.SILENT);
   
    SAMFileWriter downsampleOutput = new SAMFileWriterFactory().makeSAMOrBAMWriter(
        samHeaders[0], true, new File(downsampledSam));

    Random random = new Random(RANDOM_SEED);
    int downsampleCount = 0;
   
    for (SAMRecord read : reader) {
      if (random.nextDouble() < keepProbability) {
        downsampleOutput.addAlignment(read);
        downsampleCount += 1;
      }
    }
   
    downsampleOutput.close();
    reader.close();
   
    System.out.println("Downsampled to: " + downsampleCount);
  }
View Full Code Here


   
    log("Identifying header and determining read length");
   
    this.samHeaders = new SAMFileHeader[this.inputSams.length];
   
    SAMFileReader reader = new SAMFileReader(new File(inputSams[0]));
    try {
      reader.setValidationStringency(ValidationStringency.SILENT);
 
      //ASSUMPTION!: All samples have same read length & insert len!
      samHeaders[0] = reader.getFileHeader();
      samHeaders[0].setSortOrder(SAMFileHeader.SortOrder.unsorted);
     
      Iterator<SAMRecord> iter = reader.iterator();
     
      int cnt = 0;
      while ((iter.hasNext()) && (cnt < 1000000)) {
        SAMRecord read = iter.next();
        this.readLength = Math.max(this.readLength, read.getReadLength());
        this.maxMapq = Math.max(this.maxMapq, read.getMappingQuality());
       
        // Assumes aligner sets proper pair flag correctly
        if ((isPairedEnd) && (read.getReadPairedFlag()) && (read.getProperPairFlag())) {
          this.minInsertLength = Math.min(this.minInsertLength, Math.abs(read.getInferredInsertSize()));
          this.maxInsertLength = Math.max(this.maxInsertLength, Math.abs(read.getInferredInsertSize()));
        }
      }
     
      // Allow some fudge in insert length
      minInsertLength = Math.max(minInsertLength - 2*readLength, 0);
      maxInsertLength = maxInsertLength + 2*readLength;
     
      System.out.println("Min insert length: " + minInsertLength);
      System.out.println("Max insert length: " + maxInsertLength);
     
    } finally {
      reader.close();
    }
   
    for (int i=1; i<this.inputSams.length; i++) {
      SAMFileReader reader2 = new SAMFileReader(new File(inputSams[i]));
      reader2.setValidationStringency(ValidationStringency.SILENT);
      samHeaders[i] = reader2.getFileHeader();
      samHeaders[i].setSortOrder(SAMFileHeader.SortOrder.unsorted);
      reader2.close();     
    }
       
    log("Max read length is: " + readLength);
    if (assemblerSettings.getMinContigLength() < 1) {
      assemblerSettings.setMinContigLength(Math.max(readLength+1, MIN_CONTIG_LENGTH));
View Full Code Here

 
  //TODO: Dedup with getSamHeaderAndReadLength
  private void getRnaSamHeaderAndReadLength(String inputSam) {
   
    log("Identifying RNA header and determining read length");
    SAMFileReader reader = new SAMFileReader(new File(inputSam));
    try {
      reader.setValidationStringency(ValidationStringency.SILENT);
 
      rnaHeader = reader.getFileHeader();
      rnaHeader.setSortOrder(SAMFileHeader.SortOrder.unsorted);
     
      Iterator<SAMRecord> iter = reader.iterator();
     
      int cnt = 0;
      while ((iter.hasNext()) && (cnt < 1000000)) {
        SAMRecord read = iter.next();
        this.rnaReadLength = Math.max(this.rnaReadLength, read.getReadLength());
      }
    } finally {
      reader.close();
    }
   
    log("Max RNA read length is: " + rnaReadLength);
  }
View Full Code Here

   
    boolean hasCleanContigs = false;
   
    BufferedWriter writer = new BufferedWriter(new FileWriter(cleanContigsFasta, false));
   
    SAMFileReader contigReader = new SAMFileReader(new File(contigsSam));
    contigReader.setValidationStringency(ValidationStringency.SILENT);
   
    int contigCount = 0;
   
    for (SAMRecord contigRead : contigReader) {
      if (contigRead.getMappingQuality() >= this.minContigMapq) {
       
        SAMRecordUtils.removeSoftClips(contigRead);
       
        String bases = contigRead.getReadString();
       
        //TODO: Why would cigar length be zero here?
        if ((bases.length() >= assemblerSettings.getMinContigLength()) &&
          (contigRead.getCigarLength() > 0)) {
         
          CigarElement first = contigRead.getCigar().getCigarElement(0);
          CigarElement last = contigRead.getCigar().getCigarElement(contigRead.getCigarLength()-1);
         
          if ((first.getOperator() == CigarOperator.M) &&
            (last.getOperator() == CigarOperator.M)) {
           
            String prefix = "";
            String suffix = "";

            if (!contigRead.getReferenceName().startsWith("uc0")) {
           
              // Pull in read length bases from reference to the beginning and end of the contig.
              prefix = c2r.getSequence(contigRead.getReferenceName(),
                  contigRead.getAlignmentStart()-readLength, readLength);
              suffix = c2r.getSequence(contigRead.getReferenceName(), contigRead.getAlignmentEnd()+1, readLength);
             
              bases = prefix.toUpperCase() + bases + suffix.toUpperCase();
            }
           
            Cigar cigar = new Cigar();
            if (contigRead.getCigarLength() == 1) {
              CigarElement elem = new CigarElement(first.getLength() + prefix.length() + suffix.length(), first.getOperator());
              cigar.add(elem);
            } else {
              CigarElement firstNew = new CigarElement(first.getLength() + prefix.length(), first.getOperator());
              CigarElement lastNew = new CigarElement(last.getLength() + suffix.length(), last.getOperator());
             
              cigar.add(firstNew);
              for (int i=1; i<contigRead.getCigarLength()-1; i++) {
                cigar.add(contigRead.getCigar().getCigarElement(i));
              }
             
              cigar.add(lastNew);
            }
           
            contigRead.setCigar(cigar);
            contigRead.setAlignmentStart(contigRead.getAlignmentStart()-prefix.length());

          } else {
            System.out.println("Not padding contig: " + contigRead.getReadName());
          }
         
          // Aligned contigs are already expressed in forward strand context
  //        if (contigRead.getReadNegativeStrandFlag()) {
  //          bases = reverseComplementor.reverseComplement(bases);
  //        }
         
          //TODO: Safer delimiter?  This assumes no ~ in any read
          contigRead.setReadString("");
          String contigReadStr = contigRead.getSAMString();
          contigReadStr = contigReadStr.replace('\t','~');
         
          String contigName = contigRead.getReadName() + "_" + contigCount++ + "~" + contigReadStr;
         
          writer.append(">" + contigName);
          writer.append(bases);
          writer.append("\n");
          hasCleanContigs = true;
        }
      }
    }
    contigReader.close();
   
    writer.close();
   
    return hasCleanContigs;
  }
View Full Code Here

      this.origRead = null;
    }
  }
 
  public static void main(String[] args) {
    SAMFileReader reader = new SAMFileReader(new File("/home/lmose/dev/abra/1076/candidates.bam"));
    SAMFileHeader header = reader.getFileHeader();
    reader.close();
    header.setSortOrder(SortOrder.unsorted);
   
    SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
    writerFactory.setUseAsyncIo(false);
    SAMFileWriter writer = writerFactory.makeSAMOrBAMWriter(
View Full Code Here

      SAMFileWriter writer, boolean isPairedEnd,
      List<Feature> regions, int minMappingQuality) throws IOException {
   
    System.out.println("sam: " + inputSam);
   
        SAMFileReader reader = new SAMFileReader(new File(inputSam));
        reader.setValidationStringency(ValidationStringency.SILENT);
       
        output1 = new FastqOutputFile();
        output1.init(outputFastq);
        int lineCnt = 0;
       
        this.regionTracker = new RegionTracker(regions, reader.getFileHeader());
       
        for (SAMRecord read : reader) {
        if (SAMRecordUtils.isPrimary(read) && (!SAMRecordUtils.isFiltered(isPairedEnd, read))) {
         
          // These tags can be lengthy, so remove them.
          // TODO: Improve the way this is handled
          read.setAttribute("XA", null);
          read.setAttribute("OQ", null);
          read.setAttribute("MD", null);
          read.setAttribute("BQ", null);
         
          int yx = 0;
         
          boolean isAmbiguous = !read.getReadUnmappedFlag() && read.getMappingQuality() == 0;
         
          if ((!read.getReadFailsVendorQualityCheckFlag()) && (!isAmbiguous)) {
            // Calculate the number of mismatches to reference for this read.
            if (c2r != null) {
              try {
                yx = SAMRecordUtils.getEditDistance(read, c2r);
              } catch (ArrayIndexOutOfBoundsException e) {
                System.out.println("Index error for read: " + read.getSAMString());
                throw e;
              }
            } else {
              yx = read.getReadLength();
            }
           
            read.setAttribute("YX", yx);
          }

          boolean offTargetFiltered = false;
        if (yx > 0 && !read.getReadUnmappedFlag() && read.getMappingQuality() < MIN_OFF_TARGET_MAPQ && !regionTracker.isInRegion(read)) {
          read.setAttribute("YR", 2);
          offTargetFiltered = true;
//          System.out.println("Filtering off target: " + read.getSAMString());
        }
         
          if ((yx > 0 && !offTargetFiltered && read.getMappingQuality() >= minMappingQuality) || (read.getReadUnmappedFlag())) {
           
            if ((!read.getReadUnmappedFlag()) && (!regionTracker.isInRegion(read))) {
              read.setAttribute("YR", 1);
            }
           
            // Eligible for realignment, output FASTQ record
            output1.write(samReadToFastqRecord(read, c2r));
          } else if (writer != null) {
            // Either xactly matches reference or failed vendor QC or low mapq, so
            // output directly to final BAM
            writer.addAlignment(read);
          }
        }
       
            lineCnt++;
            if ((lineCnt % 1000000) == 0) {
                System.out.println("record: " + lineCnt);
            }
        }
               
        output1.close();
        reader.close();
  }
View Full Code Here

*/
  public static void main(String[] args) throws Exception {
   
//    String inputSam = "/home/lmose/dev/abra/region_tracker/normal.sort.bam";
    String inputSam = "/home/lmose/dev/abra/region_tracker/chr12.small.bam";
    SAMFileReader reader = new SAMFileReader(new File(inputSam));
    SAMFileHeader header = reader.getFileHeader();
    reader.close();
       
    CompareToReference2 c2r = null;
   
    SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
//    writerFactory.setUseAsyncIo(true);
View Full Code Here

       
        BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile, false));
       
        File file = new File(inputFile);
       
        SAMFileReader inputSam = new SAMFileReader(file);
        inputSam.setValidationStringency(ValidationStringency.SILENT);
       
        int count = 0;

        for (SAMRecord read : inputSam) {
            updateJunctionCounts(read);
            if ((count++ % 1000000) == 0) {
                System.out.println("Processed " + count + " reads.");
            }
        }
        
        inputSam.close();
        outputCounts(writer);
        writer.close();
    }
View Full Code Here

//    reads1 = new BufferedWriter(new FileWriter(readsFile + "1.fa", false));
//    reads2 = new BufferedWriter(new FileWriter(readsFile + "2.fa", false));
   
    List<SAMRecord> currReads = new ArrayList<SAMRecord>();
   
    SAMFileReader reader = new SAMFileReader(new File(input));
    reader.setValidationStringency(ValidationStringency.SILENT);
   
    int prevMaxEnd = -1;
   
    SAMRecord lastRead = null;

    for (SAMRecord read : reader) {
      if (read.getMappingQuality() > 0) {
        if (lastRead == null || !lastRead.getReferenceName().equals(read.getReferenceName()) || (read.getAlignmentStart()-prevMaxEnd) < MAX_READ_GAP) {
          currReads.add(read);
        } else {
//          processReads(currReads);
          spawnProcessingThread(currReads);
          currReads = new ArrayList<SAMRecord>();
          currReads.add(read);
        }
       
        if (read.getAlignmentEnd() > prevMaxEnd || !lastRead.getReferenceName().equals(read.getReferenceName())) {
          prevMaxEnd = read.getAlignmentEnd();
        }
       
        lastRead = read;
      }
    }
   
    threadManager.waitForAllThreadsToComplete();
   
    reader.close();
    contigWriter.close();
    badRegionBed.close();
  }
View Full Code Here

   
    log("Adjusting reads.");
   
    RealignmentWriter writer = getRealignmentWriter(outputSam, isTightAlignment, tempDir);
   
    SAMFileReader contigReader = new SAMFileReader(new File(alignedToContigSam));
    contigReader.setValidationStringency(ValidationStringency.SILENT);
   
    SamStringReader samStringReader = new SamStringReader(samHeader);
   
    for (SAMRecord read : contigReader) {
     
      String origSamStr = read.getReadName();
      origSamStr = origSamStr.replace(Sam2Fastq.FIELD_DELIMITER, "\t");
      SAMRecord orig;
      try {
        orig = samStringReader.getRead(origSamStr);
      } catch (RuntimeException e) {
        System.out.println("Error processing: [" + origSamStr + "]");
        System.out.println("Contig read: [" + read.getSAMString() + "]");
        e.printStackTrace();
        throw e;
      }
      orig.setHeader(samHeader);
     
      orig.setReadString(read.getReadString());
      orig.setBaseQualityString(read.getBaseQualityString());

      SAMRecord readToOutput = null;
     
      // Only adjust reads that align to contig with no indel and shorter edit distance than the original alignment
      String matchingString = read.getReadLength() + "M";
      if ((read.getCigarString().equals(matchingString)) &&
        (read.getReadUnmappedFlag() == false&&
        (!orig.getCigarString().contains("N")) &&  // Don't remap introns
        (SAMRecordUtils.getEditDistance(read, null) < SAMRecordUtils.getOrigEditDistance(orig)) &&
        (!isFiltered(orig))) {
       
        SAMRecord origRead = orig;
        String contigReadStr = read.getReferenceName();
       
        int numBestHits = SAMRecordUtils.getIntAttribute(read, "X0");
        int numSubOptimalHits = SAMRecordUtils.getIntAttribute(read, "X1");
       
        int totalHits = numBestHits + numSubOptimalHits;
       
        List<HitInfo> bestHits = getBestHits(contigReadStr, samStringReader, read, matchingString);
       
        Map<String, SAMRecord> outputReadAlignmentInfo = convertBestHitsToAlignmentInfo(bestHits, origRead);
       
        readToOutput = getUpdatedReadInfo(outputReadAlignmentInfo, read,
            orig, origRead, c2r, totalHits, isTightAlignment);
       
      }
     
      adjustForStrand(read.getReadNegativeStrandFlag(), orig);
      writer.addAlignment(readToOutput, orig);
    }

    int realignedCount = writer.flush();
    contigReader.close();
   
    log("Done adjusting reads.  Number of reads realigned: " + realignedCount);
  }
View Full Code Here

TOP

Related Classes of net.sf.samtools.SAMFileReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.