package picard.analysis;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import java.io.File;
import java.util.Arrays;
import java.util.Collection;
/**
* Super class that is designed to provide some consistent structure between subclasses that
* simply iterate once over a coordinate sorted BAM and collect information from the records
* as they go in order to produce some kind of output.
*
* @author Tim Fennell
*/
public abstract class SinglePassSamProgram extends CommandLineProgram {
    @Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME, doc="Input SAM or BAM file.")
    public File INPUT;

    @Option(shortName="O", doc="File to write the output to.")
    public File OUTPUT;

    @Option(shortName=StandardOptionDefinitions.REFERENCE_SHORT_NAME, doc="Reference sequence fasta", optional=true)
    public File REFERENCE_SEQUENCE;

    @Option(doc="If true (default), then the sort order in the header file will be ignored.",
            shortName = StandardOptionDefinitions.ASSUME_SORTED_SHORT_NAME)
    public boolean ASSUME_SORTED = true;

    @Option(doc="Stop after processing N reads, mainly for debugging.")
    public long STOP_AFTER = 0;

    private static final Log log = Log.getInstance(SinglePassSamProgram.class);

    /**
     * Final implementation of doWork() that checks and loads the input and optionally reference
     * sequence files and then runs the subclass through the setup(), acceptRead() and finish() steps.
     *
     * @return always 0; failures are reported by exceptions thrown from {@link #makeItSo}
     */
    @Override protected final int doWork() {
        makeItSo(INPUT, REFERENCE_SEQUENCE, ASSUME_SORTED, STOP_AFTER, Arrays.asList(this));
        return 0;
    }

    /**
     * Iterates once over the records of {@code input} and feeds every record to each of the
     * supplied programs. Each program has setup() called before the first record is read and
     * finish() called after the input has been closed.
     *
     * @param input             the SAM or BAM file to read; must be readable
     * @param referenceSequence optional reference sequence file; may be null, in which case
     *                          programs always receive a null reference in acceptRead()
     * @param assumeSorted      if true, a header sort order other than coordinate only produces a
     *                          warning; if false it causes a PicardException
     * @param stopAfter         if greater than zero, stop once this many records have been processed
     * @param programs          the programs that should receive the records
     * @throws PicardException if the header does not report coordinate sort order and
     *                         {@code assumeSorted} is false
     */
    public static void makeItSo(final File input,
                                final File referenceSequence,
                                final boolean assumeSorted,
                                final long stopAfter,
                                final Collection<SinglePassSamProgram> programs) {
        // Setup the standard inputs
        IOUtil.assertFileIsReadable(input);
        final SAMFileReader in = new SAMFileReader(input);

        // Everything that touches the open reader lives inside the try block so that the reader is
        // released even when validation, setup() or acceptRead() throws.
        try {
            // Optionally load up the reference sequence and double check sequence dictionaries
            // NOTE(review): the walker is never closed here; confirm whether the htsjdk version in
            // use exposes a close() on ReferenceSequenceFileWalker and call it if so.
            final ReferenceSequenceFileWalker walker;
            if (referenceSequence == null) {
                walker = null;
            }
            else {
                IOUtil.assertFileIsReadable(referenceSequence);
                walker = new ReferenceSequenceFileWalker(referenceSequence);

                // Only compare dictionaries when the input actually declares one
                if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
                    SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(),
                                                                 walker.getSequenceDictionary());
                }
            }

            // Check on the sort order of the BAM file
            checkSortOrder(in.getFileHeader(), input, assumeSorted);

            // Call the abstract setup method!
            boolean anyUseNoRefReads = false;
            for (final SinglePassSamProgram program : programs) {
                program.setup(in.getFileHeader(), input);
                anyUseNoRefReads = anyUseNoRefReads || program.usesNoRefReads();
            }

            final ProgressLogger progress = new ProgressLogger(log);

            for (final SAMRecord rec : in) {
                // Records without a reference index, or runs without a reference file, get no reference
                final ReferenceSequence ref;
                if (walker == null || rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
                    ref = null;
                }
                else {
                    ref = walker.get(rec.getReferenceIndex());
                }

                for (final SinglePassSamProgram program : programs) {
                    program.acceptRead(rec, ref);
                }

                progress.record(rec);

                // See if we need to terminate early?
                if (stopAfter > 0 && progress.getCount() >= stopAfter) {
                    break;
                }

                // And see if we're into the unmapped reads at the end. Note that the record that
                // triggers this check has already been handed to every program above.
                if (!anyUseNoRefReads && rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
                    break;
                }
            }
        }
        finally {
            in.close();
        }

        // Only reached when the pass completed without an exception; the reader is already closed.
        for (final SinglePassSamProgram program : programs) {
            program.finish();
        }
    }

    /**
     * Verifies that the header reports coordinate sort order. Logs a warning when it does not and
     * {@code assumeSorted} is true; throws a PicardException when it does not and
     * {@code assumeSorted} is false.
     */
    private static void checkSortOrder(final SAMFileHeader header, final File input, final boolean assumeSorted) {
        final SortOrder sort = header.getSortOrder();
        if (sort == SortOrder.coordinate) {
            return;
        }

        if (assumeSorted) {
            log.warn("File reports sort order '" + sort + "', assuming it's coordinate sorted anyway.");
        }
        else {
            throw new PicardException("File " + input.getAbsolutePath() + " should be coordinate sorted but " +
                    "the header says the sort order is " + sort + ". If you believe the file " +
                    "to be coordinate sorted you may pass ASSUME_SORTED=true");
        }
    }

    /**
     * Can be overridden and set to false if the section of unmapped reads at the end of the file isn't needed.
     * When no program in a run returns true, iteration stops after the first record without a reference index.
     */
    protected boolean usesNoRefReads() { return true; }

    /**
     * Should be implemented by subclasses to do one-time initialization work.
     *
     * @param header  the header of the input file
     * @param samFile the input file itself
     */
    protected abstract void setup(final SAMFileHeader header, final File samFile);

    /**
     * Should be implemented by subclasses to accept SAMRecords one at a time.
     * If the read has a reference sequence and a reference sequence file was supplied to the program
     * it will be passed as 'ref'. Otherwise 'ref' may be null.
     */
    protected abstract void acceptRead(final SAMRecord rec, final ReferenceSequence ref);

    /** Should be implemented by subclasses to do one-time finalization work. */
    protected abstract void finish();
}