/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.sam;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMValidationError;
import htsjdk.samtools.SamFileValidator;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.IOUtil;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.SamOrBam;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
/**
* Command line program wrapping SamFileValidator.
*
* @author Doug Voet
*/
@CommandLineProgramProperties(
usage = "Read a SAM or BAM file and report on its validity.",
usageShort = "Validates a SAM or BAM file",
programGroup = SamOrBam.class
)
public class ValidateSamFile extends CommandLineProgram {
public enum Mode { VERBOSE, SUMMARY }
@Option(shortName=StandardOptionDefinitions.INPUT_SHORT_NAME,
doc="Input SAM/BAM file")
public File INPUT;
@Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME,
doc="Output file or standard out if missing",
optional=true)
public File OUTPUT;
@Option(shortName="M",
doc="Mode of output")
public Mode MODE = Mode.VERBOSE;
@Option(doc="List of validation error types to ignore.")
public List<SAMValidationError.Type> IGNORE = new ArrayList<SAMValidationError.Type>();
@Option(shortName="MO",
doc="The maximum number of lines output in verbose mode")
public Integer MAX_OUTPUT = 100;
@Option(shortName=StandardOptionDefinitions.REFERENCE_SHORT_NAME,
doc="Reference sequence file, the NM tag check will be skipped if this is missing",
optional=true)
public File REFERENCE_SEQUENCE;
@Option(doc="If true, only report errors and ignore warnings.")
public boolean IGNORE_WARNINGS = false;
@Option(doc="If true and input is a BAM file with an index file, also validates the index.")
public boolean VALIDATE_INDEX = true;
@Option (shortName="BISULFITE",
doc="Whether the SAM or BAM file consists of bisulfite sequenced reads. " +
"If so, C->T is not counted as an error in computing the value of the NM tag.")
public boolean IS_BISULFITE_SEQUENCED = false;
@Option(doc="Relevant for a coordinate-sorted file containing read pairs only. " +
"Maximum number of file handles to keep open when spilling mate info to disk. " +
"Set this number a little lower than the per-process maximum number of file that may be open. " +
"This number can be found by executing the 'ulimit -n' command on a Unix system.")
public int MAX_OPEN_TEMP_FILES = 8000;
public static void main(final String[] args) {
System.exit(new ValidateSamFile().instanceMain(args));
}
@Override
protected int doWork() {
IOUtil.assertFileIsReadable(INPUT);
ReferenceSequenceFile reference = null;
if (REFERENCE_SEQUENCE != null) {
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
}
final PrintWriter out;
if (OUTPUT != null) {
IOUtil.assertFileIsWritable(OUTPUT);
try {
out = new PrintWriter(OUTPUT);
}
catch (FileNotFoundException e) {
// we already asserted this so we should not get here
throw new PicardException("Unexpected exception", e);
}
}
else {
out = new PrintWriter(System.out);
}
final ValidationStringency originalStringency = SAMFileReader.getDefaultValidationStringency();
SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
boolean result;
try {
final SAMFileReader samReader = new SAMFileReader(INPUT);
if (!samReader.isBinary()) VALIDATE_INDEX=false;
if (VALIDATE_INDEX){
samReader.enableIndexCaching(true);
}
samReader.enableCrcChecking(true);
final SamFileValidator validator = new SamFileValidator(out, MAX_OPEN_TEMP_FILES);
validator.setErrorsToIgnore(IGNORE);
if (IGNORE_WARNINGS) {
validator.setIgnoreWarnings(IGNORE_WARNINGS);
}
if (MODE == Mode.SUMMARY) {
validator.setVerbose(false, 0);
} else {
validator.setVerbose(true, MAX_OUTPUT);
}
if (IS_BISULFITE_SEQUENCED) {
validator.setBisulfiteSequenced(IS_BISULFITE_SEQUENCED);
}
if (VALIDATE_INDEX){
validator.setValidateIndex(VALIDATE_INDEX);
}
if (IOUtil.isRegularPath(INPUT)) {
// Do not check termination if reading from a stream
validator.validateBamFileTermination(INPUT);
}
result = false;
switch (MODE) {
case SUMMARY:
result = validator.validateSamFileSummary(samReader, reference);
break;
case VERBOSE:
result = validator.validateSamFileVerbose(samReader, reference);
break;
}
out.flush();
} finally {
SAMFileReader.setDefaultValidationStringency(originalStringency);
}
return result ? 0 : 1;
}
}