protected int doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
final boolean sanitizing = SANITIZE;
final SamReader in = SamReaderFactory.makeDefault().validationStringency(VALIDATION_STRINGENCY).open(INPUT);
final SAMFileHeader inHeader = in.getFileHeader();
// If we are going to override SAMPLE_ALIAS or LIBRARY_NAME, make sure all the read
// groups have the same values.
final List<SAMReadGroupRecord> rgs = inHeader.getReadGroups();
if (SAMPLE_ALIAS != null || LIBRARY_NAME != null) {
boolean allSampleAliasesIdentical = true;
boolean allLibraryNamesIdentical = true;
for (int i = 1; i < rgs.size(); i++) {
if (!rgs.get(0).getSample().equals(rgs.get(i).getSample())) {
allSampleAliasesIdentical = false;
}
if (!rgs.get(0).getLibrary().equals(rgs.get(i).getLibrary())) {
allLibraryNamesIdentical = false;
}
}
if (SAMPLE_ALIAS != null && !allSampleAliasesIdentical) {
throw new PicardException("Read groups have multiple values for sample. " +
"A value for SAMPLE_ALIAS cannot be supplied." );
}
if (LIBRARY_NAME != null && !allLibraryNamesIdentical) {
throw new PicardException("Read groups have multiple values for library name. " +
"A value for library name cannot be supplied." );
}
}
////////////////////////////////////////////////////////////////////////////
// Build the output writer with an appropriate header based on the options
////////////////////////////////////////////////////////////////////////////
final boolean presorted = (inHeader.getSortOrder() == SORT_ORDER) || (SORT_ORDER == SortOrder.queryname && SANITIZE);
final SAMFileHeader outHeader = new SAMFileHeader();
for (final SAMReadGroupRecord rg : inHeader.getReadGroups()) {
if (SAMPLE_ALIAS != null) {
rg.setSample(SAMPLE_ALIAS);
}
if (LIBRARY_NAME != null) {
rg.setLibrary(LIBRARY_NAME);
}
outHeader.addReadGroup(rg);
}
outHeader.setSortOrder(SORT_ORDER);
if (!REMOVE_ALIGNMENT_INFORMATION) {
outHeader.setSequenceDictionary(inHeader.getSequenceDictionary());
outHeader.setProgramRecords(inHeader.getProgramRecords());
}
final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader, presorted, OUTPUT);
////////////////////////////////////////////////////////////////////////////
// Build a sorting collection to use if we are sanitizing
////////////////////////////////////////////////////////////////////////////
final SortingCollection<SAMRecord> sorter;
if (sanitizing) {
sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(outHeader), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM);
}
else {
sorter = null;
}
final ProgressLogger progress = new ProgressLogger(log, 1000000, "Reverted");
for (final SAMRecord rec : in) {
// Weed out non-primary and supplemental read as we don't want duplicates in the reverted file!
if (rec.isSecondaryOrSupplementary()) continue;
// Actually to the reverting of the remaining records
revertSamRecord(rec);
if (sanitizing) sorter.add(rec);
else out.addAlignment(rec);
progress.record(rec);
}
////////////////////////////////////////////////////////////////////////////
// Now if we're sanitizing, clean up the records and write them to the output
////////////////////////////////////////////////////////////////////////////
if (!sanitizing) {
out.close();
}
else {
long total = 0, discarded = 0;
final PeekableIterator<SAMRecord> iterator = new PeekableIterator<SAMRecord>(sorter.iterator());
final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat = new HashMap<SAMReadGroupRecord, FastqQualityFormat>();
// Figure out the quality score encoding scheme for each read group.
for (final SAMReadGroupRecord rg : inHeader.getReadGroups()) {
final SamReader reader = SamReaderFactory.makeDefault().validationStringency(VALIDATION_STRINGENCY).open(INPUT);
final SamRecordFilter filter = new SamRecordFilter() {
public boolean filterOut(final SAMRecord rec) {
return !rec.getReadGroup().getId().equals(rg.getId());
}
public boolean filterOut(final SAMRecord first, final SAMRecord second) {
throw new UnsupportedOperationException();
}
};
readGroupToFormat.put(rg, QualityEncodingDetector.detect(QualityEncodingDetector.DEFAULT_MAX_RECORDS_TO_ITERATE, new FilteringIterator(reader.iterator(), filter), RESTORE_ORIGINAL_QUALITIES));
CloserUtil.close(reader);
}
for(final SAMReadGroupRecord r : readGroupToFormat.keySet()) {
log.info("Detected quality format for " + r.getReadGroupId() + ": " + readGroupToFormat.get(r));
}