IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(METRICS);
final SAMFileReader in = new SAMFileReader(INPUT);
final SAMFileHeader.SortOrder order = in.getFileHeader().getSortOrder();
SAMFileWriter out = null;
if (OUTPUT != null) {
IOUtil.assertFileIsWritable(OUTPUT);
out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), true, OUTPUT);
}
final Histogram<Integer> histo = new Histogram<Integer>("clipped_bases", "read_count");
// Combine any adapters and custom adapter pairs from the command line into an array for use in clipping
final AdapterPair[] adapters;
{
final List<AdapterPair> tmp = new ArrayList<AdapterPair>();
tmp.addAll(ADAPTERS);
if (FIVE_PRIME_ADAPTER != null && THREE_PRIME_ADAPTER != null) {
tmp.add(new CustomAdapterPair(FIVE_PRIME_ADAPTER, THREE_PRIME_ADAPTER));
}
adapters = tmp.toArray(new AdapterPair[tmp.size()]);
}
////////////////////////////////////////////////////////////////////////
// Main loop that consumes reads, clips them and writes them to the output
////////////////////////////////////////////////////////////////////////
final ProgressLogger progress = new ProgressLogger(log, 1000000, "Read");
final SAMRecordIterator iterator = in.iterator();
final AdapterMarker adapterMarker = new AdapterMarker(ADAPTER_TRUNCATION_LENGTH, adapters).
setMaxPairErrorRate(MAX_ERROR_RATE_PE).setMinPairMatchBases(MIN_MATCH_BASES_PE).
setMaxSingleEndErrorRate(MAX_ERROR_RATE_SE).setMinSingleEndMatchBases(MIN_MATCH_BASES_SE).
setNumAdaptersToKeep(NUM_ADAPTERS_TO_KEEP).
setThresholdForSelectingAdaptersToKeep(PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN);
while (iterator.hasNext()) {
final SAMRecord rec = iterator.next();
final SAMRecord rec2 = rec.getReadPairedFlag() && iterator.hasNext() ? iterator.next() : null;
rec.setAttribute(ReservedTagConstants.XT, null);
// Do the clipping one way for PE and another for SE reads
if (rec.getReadPairedFlag()) {
// Assert that the input file is in query name order only if we see some PE reads
if (order != SAMFileHeader.SortOrder.queryname) {
throw new PicardException("Input BAM file must be sorted by queryname");
}
if (rec2 == null) throw new PicardException("Missing mate pair for paired read: " + rec.getReadName());
rec2.setAttribute(ReservedTagConstants.XT, null);
// Assert that we did in fact just get two mate pairs
if (!rec.getReadName().equals(rec2.getReadName())){
throw new PicardException("Adjacent reads expected to be mate-pairs have different names: " +
rec.getReadName() + ", " + rec2.getReadName());
}
// establish which of pair is first and which second
final SAMRecord first, second;
if (rec.getFirstOfPairFlag() && rec2.getSecondOfPairFlag()){
first = rec;
second = rec2;
}
else if (rec.getSecondOfPairFlag() && rec2.getFirstOfPairFlag()) {
first = rec2;
second = rec;
}
else {
throw new PicardException("Two reads with same name but not correctly marked as 1st/2nd of pair: " + rec.getReadName());
}
adapterMarker.adapterTrimIlluminaPairedReads(first, second);
}
else {
adapterMarker.adapterTrimIlluminaSingleRead(rec);
}
// Then output the records, update progress and metrics
for (final SAMRecord r : new SAMRecord[] {rec, rec2}) {
if (r != null) {
progress.record(r);
if (out != null) out.addAlignment(r);
final Integer clip = rec.getIntegerAttribute(ReservedTagConstants.XT);
if (clip != null) histo.increment(rec.getReadLength() - clip + 1);
}
}
}
if (out != null) out.close();
// Lastly output the metrics to file
final MetricsFile<?,Integer> metricsFile = getMetricsFile();
metricsFile.setHistogram(histo);
metricsFile.write(METRICS);