/*
* The MIT License
*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.illumina;
import htsjdk.samtools.ReservedTagConstants;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.filter.SamRecordFilter;
import htsjdk.samtools.filter.SolexaNoiseFilter;
import picard.fastq.IlluminaReadNameEncoder;
import picard.fastq.ReadNameEncoder;
import picard.illumina.parser.ClusterData;
import picard.illumina.parser.ReadData;
import picard.illumina.parser.ReadStructure;
import picard.util.AdapterMarker;
import picard.util.AdapterPair;
import picard.util.IlluminaUtil;
import java.util.List;
/**
* Takes ClusterData provided by an IlluminaDataProvider into one or two SAMRecords,
* as appropriate, and optionally marking adapter sequence. There is one converter per
* IlluminaBasecallsToSam run, and all the TileProcessors use the same converter.
*
* @author jburke@broadinstitute.org
*/
public class ClusterDataToSamConverter implements
IlluminaBasecallsConverter.ClusterDataConverter<IlluminaBasecallsToSam.SAMRecordsForCluster> {
private final String runBarcode;
private final String readGroupId;
private final SamRecordFilter filters = new SolexaNoiseFilter();
private final boolean isPairedEnd;
private final boolean isBarcoded;
private final int [] templateIndices;
private final int [] barcodeIndices;
private final AdapterMarker adapterMarker;
private final int outputRecordsPerCluster;
private final ReadNameEncoder readNameEncoder;
/**
* Constructor
*
* @param runBarcode Used to construct read names.
* @param readGroupId If non-null, set RG attribute on SAMRecord to this.
* @param readStructure The expected structure (number of reads and indexes,
* and their length) in the read.
* @param adapters The list of adapters to check for in the read
*/
public ClusterDataToSamConverter(final String runBarcode,
final String readGroupId,
final ReadStructure readStructure,
final List<IlluminaUtil.IlluminaAdapterPair> adapters) {
this.runBarcode = runBarcode;
this.readGroupId = readGroupId;
this.readNameEncoder = new IlluminaReadNameEncoder(runBarcode);
this.isPairedEnd = readStructure.templates.length() == 2;
this.isBarcoded = !readStructure.barcodes.isEmpty();
if (adapters.isEmpty()) {
this.adapterMarker = null;
} else {
this.adapterMarker = new AdapterMarker(adapters.toArray(new AdapterPair[adapters.size()]));
}
this.templateIndices = readStructure.templates.getIndices();
this.barcodeIndices = readStructure.barcodes.getIndices();
this.outputRecordsPerCluster = readStructure.templates.length();
}
/**
* Creates a new SAM record from the basecall data
*/
private SAMRecord createSamRecord(final ReadData readData, final String readName, final boolean isPf, final boolean firstOfPair, final String unmatchedBarcode) {
final SAMRecord sam = new SAMRecord(null);
sam.setReadName(readName);
sam.setReadBases(readData.getBases());
sam.setBaseQualities(readData.getQualities());
// Flag values
sam.setReadPairedFlag(isPairedEnd);
sam.setReadUnmappedFlag(true);
sam.setReadFailsVendorQualityCheckFlag(!isPf);
if (isPairedEnd) {
sam.setMateUnmappedFlag(true);
sam.setFirstOfPairFlag(firstOfPair);
sam.setSecondOfPairFlag(!firstOfPair);
}
if (filters.filterOut(sam)) {
sam.setAttribute(ReservedTagConstants.XN, 1);
}
if (this.readGroupId != null) {
sam.setAttribute(SAMTag.RG.name(), readGroupId);
}
// If it's a barcoded run and the read isn't assigned to a barcode, then add the barcode
// that was read as an optional tag
if (unmatchedBarcode != null) {
sam.setAttribute(SAMTag.BC.name(), unmatchedBarcode);
}
return sam;
}
/**
* Creates the SAMRecord for each read in the cluster
*/
public IlluminaBasecallsToSam.SAMRecordsForCluster convertClusterToOutputRecord(final ClusterData cluster) {
final IlluminaBasecallsToSam.SAMRecordsForCluster ret = new IlluminaBasecallsToSam.SAMRecordsForCluster(outputRecordsPerCluster);
final String readName = readNameEncoder.generateReadName(cluster, null); // Use null here to prevent /1 or /2 suffixes on read name.
// Get and transform the unmatched barcode, if any, to store with the reads
String unmatchedBarcode = null;
if (isBarcoded && cluster.getMatchedBarcode() == null) {
final byte barcode[][] = new byte[barcodeIndices.length][];
for (int i = 0; i < barcodeIndices.length; i++) {
barcode[i] = cluster.getRead(barcodeIndices[i]).getBases();
}
unmatchedBarcode = IlluminaUtil.barcodeSeqsToString(barcode).replace('.', 'N'); //TODO: This has a separator, where as in other places we do not use a separator
}
final SAMRecord firstOfPair = createSamRecord(
cluster.getRead(templateIndices[0]), readName, cluster.isPf(), true,unmatchedBarcode);
ret.records[0] = firstOfPair;
SAMRecord secondOfPair = null;
if(isPairedEnd) {
secondOfPair = createSamRecord(
cluster.getRead(templateIndices[1]), readName, cluster.isPf(), false, unmatchedBarcode);
ret.records[1] = secondOfPair;
}
if (adapterMarker != null) {
// Clip the read
if (isPairedEnd) {
adapterMarker.adapterTrimIlluminaPairedReads(firstOfPair, secondOfPair);
}
else {
adapterMarker.adapterTrimIlluminaSingleRead(firstOfPair);
}
}
return ret;
}
}