/*
* The MIT License
*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.illumina.parser;
import htsjdk.samtools.util.StringUtil;
import picard.PicardException;
/**
* Several parts of the parser need to know which cycles are output. As of now we output all non-skip cycles, but rather
* than sprinkle this knowledge throughout the parser code, OutputMapping provides all the data a client might want about
* the cycles to be output, including what ReadType they are.
*
* @author jburke@broadinstitute.org
*/
public class OutputMapping {
    /**
     * The mapping from raw cycle numbers to TwoDIndices into output data structures and ClusterData.
     *
     * For each cycle # (1-based) there is a corresponding element in this array where
     * cycleToOutputIndex[cycle#].majorIndex indicates the read number that cycle will be found on and
     * cycleToOutputIndex[cycle#].minorIndex indicates the array index on that read that the cycle occupies.
     * Element 0 and the elements of cycles that are not output are null. There are also various intermediate
     * byte[][] structures (in BclData, QseqReadData, etc...) where the top level array corresponds with the
     * read number and the second-level array corresponds with the cycle number; cycleToOutputIndex is used to
     * index into these arrays.
     */
    private final TwoDIndex[] cycleToOutputIndex;

    /**
     * The collection of ReadDescriptors and information about those read descriptors that describe all the
     * cycles that IlluminaDataProvider should output in a ClusterData object
     */
    private final ReadStructure.Substructure outputSubstructure;

    /** The original read structure without any skips */
    private final ReadStructure outputReadStructure;

    /**
     * Create an OutputMapping from a readStructure, currently the outputSubstructure just references the
     * readStructure.nonSkips Substructure
     *
     * @param readStructure The readStructure for the given run that we want an OutputMapping for
     */
    public OutputMapping(final ReadStructure readStructure) {
        this.outputSubstructure = readStructure.nonSkips;
        // The substructure is passed explicitly so the helper does not depend on field-initialization order.
        this.cycleToOutputIndex = makeCycleToOutputIndexArray(readStructure, this.outputSubstructure);
        this.outputReadStructure = outputSubstructure.toReadStructure();
    }

    /** @return The number of reads that should be found in the output clusterData */
    public int numOutputReads() {
        return outputSubstructure.length();
    }

    /** @return An array of cycles in ascending order of all the cycles that should be output. */
    public int[] getOutputCycles() {
        return outputSubstructure.getCycles();
    }

    /** @return An ordered array of lengths, where each element represents the size of output reads respectively */
    public int[] getOutputReadLengths() {
        return outputSubstructure.getDescriptorLengths();
    }

    /** @return The total number of cycles that will be output */
    public int getTotalOutputCycles() {
        return outputSubstructure.getTotalCycles();
    }

    /**
     * @return An ordered array of Ranges over cycle indices (cycle# - 1), where each range represents a contiguous
     * block of cycles to output, and each cycle in getOutputCycles() is in ONE AND ONLY ONE Range; all ranges are
     * inclusive of both ends
     */
    public Range[] getCycleIndexRanges() {
        return outputSubstructure.getCycleIndexRanges();
    }

    /** @return An iterable over the read descriptors that describe the reads to be output */
    public Iterable<ReadDescriptor> getOutputDescriptors() {
        return outputSubstructure;
    }

    /** @return The read structure built from the output substructure, i.e. the original read structure without any skips */
    public ReadStructure getOutputReadStructure() {
        return outputReadStructure;
    }

    /**
     * Return an index where:
     * index.majorIndex - represents either the read number the cycle will be output to, or (in some cases)
     * an array index into a two dimensional array of byte[][] where the top level array corresponds to read number
     *
     * index.minorIndex - represents the element a cycle will appear in inside its given read, or the element
     * in an array as described above
     *
     * @param cycle The cycle (1-based) for which we want an index
     * @return A TwoDIndex indicating where this cycle can be found, or null if the cycle is 0 or is not output
     * @throws ArrayIndexOutOfBoundsException if cycle is negative or greater than the total number of cycles
     */
    public TwoDIndex getOutputIndexForCycle(final int cycle) {
        return cycleToOutputIndex[cycle];
    }

    /**
     * An index into two dimensional arrays or other two dimensional structures where majorIndex equals the index
     * into the top level structure and minorIndex is equal to the index into the bottom level structure
     */
    static class TwoDIndex {
        //index into the "outer structure" i.e. if we have an array T[][], we would index T[majorIndex][minorIndex]
        public final int majorIndex;

        //index into the "inner structure", see majorIndex
        public final int minorIndex;

        public TwoDIndex(final int majorIndex, final int minorIndex) {
            this.majorIndex = majorIndex;
            this.minorIndex = minorIndex;
        }

        @Override
        public String toString() {
            return "TwoDIndex(majorIndex == " + majorIndex + ", minorIndex == " + minorIndex + ")";
        }

        /** Two TwoDIndex objects are equal when both their majorIndex and minorIndex match. */
        @Override
        public boolean equals(final Object thatObj) {
            // instanceof is false for null, so no separate null check is needed
            if (!(thatObj instanceof TwoDIndex)) {
                return false;
            }

            final TwoDIndex that = (TwoDIndex) thatObj;
            return this.majorIndex == that.majorIndex && this.minorIndex == that.minorIndex;
        }

        /** Hash consistent with equals: equal indices always produce equal hash codes. */
        @Override
        public int hashCode() {
            return 31 * majorIndex + minorIndex;
        }
    }

    /**
     * Create an array where each index corresponds to a cycle # (with Cycle 0 = null) and each element is
     * an index into a ClusterData's reads/bases and reads/qualities for the run described by readStructure.
     * Cycles that are not part of the output substructure are left null.
     *
     * @param readStructure The readStructure describing the run concerned
     * @param outputSubstructure The substructure listing the cycles (and per-read lengths) to be output
     * @return An array of TwoDIndexes of length readStructure.totalCycles + 1
     * @throws PicardException if not every output cycle could be matched to a cycle in 1..totalCycles
     */
    private static TwoDIndex[] makeCycleToOutputIndexArray(final ReadStructure readStructure,
                                                           final ReadStructure.Substructure outputSubstructure) {
        final int totalCycles = readStructure.totalCycles;
        final TwoDIndex[] cycleToOutputIndex = new TwoDIndex[totalCycles + 1];

        final int[] outputCycles = outputSubstructure.getCycles();
        final int[] outputLengths = outputSubstructure.getDescriptorLengths();
        int outputCycleIndex = 0;
        int arrIndex = 0;
        int elementIndex = 0;
        for (int cycle = 1; cycle <= totalCycles && outputCycleIndex < outputCycles.length; cycle++) {
            if (outputCycles[outputCycleIndex] == cycle) {
                // The current output read is full; move on to the first element of the next output read
                if (elementIndex >= outputLengths[arrIndex]) {
                    elementIndex = 0;
                    ++arrIndex;
                }

                cycleToOutputIndex[cycle] = new TwoDIndex(arrIndex, elementIndex);
                ++elementIndex;
                ++outputCycleIndex;
            }
        }

        // Every output cycle must have been consumed; otherwise outputCycles is inconsistent with totalCycles
        if (outputCycleIndex != outputCycles.length) {
            throw new PicardException("Error in read structure outputCycles (" + StringUtil.intValuesToString(outputCycles) + ") and total cycles (" + totalCycles + ") OutputCycleIndex(" + outputCycleIndex + ")");
        }

        return cycleToOutputIndex;
    }
}