Package picard.illumina.parser

Source Code of picard.illumina.parser.OutputMapping$TwoDIndex

/*
* The MIT License
*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.illumina.parser;

import htsjdk.samtools.util.StringUtil;
import picard.PicardException;

/**
* Several parts of the parser need to know which cycles are output. Currently all non-skip cycles are output, but
* rather than sprinkle that knowledge throughout the parser code, OutputMapping provides all the data a client
* might want about the cycles to be output, including what ReadType they are.
*
* @author jburke@broadinstitute.org
*/
public class OutputMapping {
    /**
     * This class represents the mapping from Raw Cycles to TwoDIndices into output data structures and ClusterData.  This class
     * also contains ReadStructure.Substructure that describes which reads/cycles should be output.
     *
     * For each cycle # (1-Based) there is a corresponding element in the cycleToOutputIndex array where
     * cycleToOutputIndex[cycle#].arrayIndex indicates the readNumber that cycle will be found on and cycleToOutputIndex[cycle#].elementIndex
     * indicates the array inde on that readNumber that the cycle occupies.  There are also various intermediate byte[][]
     * structures (in BclData, QseqReadData, etc...) where the top level array corresponds with the readNumber and the second-level
     * array corresponds with cycleNumber, cycleToOutputIndex is used to index into these arrays.
     */
    private final TwoDIndex[] cycleToOutputIndex;

    /** The collection of ReadDescriptors and information about those read descriptors that describe all the
     * cycles that IlluminaDataProvider should output in a ClusterData object */
    private final ReadStructure.Substructure outputSubstructure;

    /** The original read structure without any skips */
    private final ReadStructure outputReadStructure;

    /**
     * Create an OutputMapping from a readStructure, currently the outputSubstructure just references the readStructure.nonSkips
     * Substructure
     * @param readStructure The readStructure for the given run that we want an OutputMapping for
     */
    public OutputMapping(final ReadStructure readStructure) {
        this.outputSubstructure = readStructure.nonSkips;
        this.cycleToOutputIndex = makeCycleToOutputIndexArray(readStructure);
        this.outputReadStructure = outputSubstructure.toReadStructure();
    }

    /** @return The number of reads that should be found in the output clusterData */
    public int numOutputReads() {
        return outputSubstructure.length();
    }

    /** @return An array of cycles in ascending order of all the cycles that should be output. */
    public int [] getOutputCycles() {
        return outputSubstructure.getCycles();
    }

    /** @return An ordered array of lengths, where each element represents the size of output reads respectively */
    public int [] getOutputReadLengths() {
        return outputSubstructure.getDescriptorLengths();
    }

    /** @return The total number of cycles that will be output */
    public int getTotalOutputCycles() {
        return outputSubstructure.getTotalCycles();
    }

    /** @return An ordered array of Ranges over cycle indices(cycle#-1), where each range represents a contiguous block of cycles
     * to output, and each cycle in getOutputCycles() is in ONE AND ONLY ONE Range, all ranges are inclusive of both ends
     */
    public Range [] getCycleIndexRanges() {
        return outputSubstructure.getCycleIndexRanges();
    }

    /** @return An iterator over the read descriptors that describe the reads to be output*/
    public Iterable<ReadDescriptor> getOutputDescriptors() {
        return outputSubstructure;
    }

    public ReadStructure getOutputReadStructure() {
        return outputReadStructure;
    }

    /**
     * Return an index that where:
     *     index.arrayIndex - represents either the read number the cycle will be output too, or (in some cases)
     *     an array index into a two dimensional array of byte[][] where the top level array corresponds to read number
     *
     *     index.elementIndex - represents the element a cycle will appear in inside it's give read, or the element
     *     in an array as described above
     * @param cycle The cycle for which we want an index
     * @return A TwoDArrayIndex indicating where this cycle can be found
     */
    public TwoDIndex getOutputIndexForCycle(final int cycle) {
        return cycleToOutputIndex[cycle];
    }

    /**
     * An index into two dimensional arrays or other two dimensional structures where arrayIndex equals the index
     * into the top level structure and elementIndex is equal to the index into the bottom level structure
     */
    static class TwoDIndex {
        //index into the "outer structure" i.e. if we have an array T[][], we would index T[majorIndex][minorIndex]
        public final int majorIndex;

        //index into the "inner structure", see majorIndex
        public final int minorIndex;

        public TwoDIndex(final int majorIndex, final int minorIndex) {
            this.majorIndex = majorIndex;
            this.minorIndex = minorIndex;
        }

        @Override
        public String toString() {
            return "TwoDIndex(majorIndex == " + majorIndex + ", minorIndex == " + minorIndex + ")";
        }

        @Override
        public boolean equals(final Object thatObj) {
            if(thatObj == null || !(thatObj instanceof TwoDIndex)) {
                return false;
            }

            final TwoDIndex that = (TwoDIndex) thatObj;
            return this.majorIndex == that.majorIndex && this.minorIndex == that.minorIndex;
        }
    }

    /**
     * Create an array where each index corresponds to a cycle # (with Cycle 0 = null) and each element in
     * an index into a ClusterData's reads/bases and reads/qualities for the run described by readStructure
     * @param readStructure The readStructure describing the run concerned
     * @return An array of TwoDArrayIndexes
     */
    private TwoDIndex[] makeCycleToOutputIndexArray(final ReadStructure readStructure) {
        int totalCycles = readStructure.totalCycles;
        final TwoDIndex[] cycleToOutputIndex = new TwoDIndex[totalCycles + 1];

        final int [] outputCycles = getOutputCycles();
        final int [] outputLengths = getOutputReadLengths();
        int outputCycleIndex = 0;
        int arrIndex = 0;
        int elementIndex = 0;
        for(int i = 1; i <= totalCycles && outputCycleIndex < outputCycles.length; i++) {
            if(outputCycles[outputCycleIndex] == i) {
                if(elementIndex >= outputLengths[arrIndex]) {
                    elementIndex = 0;
                    ++arrIndex;
                }

                cycleToOutputIndex[i] = new TwoDIndex(arrIndex, elementIndex);
                ++elementIndex;
                ++outputCycleIndex;
            }
        }

        if(outputCycleIndex != outputCycles.length) {
            throw new PicardException("Error in read structure outputCycles (" + StringUtil.intValuesToString(outputCycles) + ") and total cycles (" + totalCycles + ") OutputCycleIndex(" + outputCycleIndex + ")");
        }

        return cycleToOutputIndex;
    }
}
TOP

Related Classes of picard.illumina.parser.OutputMapping$TwoDIndex

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.