Package org.broad.igv.feature.tribble

Source Code of org.broad.igv.feature.tribble.PSLCodec

/*
* Copyright (c) 2007-2011 by The Broad Institute of MIT and Harvard.  All Rights Reserved.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
* Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*
* THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR
* WARRANTES OF ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING,
* WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
* PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER
* OR NOT DISCOVERABLE.  IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR RESPECTIVE
* TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES
* OF ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES,
* ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER
* THE BROAD OR MIT SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT
* SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*/

package org.broad.igv.feature.tribble;

import org.broad.igv.Globals;
import org.broad.igv.feature.PSLRecord;
import org.broad.igv.feature.BasicFeature;
import org.broad.igv.feature.Exon;
import org.broad.igv.feature.Strand;
import org.broad.igv.feature.genome.Genome;

/**
* @author jrobinso
* @date Aug 5, 2010
* <p/>
* Columns:
* <p/>
* 1. matches - Number of bases that match that aren't repeats
* 2. misMatches - Number of bases that don't match
* 3. repMatches - Number of bases that match but are part of repeats
* 4. nCount - Number of 'N' bases
* 5. qNumInsert - Number of inserts in query
* 6. qBaseInsert - Number of bases inserted in query
* 7. tNumInsert - Number of inserts in target
* 8. tBaseInsert - Number of bases inserted in target
* 9. strand - '+' or '-' for query strand. For translated alignments, second '+'or '-' is for genomic strand
* 10. qName - Query sequence name
* 11. qSize - Query sequence size
* 12. qStart - Alignment start position in query
* 13. qEnd - Alignment end position in query
* 14. tName - Target sequence name
* 15. tSize - Target sequence size
* 16. tStart - Alignment start position in target
* 17. tEnd - Alignment end position in target
* 18. blockCount - Number of blocks in the alignment (a block contains no gaps)
* 19. blockSizes - Comma-separated list of sizes of each block
* 20. qStarts - Comma-separated list of starting positions of each block in query
* 21. tStarts - Comma-separated list of starting positions of each block in target
*/
public class PSLCodec extends UCSCCodec<BasicFeature> {

    Genome genome;
    boolean keepText;

    public PSLCodec(){
        this(null);
    }

    public PSLCodec(Genome genome) {
        this(genome, false);
    }

    public PSLCodec(Genome genome, boolean keepText) {
        super(BasicFeature.class);
        this.genome = genome;
        this.keepText = keepText;

    }

    public PSLRecord decode(String line) {

        PSLRecord f = null;
        try {
            if (line.trim().length() == 0 || line.startsWith("#") || line.startsWith("track") ||
                    line.startsWith("browser")) {
                return null;
            }

            String[] tokens = Globals.singleTabMultiSpacePattern.split(line);
            int nTokens = tokens.length;
            if (nTokens < 21) {
                // log.info("Skipping line ")
                return null;
            }
            int tSize = Integer.parseInt(tokens[14]);
            String chrToken = tokens[13];
            String chr = genome == null ? chrToken : genome.getChromosomeAlias(chrToken);
            int start = Integer.parseInt(tokens[15]); // IS PSL 1 or ZERO based,  closed or open?

            String strandString = tokens[8];
            Strand strand = strandString.startsWith("+") ? Strand.POSITIVE : Strand.NEGATIVE;

            boolean gNeg = false;
            if (strandString.length() > 1) {
                gNeg = strandString.charAt(1) == '-';
            }

            f = new PSLRecord();
            f.setName(tokens[9]);
            f.setChr(chr);
            f.setStart(start);
            f.setEnd(Integer.parseInt(tokens[16]));
            f.setStrand(strand);

            int exonCount = Integer.parseInt(tokens[17]);
            String[] exonSizes = tokens[18].split(",");
            String[] startsBuffer = tokens[20].split(",");

            if (startsBuffer.length == exonSizes.length && exonCount == startsBuffer.length) {
                for (int i = 0; i < startsBuffer.length; i++) {
                    int exonSize = Integer.parseInt(exonSizes[i]);
                    int exonStart = Integer.parseInt(startsBuffer[i]);
                    if (gNeg) {
                        exonStart = tSize - exonStart - exonSize;
                    }
                    int exonEnd = exonStart + exonSize;
                    Exon exon = new Exon(chr, exonStart, exonEnd, strand);
                    f.addExon(exon);
                }
            } else {
                // TODO -- warn
            }

            //score = percentId = 100.0 * (match + repMatch)  / (misMatch + match + repMatch + qGapCount + tGapCount)
            int match = Integer.parseInt(tokens[0]);
            int misMatch = Integer.parseInt(tokens[1]);
            int repMatch = Integer.parseInt(tokens[2]);
            int ns = Integer.parseInt(tokens[3]);
            int qGapCount = Integer.parseInt(tokens[4]);
            int qGapBases = Integer.parseInt(tokens[5]);
            int tGapCount = Integer.parseInt(tokens[6]);
            int tGapBases = Integer.parseInt(tokens[7]);
            int qSize = Integer.parseInt(tokens[10]);

            float score = (1000.0f * (match + repMatch - misMatch - qGapCount - tGapCount)) / qSize;

            f.setMatch(match);
            f.setMisMatch(misMatch);
            f.setRepMatch(repMatch);
            f.setNs(ns);
            f.setQGapCount(qGapCount);
            f.setQGapBases(qGapBases);
            f.setTGapCount(tGapCount);
            f.setTGapBases(tGapBases);
            f.setQSize(qSize);

            f.setScore(score);

            // Build description
            StringBuffer desc = new StringBuffer();
            desc.append("matches = " + tokens[0]);
            desc.append("<br>");
            desc.append("mismatches = " + tokens[1]);
            desc.append("<br>");
            desc.append("repeat matches = " + tokens[2]);
            desc.append("<br>");
            desc.append("# inserts in query = " + tokens[4]);
            desc.append("<br>");
            desc.append("# inserts in target = " + tokens[6]);
            f.setDescription(desc.toString());

            if(keepText) {
                f.setText(line);
            }

        } catch (NumberFormatException e) {
            return null;
        }


        return f;
    }

    /**
     * This function returns true iff the File potentialInput can be parsed by this
     * codec.
     * <p/>
     * There is an assumption that there's never a situation where two different Codecs
     * return true for the same file.  If this occurs, the recommendation would be to error out.
     * <p/>
     * Note this function must never throw an error.  All errors should be trapped
     * and false returned.
     *
     * @param path the file to test for parsability with this codec
     * @return true if potentialInput can be parsed, false otherwise
     */
    public boolean canDecode(String path) {
        return path.toLowerCase().endsWith(".psl");
    }
}
TOP

Related Classes of org.broad.igv.feature.tribble.PSLCodec

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.