Package org.broad.igv.feature.tribble

Source Code of org.broad.igv.feature.tribble.EMBLTableCodec$EmblTableIterator

package org.broad.igv.feature.tribble;

import htsjdk.samtools.util.LocationAware;
import org.apache.log4j.Logger;
import org.broad.igv.feature.BasicFeature;
import org.broad.igv.feature.Exon;
import org.broad.igv.feature.Strand;
import htsjdk.tribble.AbstractFeatureCodec;
import htsjdk.tribble.FeatureCodecHeader;
import htsjdk.tribble.readers.*;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

/**
* @author jrobinso
*         Date: 11/15/13
*         Time: 1:08 PM
*/
public class EMBLTableCodec extends AbstractFeatureCodec<BasicFeature, EMBLTableCodec.EmblTableIterator> {

    private static Logger log = Logger.getLogger(EMBLTableCodec.class);

    /**
     * Creates a codec that reports {@link BasicFeature} as its feature class to the superclass.
     */
    public EMBLTableCodec() {
        super(BasicFeature.class);
    }

    @Override
    public BasicFeature decode(EmblTableIterator s) throws IOException {

        EmblRecord emblRecord = s.next();
        if(emblRecord == null) return null;

        BasicFeature feature = new BasicFeature(emblRecord.getChromosome(), emblRecord.getStart(),
                emblRecord.getEnd());
        feature.setType(emblRecord.getType());
        feature.setIdentifier(emblRecord.getIdentifier());
        feature.setName(emblRecord.getIdentifier());
        feature.setStrand(emblRecord.getStrand());
        feature.setDescription(emblRecord.getDescription());
        if (emblRecord.getAlias() != null) {
            feature.setName(emblRecord.getAlias());
        }

        // If this is a "gene part" add the exons
        for (Exon exon : emblRecord.getExons()) {
            feature.addExon(exon);
        }

        return feature;
    }

    /**
     * No header is read by this codec: the source is left untouched and the result is always
     * {@code null}.
     *
     * @param o the record source (unused)
     * @return always {@code null}
     * @throws IOException never thrown by this implementation; declared by the overridden method
     */
    @Override
    public FeatureCodecHeader readHeader(EmblTableIterator o) throws IOException {
        return null;
    }

    /**
     * Wraps the given input stream in an {@link EmblTableIterator}, the record source used by
     * {@link #decode(EmblTableIterator)}.  Note that constructing the iterator already reads from
     * the stream in order to position itself on the first feature record.
     *
     * @param inputStream the stream containing the EMBL feature table
     * @return a new iterator over the records in the stream
     */
    @Override
    public EmblTableIterator makeSourceFromStream(InputStream inputStream) {
        return new EmblTableIterator(inputStream);
    }

    /**
     * Intended to produce a {@link LocationAware} source suitable for index creation, analogous to
     * {@link #makeSourceFromStream(java.io.InputStream)}.
     * <p/>
     * NOT IMPLEMENTED: this method ignores its argument and always returns {@code null}, so callers
     * that need a {@link LocationAware} source cannot create an index with this codec.
     *
     * @param inputStream the stream containing the EMBL feature table (unused)
     * @return always {@code null}
     */
    @Override
    public LocationAware makeIndexableSourceFromStream(InputStream inputStream) {
        return null;    // <= indexing will fail until this is implemented.
    }

    @Override
    public boolean isDone(EmblTableIterator o) {
        return !o.hasNext();
    }

    /**
     * Releases the source by delegating to {@link EmblTableIterator#close()}.
     *
     * @param o the record source to close
     */
    @Override
    public void close(EmblTableIterator o) {
        o.close();
    }

    static class EmblRecord {

        private static Logger log = Logger.getLogger(EmblRecord.class);

        boolean isNegative;
        private String type;
        private String chromosome;
        private String identifier;
        private String alias;
        private String description;
        private int start = Integer.MAX_VALUE;
        private int end;
        List<Exon> exons;


        EmblRecord(String type, String chromosome, String lociString, boolean isNegative) {
            this.isNegative = isNegative;
            this.type = type;
            this.chromosome = chromosome;
            createExons(lociString, isNegative);
        }

        /**
         * Method description
         *
         * @return
         */
        public int getStart() {
            return start;
        }

        /**
         * Method description
         *
         * @return
         */
        public int getEnd() {
            return end;
        }

        /**
         * Method description
         *
         * @return
         */
        public boolean isGenePart() {
            return type.equals("CDS") || type.equals("3'UTR") || type.equals("5'UTR");
        }

        /**
         * Method description
         *
         * @return
         */
        public Strand getStrand() {
            return isNegative ? Strand.NEGATIVE : Strand.POSITIVE;
        }

        /**
         * Method description
         *
         * @return
         */
        public String getType() {
            return type;
        }

        /**
         * Method description
         *
         * @return
         */
        public String getIdentifier() {
            return identifier;
        }

        /**
         * Method description
         *
         * @param identifier
         */
        public void setIdentifier(String identifier) {
            this.identifier = identifier;
        }

        /**
         * Method description
         *
         * @return
         */
        public String getAlias() {
            return alias;
        }

        /**
         * Method description
         *
         * @param alias
         */
        public void setAlias(String alias) {
            this.alias = alias;
        }

        /**
         * Method description
         *
         * @return
         */
        public List<Exon> getExons() {
            return exons;
        }

        /**
         * Method description
         *
         * @param nextLine
         */
        public void append(String nextLine) {
            String attrString = nextLine.substring(21);
            if (attrString.startsWith("/gene=")) {
                String[] kv = attrString.split("=");
                String geneName = kv[1].replace("\"", "");
                if (geneName.startsWith("SP")) {

                    // Some genes have multiple identifiers.  Only use the first one
                    if (getIdentifier() == null) {
                        setIdentifier(geneName);
                    }
                } else {
                    setAlias(geneName);
                }
            } else if (attrString.startsWith("/systematic_id=")) {
                String[] kv = attrString.split("=");
                String id = kv[1].replace("\"", "");
                setIdentifier(id);
                setAlias(id);

            } else {
                appendToDescription(nextLine.substring(22).trim());
            }
        }

        /**
         * Method description
         *
         * @param note
         */
        public void appendToDescription(String note) {
            if (description == null) {
                description = note;
            } else {
                description += "<br>" + note;
            }
        }

        /**
         * Method description
         *
         * @return
         */
        public String getDescription() {
            return description;
        }

        /**
         * Create a list of Exon objects from the Embl join string.  Apparently exons in embl
         * format are represented by a single CDS record.
         *
         * @param joinString
         * @param isNegative
         */
        void createExons(String joinString, boolean isNegative) {
            String[] lociArray = joinString.split(",");
            exons = new ArrayList(lociArray.length);
            for (String loci : lociArray) {
                try {
                    String[] tmp = loci.split("\\.\\.");
                    int exonStart = Integer.parseInt(tmp[0]) - 1;    // - (isNegative ? 0 : 1);
                    int exonEnd = exonStart + 1;
                    if (tmp.length > 1) {
                        exonEnd = Integer.parseInt(tmp[1]);
                    }

                    Strand strand = isNegative ? Strand.NEGATIVE : Strand.POSITIVE;
                    Exon r = new Exon(chromosome, exonStart, exonEnd, strand);
                    start = Math.min(start, exonStart);
                    end = Math.max(end, exonEnd);
                    exons.add(r);


                } catch (NumberFormatException e) {
                    log.error("Error parsing exon number; " + joinString, e);
                }
            }
        }

        /**
         * Method description
         *
         * @return
         */
        public String getChromosome() {
            return chromosome;
        }
    }

    static class EmblTableIterator  {

        String chromosome;
        BufferedReader reader;
        PositionalBufferedStream is;
        EmblRecord currentRecord = null;

        EmblTableIterator(InputStream stream) {

            if(stream instanceof  PositionalBufferedStream) {
                is = (PositionalBufferedStream) stream;
            }
            else {
                is = new PositionalBufferedStream(stream);
            }

            reader = new BufferedReader(new InputStreamReader(stream));
            // Advance to first record
            next();
        }

        boolean hasNext() {
            return currentRecord != null;
        }

        EmblRecord next() {

            String nextLine = null;
            try {
                while ((nextLine = reader.readLine()) != null) {

                    if (nextLine.startsWith("ID"))    // Chromosome change
                    {
                        String chr = getFirstWord(nextLine.substring(2));
                        chromosome = chr.replace("chromosome", "chr").replace("_", "");
                    } else if (nextLine.startsWith("FT")) {
                        String featureKey = nextLine.substring(5, 19).trim();
                        if (featureKey.length() == 0) {
                            if (currentRecord != null) {
                                currentRecord.append(nextLine);
                            }
                        } else {

                            // New feature started.
                            EmblRecord returnValue = currentRecord;

                            String temp = nextLine.substring(21);
                            boolean isNegative = temp.contains("complement");
                            String lociString = parseJoinString(temp, reader).replace("<",
                                    "").replace(">", "").trim();
                            currentRecord = new EmblRecord(featureKey.trim(), chromosome, lociString, isNegative);

                            return returnValue;
                        }
                    } else {
                        // Skip line
                    }
                }

                return currentRecord;
            } catch (IOException ex) {
                log.error("Error parsing EMBL file", ex);
                return null;
            }
        }

        private String getFirstWord(String string) {
            String trimmedString = string.trim();
            char[] chars = trimmedString.toCharArray();
            int whitespaceIndex = 0;
            for (whitespaceIndex = 0; whitespaceIndex < chars.length; whitespaceIndex++) {
                if (Character.isSpaceChar(chars[whitespaceIndex])) {
                    break;
                }
            }

            return trimmedString.substring(0, whitespaceIndex).trim();

        }


        /**
         * FT   CDS             join(complement(5000933..5001976),
         * FT                   complement(5000325..5000891),complement(5000024..5000272))
         * FT                   /product="GTPase activating protein (predicted)"
         * FT                   /gene="SPAC1952.17c"
         * FT                   /gene="SPAC890.01c"
         *
         * @param joinString
         * @param reader
         * @return
         * @throws IOException
         */
        public String parseJoinString(String joinString, BufferedReader reader)
                throws IOException {

            if (joinString.startsWith("join") || joinString.startsWith("complement")) {
                int leftParenCount = countChar(joinString, '(');
                int rightParenCount = countChar(joinString, ')');
                while (leftParenCount != rightParenCount) {
                    joinString += reader.readLine().replace("FT", "").trim();
                    leftParenCount = countChar(joinString, '(');
                    rightParenCount = countChar(joinString, ')');
                }

                // join and complement functions irrelevant
                joinString = joinString.replace("join", "");
                joinString = joinString.replace("complement", "");
                joinString = joinString.replace("(", "");
                joinString = joinString.replace(")", "");
                joinString = joinString.replace('<', ' ');
                return joinString;

            } else {
                return joinString;
            }

        }


        /**
         * This must exist in the jdk ?
         *
         * @param string
         * @return
         */
        static int countChar(String string, char c) {
            int cnt = 0;
            for (int i = 0; i < string.length(); i++) {
                if (c == string.charAt(i)) {
                    cnt++;
                }
            }
            return cnt;

        }

        public void close() {
            if (reader != null) try {
                reader.close();
            } catch (IOException e) {
                log.error("Error closing EMBL reader", e);
            }
        }

    }

}
TOP

Related Classes of org.broad.igv.feature.tribble.EMBLTableCodec$EmblTableIterator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.