Package org.broad.igv.maf

Source Code of org.broad.igv.maf.MAFParser

/*
* Copyright (c) 2007-2012 The Broad Institute, Inc.
* SOFTWARE COPYRIGHT NOTICE
* This software and its documentation are the copyright of the Broad Institute, Inc. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. The Broad Institute is not responsible for its use, misuse, or functionality.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
* Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*/

package org.broad.igv.maf;

import htsjdk.samtools.seekablestream.SeekableStream;
import org.broad.igv.Globals;
import org.broad.igv.util.ParsingUtils;
import org.broad.igv.util.index.Interval;
import org.broad.igv.util.index.IntervalTree;
import org.broad.igv.util.stream.IGVSeekableStreamFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* @author jrobinso
*         Date: 2/18/13
*         Time: 9:59 AM
*/
public class MAFParser implements MAFReader {

    String path;
    MAFIndex index;
    List<String> species;
    String trackName;

    public MAFParser(String path) {
        this.path = path;
        try {
            parseHeader();
        } catch (IOException e) {
            e.printStackTrace()//To change body of catch statement use File | Settings | File Templates.
        }
        String indexPath = path + ".index";
        try {
            if (ParsingUtils.pathExists(indexPath)) {
                index = MAFIndex.loadIndex(indexPath);
            } else {
                index = MAFIndex.createIndex(path);
                MAFIndex.writeIndex(index, indexPath);
            }
        } catch (IOException e) {
            e.printStackTrace()//To change body of catch statement use File | Settings | File Templates.
        }
    }


    public String getTrackName() {
        return trackName;
    }

    @Override
    public List<MultipleAlignmentBlock> loadAlignments(String chr, int start, int end) throws IOException {

        IntervalTree ivTree = index.getIntervalTree(chr);
        if (ivTree == null) return null;

        List<Interval> intervals = ivTree.findOverlapping(start, end);
        if (intervals.isEmpty()) {
            return null;
        }


        // Find the starting (left most) interval.  Alignment blocks do not overlap, so we can start at the
        // minimum file offset and just proceed until the end of the interval.
        long startPosition = Long.MAX_VALUE;
        for (Interval iv : intervals) {
            startPosition = Math.min(startPosition, iv.getValue());
        }


        SeekableStream is = null;


        is = IGVSeekableStreamFactory.getInstance().getStreamFor(path);
        is.seek(startPosition);

        BufferedReader reader = new BufferedReader(new InputStreamReader(is), 256000);

        List<MultipleAlignmentBlock> alignments = new ArrayList<MultipleAlignmentBlock>();

        String line;
        while ((line = reader.readLine()) != null) {
            if (line.startsWith("a ")) {
                // TODO -- parse score (optional)
                MultipleAlignmentBlock block = parseBlock(reader);
                if(block.getEnd() < start) {
                    continue;
                }
                if (block.getStart() > end || !block.getChr().equals(chr)) {
                    break;
                } else {
                    alignments.add(block);
                }
            }
        }
        return alignments;
    }


    @Override
    public Collection<String> getChrNames() {
        return index.getChromosomes();
    }

    @Override
    public Collection<String> getSpecies() {
        return species != null ? species : index.getSpecies();
    }

    @Override
    public String getSpeciesName(String speciesId) {
        return speciesId;  //To change body of implemented methods use File | Settings | File Templates.
    }

    @Override
    public String getRefId() {
        return index.getRefId()//To change body of implemented methods use File | Settings | File Templates.
    }


    void parseHeader() throws IOException {

        BufferedReader reader = null;

        try {
            InputStream is = IGVSeekableStreamFactory.getInstance().getStreamFor(path);
            reader = new BufferedReader(new InputStreamReader(is));

            String line;
            while ((line = reader.readLine()) != null) {

                if (line.startsWith("a ")) {
                    return// Done with header
                } else if (line.startsWith("track")) {

                    String[] tokens = breakQuotedString(line);
                    for (int i = 0; i < tokens.length; i++) {
                        String key = null;
                        String value = null;
                        String[] kv = tokens[i].split("=");
                        if (kv.length == 1) {
                            if (tokens[i].endsWith(("="))) {
                                key = kv[0].toLowerCase().trim();
                                value = tokens[++i];
                            }
                        } else if (kv.length == 2) {
                            key = kv[0].toLowerCase().trim();
                            value = kv[1];
                        }
                        if (key != null) {
                            if (key.equals("name")) {
                                this.trackName = value;
                            }
                            if (key.equals("speciesorder")) {
                                species = Arrays.asList(Globals.whitespacePattern.split(value));
                            }
                        }
                    }
                }
            }
        } finally {
            if (reader != null) reader.close();
        }
    }

    private String[] breakQuotedString(String subjectString) {

        List<String> matchList = new ArrayList<String>();
        Pattern regex = Pattern.compile("[^\\s\"']+|\"([^\"]*)\"|'([^']*)'");
        Matcher regexMatcher = regex.matcher(subjectString);
        while (regexMatcher.find()) {
            if (regexMatcher.group(1) != null) {
                // Add double-quoted string without the quotes
                matchList.add(regexMatcher.group(1));
            } else if (regexMatcher.group(2) != null) {
                // Add single-quoted string without the quotes
                matchList.add(regexMatcher.group(2));
            } else {
                // Add unquoted word
                matchList.add(regexMatcher.group());
            }
        }

        return matchList.toArray(new String[]{});
    }


    /**
     * Parse an alignment block.
     *
     * @param reader
     */
    private MultipleAlignmentBlock parseBlock(BufferedReader reader) throws IOException {

        String line;
        MultipleAlignmentBlock ma = new MultipleAlignmentBlock();

        while ((line = reader.readLine()) != null) {
            if (line.trim().length() == 0) {
                return ma;
            }
            if (line.startsWith("s ")) {

                String[] tokens = Globals.whitespacePattern.split(line);

                String src = tokens[1];
                String species = src;
                String chr = src;
                if (src.contains(".")) {
                    String[] srcTokens = ParsingUtils.PERIOD_PATTERN.split(src);
                    species = srcTokens[0];
                    chr = srcTokens[1];
                }
                int start = Integer.parseInt(tokens[2]);
                int size = Integer.parseInt(tokens[3]);
                char strand = tokens[4].charAt(0);
                int srcSize = Integer.parseInt(tokens[5]);
                String text = tokens[6];

                ma.addSequence(new MultipleAlignmentBlock.Sequence(species, chr, start, size, strand, srcSize, text));


            }


        }
        return ma;
    }

}
TOP

Related Classes of org.broad.igv.maf.MAFParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.