Package org.broad.igv.data

Source Code of org.broad.igv.data.GenomeSummaryData

/*
* Copyright (c) 2007-2012 The Broad Institute, Inc.
* SOFTWARE COPYRIGHT NOTICE
* This software and its documentation are the copyright of the Broad Institute, Inc. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. The Broad Institute is not responsible for its use, misuse, or functionality.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
* Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*/

/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.broad.igv.data;

import org.apache.log4j.Logger;
import org.broad.igv.feature.genome.Genome;
import org.broad.igv.tdf.Accumulator;
import org.broad.igv.track.WindowFunction;
import org.broad.igv.util.collections.FloatArrayList;
import org.broad.igv.util.collections.IntArrayList;

import java.util.*;

/**
* Summarize (using a windowing function) numeric data points which are associated
* with locations on a genome. Stored by chromosome
* @author jrobinso
*/
public class GenomeSummaryData {

    private static Logger log = Logger.getLogger(GenomeSummaryData.class);

    // Genome coordinates are in kilobases
    private static final double locationUnit = 1000.0;

    /**
     * Number of virtual pixels
     */
    int nPixels = 1000;

    Genome genome;

    String[] samples;

    /**
     * Chromosome name -> [sample name -> list of data values]
     */
    Map<String, Map<String, FloatArrayList>> dataMap = new HashMap<String, Map<String, FloatArrayList>>();

    /**
     * Chromosome name -> list of start locations
     */
    Map<String, IntArrayList> locationMap;

    /**
     * start locations of currently relevant ordered list of chromosomes, spanning whole genomes
     */
    int[] locations;


    /**
     * sample name -> list of sample data
     */
    Map<String, float[]> data;

    int nDataPts = 0;

    Set<String> skippedChromosomes = new HashSet<String>();


    /**
     * Scale in kilobases / pixel
     */
    double scale;

    public GenomeSummaryData(Genome genome, String[] samples) {
        this.genome = genome;
        this.samples = samples;
        scale = (genome.getNominalLength() / locationUnit) / nPixels;

        List<String> chrNames = genome.getLongChromosomeNames();
        locationMap = new HashMap<String, IntArrayList>();
        dataMap = new HashMap<String, Map<String, FloatArrayList>>();
        for (String chr : chrNames) {
            locationMap.put(chr, new IntArrayList(nPixels / 10));
            dataMap.put(chr, new HashMap<String, FloatArrayList>());
            for (String s : samples) {
                dataMap.get(chr).put(s, new FloatArrayList(nPixels / 10));
            }
        }
    }

    /**
     * Changes scale of summary, ie zoom in or out
     * Mainly for testing, can't use after adding any data
     * @param scale
     */
    void setScale(double scale){
        if(nDataPts > 0) throw new IllegalStateException("Can't alter scale after adding data");
        this.scale = scale;
        nPixels = (int) (((double) this.genome.getNominalLength() / locationUnit) / scale);
    }


    /**
     * Add data to be condensed for the whole genome view
     *
     * @param chr
     * @param locs Genomic positions
     * @param sampleData  Map of sample name -> array of values.
     */
    public void addData(String chr, int[] locs, Map<String, float[]> sampleData) {

        IntArrayList locations = locationMap.get(chr);
        if (locations == null) {
            if (!skippedChromosomes.contains(chr)) {
                skippedChromosomes.add(chr);
                log.info("Skipping data for: " + chr);
            }
            return;
        }

        int lastPixel = -1;
        int lastGenomeLocation = -1;
        Map<String, Accumulator> dataPoints = new HashMap<String, Accumulator>();

        for (int i = 0; i < locs.length; i++) {

            int genomeLocation = genome.getGenomeCoordinate(chr, locs[i]);
            int pixel = (int) (genomeLocation / scale);
            if (lastPixel >= 0 && pixel != lastPixel) {
                locations.add(lastGenomeLocation);
                finishLastLocation(chr, dataPoints);
            }

            for (String s : samples) {
                float[] data = sampleData.get(s);
                Accumulator dp = dataPoints.get(s);
                if (dp == null) {
                    dp = new Accumulator(WindowFunction.mean);
                    dataPoints.put(s, dp);
                }
                try {
                    dp.add(1, data[i], null);
                } catch (Exception e) {
                    log.error("Error adding to GenomeSummaryData", e);
                }
            }

            lastPixel = pixel;
            lastGenomeLocation = genomeLocation;
        }

        locations.add(lastGenomeLocation);
        finishLastLocation(chr, dataPoints);
    }

    /**
     * Mark the previous genomic location as having been completely summarized
     * @param chr
     * @param dataPoints  Map sample -> accumulator, which stored data temporarily being accumulated at a given genome location
     */
    private void finishLastLocation(String chr, Map<String, Accumulator> dataPoints) {
        nDataPts++;

        for (String s : dataMap.get(chr).keySet()) {
            Accumulator dp = dataPoints.get(s);
            dp.finish();
            dataMap.get(chr).get(s).add(dp.getValue());
        }
        dataPoints.clear();
    }

    public int[] getLocations() {
        if (locations == null) {
            createDataArrays();
        }

        return locations;
    }


    public float[] getData(String sample) {
        if (!data.containsKey(sample)) {
            createDataArrays();
        }
        return data.get(sample);

    }

    /**
     * Recalculate:
     * 0. Start locations for plotting. Shared across all samples
     * 1. Summary data for a given sample, across all stored chromosomes
     */
    private synchronized void createDataArrays() {
        locations = new int[nDataPts];
        int offset = 0;
        List<String> chrNames = genome.getLongChromosomeNames();
        for (String chr : chrNames) {
            int[] chrLocs = locationMap.get(chr).toArray();
            System.arraycopy(chrLocs, 0, locations, offset, chrLocs.length);
            offset += chrLocs.length;
        }

        data = new HashMap<String, float[]>();
        for (String s : samples) {
            float[] sampleData = new float[nDataPts];
            offset = 0;
            for (String chr : chrNames) {
                float[] chrData = dataMap.get(chr).get(s).toArray();
                System.arraycopy(chrData, 0, sampleData, offset, chrData.length);
                offset += chrData.length;
            }
            data.put(s, sampleData);
        }

        locationMap.clear();
        dataMap.clear();
    }

}
TOP

Related Classes of org.broad.igv.data.GenomeSummaryData

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.