Package org.broad.igv.feature

Source Code of org.broad.igv.feature.FeatureUtils

/*
* Copyright (c) 2007-2012 The Broad Institute, Inc.
* SOFTWARE COPYRIGHT NOTICE
* This software and its documentation are the copyright of the Broad Institute, Inc. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. The Broad Institute is not responsible for its use, misuse, or functionality.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
* Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*/

/*
* FeatureUtils.java
*
* Useful utilities for working with Features
*/
package org.broad.igv.feature;

import com.google.common.base.Predicate;
import org.broad.igv.feature.genome.Genome;
import org.broad.igv.feature.genome.GenomeManager;
import htsjdk.tribble.Feature;

import java.util.*;

/**
* @author jrobinso
*/
public class FeatureUtils {


    public static Predicate<Feature> getOverlapPredicate(final String chr, final int start, final int end) {
        Predicate<Feature> overlapPredicate = new Predicate<Feature>() {
            @Override
            public boolean apply(Feature object) {
                return chr.equals(object.getChr()) && object.getStart() <= end && object.getEnd() > start;
            }
        };
        return overlapPredicate;
    }

    public static Map<String, List<IGVFeature>> divideByChromosome(List<IGVFeature> features) {
        Map<String, List<IGVFeature>> featureMap = new LinkedHashMap();
        for (IGVFeature f : features) {
            List<IGVFeature> flist = featureMap.get(f.getChr());
            if (flist == null) {
                flist = new ArrayList();
                featureMap.put(f.getChr(), flist);
            }
            flist.add(f);
        }
        return featureMap;
    }

    /**
     * Segregate a list of possibly overlapping features into a list of
     * non-overlapping lists of features.
     */
    public static List<List<IGVFeature>> segregateFeatures(List<IGVFeature> features, double scale) {

        // Create a list to hold the lists of non-overlapping features
        List<List<IGVFeature>> segmentedLists = new ArrayList();

        // Make a working copy of the original list.
        List<IGVFeature> workingList = new LinkedList(features);
        sortFeatureList(workingList);

        // Loop until all features have been allocated to non-overlapping lists
        while (workingList.size() > 0) {

            List<IGVFeature> nonOverlappingFeatures = new LinkedList();
            List<IGVFeature> overlappingFeatures = new LinkedList();

            // Prime the loop with the first feature, it can't overlap itself
            IGVFeature f1 = workingList.remove(0);
            nonOverlappingFeatures.add(f1);
            while (workingList.size() > 0) {
                IGVFeature f2 = workingList.remove(0);
                int scaledStart = (int) (f2.getStart() / scale);
                int scaledEnd = (int) (f1.getEnd() / scale);
                if (scaledStart > scaledEnd) {
                    nonOverlappingFeatures.add(f2);
                    f1 = f2;
                } else {
                    overlappingFeatures.add(f2);
                }
            }

            // Add the list of non-overlapping features and start again with whats left
            segmentedLists.add(nonOverlappingFeatures);
            workingList = overlappingFeatures;
        }
        return segmentedLists;
    }

    /**
     * Sort the feature list by ascending start value
     */
    public static void sortFeatureList(List<? extends Feature> features) {
        Collections.sort(features, FEATURE_START_COMPARATOR);
    }


    /**
     * Null safe version of {@linkplain #combineSortedFeatureListsNoDups(java.util.Iterator, java.util.Iterator, int, int)}
     * If BOTH self and other are null, returns null. If only one is null,
     * returns the other
     *
     * @param self
     * @param other
     * @param start
     * @param end
     * @return
     */
    public static <T extends Feature> List<T> combineSortedFeatureListsNoDups(List<T> self, List<T> other, int start, int end) {
        if (self == null && other == null) {
            return null;
        } else if (self == null) {
            return other;
        } else if (other == null) {
            return self;
        }

        return combineSortedFeatureListsNoDups(self.iterator(), other.iterator(), start, end);
    }

    /**
     * Features are sorted by start position. The interval being merged
     * will have some features on the left or right that the current
     * interval does not have. Both are sorted by start position.
     * So we first add at the beginning, and then the end,
     * only those features which don't overlap the original interval.
     *
     * @param selfIter  iterator of features belonging to this interval
     * @param otherIter iterator of features belonging to some other interval
     * @param start     the beginning of the interval from which selfIter was derived
     * @param end       the end of the interval from which selfIter was derived
     * @return Combined sorted list.
     * @throws ClassCastException If the elements of an iterator cannot be cast
     *                            to a Feature.
     */
    public static <T extends Feature> List<T> combineSortedFeatureListsNoDups(Iterator<T> selfIter, Iterator<T> otherIter, int start, int end) {
        List<T> allFeatures = new ArrayList<T>();
        T otherFeat = null;

        while (otherIter.hasNext()) {
            otherFeat = otherIter.next();
            if (otherFeat.getEnd() > start) break;
            allFeatures.add(otherFeat);
        }

        while (selfIter.hasNext()) {
            allFeatures.add(selfIter.next());
        }

        while (otherIter.hasNext()) {
            if (otherFeat.getStart() >= end) {
                allFeatures.add(otherFeat);
            }
            otherFeat = otherIter.next();
        }

        if (otherFeat != null && otherFeat.getStart() >= end) {
            allFeatures.add(otherFeat);
        }

        return allFeatures;
    }

    /**
     * Return a feature from the supplied list at the given position.
     *
     * @param position 0-based genomic position to which to search for feature
     * @param buffer   search region. The first feature which contains the start position, (expanded by buffer, inclusive)
     *                 will be accepted.
     * @param features
     * @return
     */
    public static <T extends Feature> T getFeatureAt(double position, int buffer, List<? extends T> features) {

        int startIdx = 0;
        int endIdx = features.size();

        while (startIdx != endIdx) {
            int idx = (startIdx + endIdx) / 2;

            T feature = features.get(idx);

            int effectiveStart = feature.getStart();
            int effectiveEnd = feature.getEnd();

            if (position >= effectiveStart - buffer) {
                if (position <= effectiveEnd + buffer) {
                    return features.get(idx);
                } else {
                    if (idx == startIdx) {
                        return null;
                    } else {
                        startIdx = idx;
                    }
                }
            } else {
                endIdx = idx;
            }
        }

        return null;
    }

    /**
     * Get the index of the feature just to the right of the given position.
     * If there is no feature to the right return -1;
     *
     * @param position
     * @param features
     * @return
     */
    public static Feature getFeatureAfter(double position, List<? extends Feature> features) {

        if (features.size() == 0 ||
                features.get(features.size() - 1).getStart() <= position) {
            return null;
        }

        int startIdx = 0;
        int endIdx = features.size();

        // Narrow the list to ~ 10
        while (startIdx != endIdx) {
            int idx = (startIdx + endIdx) / 2;
            double distance = features.get(idx).getStart() - position;
            if (distance <= 0) {
                startIdx = idx;
            } else {
                endIdx = idx;
            }
            if (endIdx - startIdx < 10) {
                break;
            }
        }

        // Now find feature
        for (int idx = startIdx; idx < features.size(); idx++) {
            if (features.get(idx).getStart() > position) {
                return features.get(idx);
            }
        }

        return null;

    }

    public static Feature getFeatureBefore(double position, List<? extends Feature> features) {

        int index = getIndexBefore(position, features);
        while (index >= 0) {
            htsjdk.tribble.Feature f = features.get(index);
            if (f.getStart() < position) {
                return f;
            }
            index--;
        }
        return null;

    }

    public static Feature getFeatureClosest(double position, List<? extends htsjdk.tribble.Feature> features) {
        // look for exact match at position:
        htsjdk.tribble.Feature f0 = getFeatureAt(position, features);
        if (f0 != null) {
            return f0;
        }
        // otherwise look for features on either side and return the closest:
        htsjdk.tribble.Feature f1 = getFeatureBefore(position, features);
        htsjdk.tribble.Feature f2 = getFeatureAfter(position, features);

        double d1 = f1 == null ? Double.MAX_VALUE : Math.abs(position - f1.getEnd());
        double d2 = f2 == null ? Double.MAX_VALUE : Math.abs(f2.getStart() - position);

        return (d1 < d2 ? f1 : f2);

    }

    /**
     * Return a feature that encompasses the supplied position.
     *
     * @param position Query position.
     * @param features List of features.
     * @return The feature whose start overlaps with position, or null.
     */
    private static Feature getFeatureAt(double position, List<? extends Feature> features) {
        int strt = (int) position;
        Feature key = new BasicFeature("", strt, strt + 1);

        int r = Collections.binarySearch(features, key, FEATURE_START_COMPARATOR);

        if (r >= 0) {
            return features.get(r);
        } else {
            return null;
        }
    }


    /**
     * Return the index to the last feature in the list with a start < the given position
     *
     * @param position
     * @param features
     * @return
     */
    public static int getIndexBefore(double position, List<? extends Feature> features) {

        if (features == null || features.size() == 0) {
            return -1;
        }
        if (features.get(features.size() - 1).getStart() <= position) {
            return features.size() - 1;
        }
        if (features.get(0).getStart() >= position) {
            return 0;
        }

        int startIdx = 0;
        int endIdx = features.size() - 1;

        while (startIdx != endIdx) {
            int idx = (startIdx + endIdx) / 2;
            double distance = features.get(idx).getStart() - position;
            if (distance <= 0) {
                startIdx = idx;
            } else {
                endIdx = idx;
            }
            if (endIdx - startIdx < 10) {
                break;
            }
        }

        if (features.get(endIdx).getStart() >= position) {
            for (int idx = endIdx; idx >= 0; idx--) {
                if (features.get(idx).getStart() < position) {
                    return idx;
                }
            }
        } else {
            for (int idx = endIdx + 1; idx < features.size(); idx++) {
                if (features.get(idx).getStart() >= position) {
                    return idx - 1;
                }

            }
        }
        return -1;
    }

    /**
     * Return a feature from the supplied list at the given position.
     *
     * @param position
     * @param maxLength
     * @param features
     * @return
     */
    public static List<Feature> getAllFeaturesAt(double position,
                                                 double maxLength,
                                                 double minWidth,
                                                 List<? extends htsjdk.tribble.Feature> features) {

        List<Feature> returnList = null;

        double adjustedPosition = Math.max(0, position - maxLength);
        int startIdx = Math.max(0, getIndexBefore(adjustedPosition, features));
        for (int idx = startIdx; idx < features.size(); idx++) {
            Feature feature = features.get(idx);
            int start = feature.getStart() - (int) (minWidth / 2);

            if (start > position) {
                break;
            }

            int end = feature.getEnd() + (int) (minWidth / 2);

            if (position >= start && position <= end) {
                if (returnList == null) returnList = new ArrayList();
                returnList.add(feature);
            }
        }

        return returnList;
    }


    /**
     * Export a gene in "edx" format
     * LYZ
     * hg19 12  +  69742133  69748013  69742188  69746999
     * 4
     * 69742133  69742324  0
     * 69743887  69744052  1
     * 69745999  69746078  1
     * 69746932  69748013  2
     * 69742083  69742374  AAGGGTGGAGCC
     *
     * @param feature
     */
    public static void exportEDX(IGVFeature feature) {

        Genome genome = GenomeManager.getInstance().getCurrentGenome();

        System.out.println(feature.getName());
        System.out.print(genome.getId() + "\t");
        String str = feature.getStrand() == Strand.POSITIVE ? "+" : "-";
        System.out.print(feature.getChr() + "\t" + str + "\t" + feature.getStart() + "\t" + feature.getEnd() + "\t");

        List<Exon> exons = feature.getExons();

        // Find coding start & end
        int cdStart = 0;
        int cdEnd = 0;
        for (Exon ex : exons) {
            if (ex.getCdStart() > ex.getStart()) {
                cdStart = ex.getCdStart();
                break;
            }
        }
        for (int i = 0; i < exons.size(); i++) {
            Exon ex = exons.get(i);
            if (ex.getCdEnd() < ex.getEnd()) {
                cdEnd = ex.getCdEnd();
                break;
            }
        }
        System.out.println(cdStart + "\t" + cdEnd);
        System.out.println(exons.size());

        for(Exon ex : exons) {
            int rs = ex.getReadingFrame();
            int fs = rs == 0 ? 0 : 3 - rs;
            System.out.println(ex.getStart() + "\t" + ex.getEnd() + "\t" + fs);
        }


        // Sequence
        int buffer = 10;
        for(int i=0; i<exons.size(); i++) {
            Exon ex = exons.get(i);
            // Buffer -- 50 bp by default but can be smaller

            int seqStart = ex.getStart() - buffer;

            int nextExonStart = (i < exons.size() - 1 ? exons.get(i+1).getStart() : Integer.MAX_VALUE);
            buffer = Math.min(50, (nextExonStart - ex.getEnd()) / 2);
            int seqEnd = ex.getEnd() + buffer;

            byte [] sequence = genome.getSequence(feature.getChr(), seqStart, seqEnd);
            String seqString = new String(sequence);
            System.out.println(seqStart + "\t" + seqEnd + "\t" + seqString);
        }

    }


    private static final Comparator<Feature> FEATURE_CONTAINS_COMPARATOR = new Comparator<Feature>() {
        public int compare(Feature o1, Feature o2) {
            int genomeStart2 = o2.getStart();
            int genomeStart1 = o1.getEnd();
            if (genomeStart2 >= genomeStart1 && o2.getEnd() <= o1.getEnd()) {
                return 0;
            } else {
                return genomeStart1 - genomeStart2;
            }
        }
    };

    public static final Comparator<Feature> FEATURE_START_COMPARATOR = new Comparator<Feature>() {
        public int compare(Feature o1, Feature o2) {
            return o1.getStart() - o2.getStart();
        }
    };
}
TOP

Related Classes of org.broad.igv.feature.FeatureUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.