Package org.broad.igv.ui.action

Source Code of org.broad.igv.ui.action.SearchCommand$SearchResult

/*
* Copyright (c) 2007-2012 The Broad Institute, Inc.
* SOFTWARE COPYRIGHT NOTICE
* This software and its documentation are the copyright of the Broad Institute, Inc. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. The Broad Institute is not responsible for its use, misuse, or functionality.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
* Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*/


package org.broad.igv.ui.action;

//~--- non-JDK imports --------------------------------------------------------

import org.apache.log4j.Logger;
import org.broad.igv.Globals;
import org.broad.igv.PreferenceManager;
import org.broad.igv.annotations.ForTesting;
import org.broad.igv.dev.api.NamedFeatureSearcher;
import org.broad.igv.feature.*;
import org.broad.igv.feature.genome.Genome;
import org.broad.igv.feature.genome.GenomeManager;
import org.broad.igv.lists.GeneList;
import org.broad.igv.ui.IGV;
import org.broad.igv.ui.event.ViewChange;
import org.broad.igv.ui.panel.ReferenceFrame;
import org.broad.igv.ui.util.MessageUtils;
import htsjdk.tribble.Feature;

import javax.swing.*;
import java.awt.*;
import java.awt.event.MouseAdapter;
import java.awt.event.MouseEvent;
import java.util.*;
import java.util.List;

/**
* A class for performing search actions.  The class takes a view context and
* search string as parameters.   The search string can be either
* (a) a feature (e.g. gene),  or
* (b) a locus string in the UCSC form,  e.g. chr1:100,000-200,000
* <p/>
* Note:  Currently the only recognized features are genes
* <p/>
* Custom searchers can be registered, see {@link #registerNamedFeatureSearcher(org.broad.igv.dev.api.NamedFeatureSearcher)}
*
* @author jrobinso
*/
public class SearchCommand {

    // Class-wide log4j logger.
    private static Logger log = Logger.getLogger(SearchCommand.class);
    // Maximum number of hits requested from each registered searcher; also used as a capacity
    // hint when the selection list is built.
    public static int SEARCH_LIMIT = 20;
    // Raised while parsing when a token produces two or more candidates; execute() then asks the
    // user to choose before anything is shown.
    private boolean askUser = false;

    // Text to search for; the package-private constructor stores it trimmed.
    String searchString;
    // Frame that is navigated when a result is shown.
    ReferenceFrame referenceFrame;
    // When true, a successful search is pushed onto the session history.
    boolean recordHistory = true;
    // Genome used to resolve chromosome names/aliases and mutation lookups.
    Genome genome;


    // Searchers consulted, in iteration order, when no exact feature name matches.
    private static Set<NamedFeatureSearcher> nameSearchers;

    // One regular expression per result type; checkTokenType matches tokens against each of them.
    private static HashMap<ResultType, String> tokenMatchers;

    static {
        resetNamedFeatureSearchers();

        //Regexp for a number with commas in it (no periods)
        String num_withcommas = "(((\\d)+,?)+)";

        //chromosome can include anything except whitespace
        String chromo_string = "(\\S)+";

        String chromo = chromo_string;
        //This will match chr1:1-100, chr1:1, chr1  1, chr1 1   100
        String chromo_range = chromo_string + "(:|(\\s)+)" + num_withcommas + "(-|(\\s)+)?" + num_withcommas + "?(\\s)*";

        //Simple feature
        String feature = chromo_string;
        //Amino acid mutation notation. e.g. KRAS:G12C. * is stop codon
        String featureMutAA = chromo_string + ":[A-Z,a-z,*]" + num_withcommas + "[A-Z,a-z,*]";

        //Nucleotide mutation notation. e.g. KRAS:123A>T
        String nts = "[A,C,G,T,a,c,g,t]";
        String featureMutNT = chromo_string + ":" + num_withcommas + nts + "\\>" + nts;

        tokenMatchers = new HashMap<ResultType, String>();
        tokenMatchers.put(ResultType.CHROMOSOME, chromo);
        tokenMatchers.put(ResultType.FEATURE, feature);
        tokenMatchers.put(ResultType.LOCUS, chromo_range);
        tokenMatchers.put(ResultType.FEATURE_MUT_AA, featureMutAA);
        tokenMatchers.put(ResultType.FEATURE_MUT_NT, featureMutNT);
    }


    /**
     * Create a search that runs against the genome currently selected in the {@link GenomeManager}.
     *
     * @param referenceFrame frame to navigate when results are shown
     * @param searchString   text to search for
     */
    public SearchCommand(ReferenceFrame referenceFrame, String searchString) {
        this(referenceFrame, searchString, GenomeManager.getInstance().getCurrentGenome());
    }

    public SearchCommand(ReferenceFrame referenceFrame, String searchString, boolean recordHistory) {
        this(referenceFrame, searchString);
        this.recordHistory = recordHistory;
    }

    SearchCommand(ReferenceFrame referenceFrame, String searchString, Genome genome) {
        this.referenceFrame = referenceFrame;
        this.searchString = searchString.trim();
        this.genome = genome;
    }


    public void execute() {

        if (log.isDebugEnabled()) {
            log.debug("Run search: " + searchString);
        }

        List<SearchResult> results = runSearch(searchString);
        if (askUser) {
            results = askUserFeature(results);
            if (results == null) {
                if (log.isDebugEnabled()) {
                    log.debug("Multiple results, show cancelled: " + searchString);
                }
                return;
            }
        }

        showSearchResult(results);

        if (log.isDebugEnabled()) {
            log.debug("End search: " + searchString);
        }
    }

    /**
     * Given a string, search for the appropriate data to show the user.
     * Different syntaxes are accepted.
     * <p/>
     * In general, whitespace delimited tokens are treated separately and each are shown.
     * There is 1 exception to this. A locus of form chr1   1   10000 will be treated the same
     * as chr1:1-10000. Only one entry of this form can be entered, chr1    1   10000 chr2:1-1000 will
     * not be recognized.
     *
     * @param searchString Feature name (EGFR), chromosome (chr1), or locus string (chr1:1-100 or chr1:6)
     *                     Partial matches to a feature name (EG) will return multiple results, and
     *                     ask the user which they want.
     * @return result
     *         List<SearchResult> describing the results of the search. Will never
     *         be null, field type will equal ResultType.ERROR if something went wrong.
     */
    public List<SearchResult> runSearch(String searchString) {

        List<SearchResult> results = new ArrayList<SearchResult>();

        searchString = searchString.replace("\"", "");

        Set<ResultType> wholeStringType = checkTokenType(searchString);
        if (wholeStringType.contains(ResultType.LOCUS)) {
            results.add(calcChromoLocus(searchString));
            return results;
        }

        // Space delimited?
        String[] tokens = searchString.split("\\s+");
        for (String s : tokens) {
            results.addAll(parseToken(s));
        }

        if (results.size() == 0) {
            SearchResult result = new SearchResult();
            result.setMessage("Invalid Search String: " + searchString);
            results.add(result);
        }

        return results;
    }

    public void showSearchResult(List<SearchResult> results) {
        int origZoom = referenceFrame.getZoom();
        if (results == null || results.size() == 0) {
            results = new ArrayList<SearchResult>();

            results.add(new SearchResult());
        }

        boolean showMessage = false;
        boolean success = true;
        String message = "Invalid search string: " + searchString;

        if (results.size() == 1) {
            SearchResult result = results.get(0);
            if (result.type != ResultType.ERROR) {//FrameManager.isGeneListMode()) {
                IGV.getInstance().getSession().setCurrentGeneList(null);
            }

            switch (result.type) {
                case FEATURE:
                    showFlankedRegion(result.chr, result.start, result.end);
                    break;
                case LOCUS:
                    referenceFrame.jumpTo(result.chr, result.start, result.end);
                    break;
                case CHROMOSOME:
                    referenceFrame.getEventBus().post(new ViewChange.ChromosomeChangeCause(this, result.chr));
                    referenceFrame.getEventBus().post(new ViewChange.ZoomCause(0));
                    break;
                case ERROR:
                default: {
                    message = "Cannot find feature or locus: " + searchString;
                    success = false;
                    showMessage = true;
                }
            }
        } else {
            List<String> loci = new ArrayList<String>(results.size());
            message = "";
            for (SearchResult res : results) {
                if (res.type != ResultType.ERROR) {
                    loci.add(res.getLocus());
                } else {
                    message = message + res.getMessage() + "\n";
                    showMessage = true;
                }
            }
            GeneList geneList = new GeneList("", loci, false);
            IGV.getInstance().getSession().setCurrentGeneList(geneList);
        }

        IGV.getInstance().resetFrames();


        if (success && recordHistory) {
            IGV.getInstance().getSession().getHistory().push(searchString, origZoom);
        }
        if (showMessage) {
            MessageUtils.showMessage(message);
        }

    }

    /**
     * Get a list of strings of feature names suitable for display, containing only
     * those search results which were not an error
     *
     * @param results
     * @param longName Whether to use the long (true) or short (false)
     *                 of search results.
     * @return Array of strings of results found.
     */
    public static Object[] getSelectionList(List<SearchResult> results, boolean longName) {
        ArrayList<String> options = new ArrayList<String>(Math.min(results.size(), SEARCH_LIMIT));
        for (SearchResult result : results) {
            if (result.type == ResultType.ERROR) {
                continue;
            }
            if (longName) {
                options.add(result.getLongName());
            } else
                options.add(result.getShortName());
        }

        return options.toArray();
    }

    /**
     * Display a dialog asking user which search result they want
     * to display. Number of results are limited to SEARCH_LIMIT.
     * The user can select multiple options, in which case all
     * are displayed.
     *
     * @param results
     * @return SearchResults which the user has selected.
     *         Will be null if cancelled
     */
    private List<SearchResult> askUserFeature(List<SearchResult> results) {

        Object[] list = getSelectionList(results, true);
        JList ls = new JList(list);
        ls.setSelectionMode(ListSelectionModel.SINGLE_SELECTION);
        //ls.setSelectionMode(ListSelectionModel.MULTIPLE_INTERVAL_SELECTION);

        final JOptionPane pane = new JOptionPane(ls, JOptionPane.PLAIN_MESSAGE, JOptionPane.OK_CANCEL_OPTION);
        final Dialog dialog = pane.createDialog("Features");
        dialog.setModalityType(Dialog.ModalityType.APPLICATION_MODAL);

        //On double click, show that option
        ls.addMouseListener(new MouseAdapter() {
            @Override
            public void mouseClicked(MouseEvent e) {
                if (e.getClickCount() >= 2) {
                    dialog.setVisible(false);
                    pane.setValue(JOptionPane.OK_OPTION);
                    dialog.dispose();
                }
            }
        });

        dialog.setVisible(true);

        int resp = (Integer) pane.getValue();

        List<SearchResult> val = null;
        if (resp == JOptionPane.OK_OPTION) {
            int[] selected = ls.getSelectedIndices();
            val = new ArrayList<SearchResult>(selected.length);
            for (int ii = 0; ii < selected.length; ii++) {
                val.add(ii, results.get(selected[ii]));
            }
        }
        return val;

    }

    /**
     * Check token type using regex.
     * Intended to be inclusive, returns all possible matches
     *
     * @param token
     * @return
     */
    Set<ResultType> checkTokenType(String token) {
        token = token.trim();


        Set<ResultType> possibles = new HashSet<ResultType>();
        for (ResultType type : tokenMatchers.keySet()) {
            if (token.matches(tokenMatchers.get(type))) { //note: entire string must match
                possibles.add(type);
            }
        }

        return possibles;
    }

    /**
     * Determine searchResult for white-space delimited search query.
     *
     * @param token
     * @return searchResult
     */
    private List<SearchResult> parseToken(String token) {

        List<SearchResult> results = new ArrayList<SearchResult>();
        List<NamedFeature> features;

        //Guess at token type via regex.
        //We don't assume success
        Set<ResultType> types = checkTokenType(token);
        SearchResult result;
        if (types.contains(ResultType.LOCUS) || types.contains(ResultType.CHROMOSOME)) {
            //Check if a full or partial locus string
            result = calcChromoLocus(token);
            if (result.type != ResultType.ERROR) {
                results.add(result);
                return results;
            }
        }

        //2 possible mutation notations, either amino acid (A123B) or nucleotide (123G>C)
        if (types.contains(ResultType.FEATURE_MUT_AA) || types.contains(ResultType.FEATURE_MUT_NT)) {
            //We know it has the right form, but may
            //not be valid feature name or mutation
            //which exists.
            String[] items = token.toUpperCase().split(":");
            String name = items[0].trim().toUpperCase();
            String coords = items[1];
            int coordLength = coords.length();

            Map<Integer, BasicFeature> genomePosList;

            //Should never match both mutation notations
            if (types.contains(ResultType.FEATURE_MUT_AA)) {
                String refSymbol = coords.substring(0, 1);
                String mutSymbol = coords.substring(coordLength - 1);

                String strLoc = coords.substring(1, coordLength - 1);
                int location = Integer.parseInt(strLoc) - 1;

                genomePosList = FeatureDB.getMutationAA(name, location + 1, refSymbol, mutSymbol, genome);
            } else if (types.contains(ResultType.FEATURE_MUT_NT)) {
                //Exclude the "A>T" at end
                String strLoc = coords.substring(0, coordLength - 3);
                String refSymbol = coords.substring(coordLength - 3, coordLength - 2);
                int location = Integer.parseInt(strLoc) - 1;
                genomePosList = FeatureDB.getMutationNT(name, location + 1, refSymbol, genome);
            } else {
                //This should never happen
                throw new IllegalArgumentException("Something went wrong parsing input token");
            }
            askUser |= genomePosList.size() >= 2;

            for (int genomePos : genomePosList.keySet()) {
                Feature feat = genomePosList.get(genomePos);
                //Zoom in on mutation of interest
                //The +2 accounts for centering on the center of the amino acid, not beginning
                //and converting from 0-based to 1-based (which getStartEnd expects)
                int[] locs = getStartEnd("" + (genomePos + 2));
                result = new SearchResult(ResultType.LOCUS, feat.getChr(), locs[0], locs[1]);
                results.add(result);

            }
            return results;
        }

        if (types.contains(ResultType.FEATURE)) {
            //Check if we have an exact name for the feature name
            NamedFeature feat = FeatureDB.getFeature(token.toUpperCase().trim());
            if (feat != null) {
                results.add(new SearchResult(feat));
                return results;
            }

            //Check inexact match
            //We will later want to ask the user which of these to keep
            features = comprehensiveFeatureSearch(token);
            if (features.size() > 0) {
                askUser |= features.size() >= 2;
                return getResults(features);
            }
        }

        result = new SearchResult();
        result.setMessage("Invalid token: " + token);
        results.add(result);
        return results;

    }

    /**
     * Register a {@link org.broad.igv.dev.api.NamedFeatureSearcher} to be used when searching for features,
     * such as when the user enters text in the box.
     * This is idempotent, registering the same searcher multiple times is the same as
     * adding it once.
     *
     * @param searcher
     * @return Whether the searcher was added
     * @api
     */
    public static boolean registerNamedFeatureSearcher(NamedFeatureSearcher searcher) {
        return nameSearchers.add(searcher);
    }

    /**
     * @param searcher
     * @return Whether the searcher was removed
     * @api
     */
    public static boolean unregisterNamedFeatureSearcher(NamedFeatureSearcher searcher) {
        return nameSearchers.remove(searcher);
    }

    static void resetNamedFeatureSearchers() {
        nameSearchers = new LinkedHashSet<NamedFeatureSearcher>();
        registerNamedFeatureSearcher(new InexactLoadedFeatureSearcher());
    }

    /**
     * Search all known sources for features with the provided name.
     * This means our own database which gets updated when files are loaded,
     * as well as others (possibly plugins)
     *
     * @param searchString
     * @return
     */
    private List<NamedFeature> comprehensiveFeatureSearch(String searchString) {
        List<NamedFeature> features = new ArrayList<NamedFeature>();
        for (NamedFeatureSearcher searcher : nameSearchers) {
            Collection<? extends NamedFeature> tmp = searcher.search(searchString, SEARCH_LIMIT);
            if (tmp == null) {
                log.warn("Error searching with " + searcher);
            } else {
                features.addAll(tmp);
            }
        }
        return features;
    }

    /**
     * Parse a string of locus coordinates.
     * Can have whitespace delimiters, and be missing second coordinate,
     * but must have 1st coordinate.
     *
     * @param searchString
     * @return
     */
    private SearchResult calcChromoLocus(String searchString) {
        /*
        chromosome can have whitespace or : delimiter
        chromosome also might have : in the name
         */
        int[] startEnd = null;
        String[] tokens = searchString.split("\\s+");

        String chr = tokens[0];
        boolean whitespace_delim = tokens.length >= 2;
        if (whitespace_delim) {
            String posString = tokens[1];
            if (tokens.length >= 3) {
                posString += "-" + tokens[2];
            }
            startEnd = getStartEnd(posString);
        } else {
            //Not whitespace delimited
            //Could be chromoname:1-100, chromoname:1, chromoname

            int colonIdx = searchString.lastIndexOf(":");
            if (colonIdx > 0) {
                chr = searchString.substring(0, colonIdx);
                String posString = searchString.substring(colonIdx).replace(":", "");
                startEnd = getStartEnd(posString);
                //This MAY for case of chromoname having semicolon in it
                if (startEnd == null) {
                    chr = searchString;
                }
            }
        }

        //startEnd will have coordinates if found.
        chr = genome.getChromosomeAlias(chr);
        Chromosome chromosome = genome.getChromosome(chr);
        //If we couldn't find chromosome, check
        //whole string
        if (chromosome == null) {
            chr = genome.getChromosomeAlias(tokens[0]);
            chromosome = genome.getChromosome(chr);
            if (chromosome != null) {
                //Found chromosome
                startEnd = null;
            }
        }

        if (chromosome != null && !searchString.equals(Globals.CHR_ALL)) {
            if (startEnd != null) {
                return new SearchResult(ResultType.LOCUS, chr, startEnd[0], startEnd[1]);
            }
            return new SearchResult(ResultType.CHROMOSOME, chr, 0, chromosome.getLength() - 1);
        }
        return new SearchResult(ResultType.ERROR, chr, -1, -1);
    }

    private void showFlankedRegion(String chr, int start, int end) {
        int flankingRegion = PreferenceManager.getInstance().getAsInt(PreferenceManager.FLANKING_REGION);
        int delta;
        if((end - start) == 1) {
            delta = 20; // Don't show flanking region for single base jumps, use 40bp window
        }
        else if (flankingRegion < 0) {
            delta = (-flankingRegion * (end - start)) / 100;
        } else {
            delta = flankingRegion;
        }
        start = Math.max(0, start - delta);
        end = end + delta;

        if (PreferenceManager.getInstance().getAsBoolean(PreferenceManager.SEARCH_ZOOM)) {
            referenceFrame.jumpTo(chr, start, end);
        } else {
            int center = (start + end) / 2;
            referenceFrame.centerOnLocation(chr, center);
        }
    }

    /**
     * Return the start and end positions as a 2 element array for the input
     * position string.  UCSC conventions  are followed for coordinates,
     * specifically the internal representation is "zero" based (first base is
     * numbered 0) and end-exclusive, but the display representation is "one" based (first base is
     * numbered 1) and end-inclusive.   Consequently 1 is subtracted from the parsed positions
     */
    private static int[] getStartEnd(String posString) {
        try {
            String[] posTokens = posString.split("-");
            String startString = posTokens[0].replaceAll(",", "");
            int start = Math.max(0, Integer.parseInt(startString)) - 1;

            // Default value for end
            int end = start + 1;
            if (posTokens.length > 1) {
                String endString = posTokens[1].replaceAll(",", "");
                end = Integer.parseInt(endString);
            }

            if (posTokens.length == 1 || (end - start) < 10) {
                int center = (start + end) / 2;
                int widen = 20;
                start = center - widen;
                start = Math.max(0, start);
                end = center + widen + 1;
            }

            return new int[]{Math.min(start, end), Math.max(start, end)};
        } catch (NumberFormatException numberFormatException) {
            return null;
        }

    }

    /**
     * Kinds of search result. Also used as the keys of the token patterns, so a constant
     * may describe what a token looks like as well as what a result is.
     */
    public enum ResultType {
        // A named feature; results built from a NamedFeature carry this type
        FEATURE,
        // Token in amino acid mutation notation, e.g. KRAS:G12C
        FEATURE_MUT_AA,
        // Token in nucleotide mutation notation, e.g. KRAS:123A>T
        FEATURE_MUT_NT,
        // A chromosome together with coordinates
        LOCUS,
        // A chromosome without coordinates
        CHROMOSOME,
        // Nothing usable was found; the type of a default-constructed SearchResult
        ERROR
    }

    private static class InexactLoadedFeatureSearcher implements NamedFeatureSearcher {
        @Override
        public Collection<NamedFeature> search(String name, int limit) {
            return FeatureDB.getFeaturesList(name, limit);
        }
    }

    /*
    Container class for search results
     */
    public static class SearchResult {
        String chr;
        private int start;
        private int end;
        ResultType type;

        private String locus;
        private String message;
        private NamedFeature feature;
        private String coords;

        public SearchResult() {
            this(ResultType.ERROR, null, -1, -1);
        }

        public SearchResult(ResultType type, String chr, int start, int end) {
            this.type = type;
            this.chr = chr;
            this.start = start;
            this.end = end;
            this.coords = Locus.getFormattedLocusString(chr, start + 1, end);
            this.locus = this.coords;
        }

        public SearchResult(NamedFeature feature) {
            this(ResultType.FEATURE, feature.getChr(), feature.getStart(), feature.getEnd());
            this.feature = feature;
            this.locus = this.feature.getName();
        }

        void setMessage(String message) {
            this.message = message;
        }

        public String getMessage() {
            return this.message;
        }

        /**
         * Always a coordinate string.
         * eg chr1:1-100
         *
         * @return
         */
        private String getCoordinates() {
            return this.coords;
        }

        /**
         * Either a feature name, or coordinates
         *
         * @return
         */
        String getLocus() {
            return this.locus;
        }

        String getShortName() {
            if (this.type == ResultType.FEATURE) {
                return this.feature.getName();
            } else {
                return this.getLocus();
            }
        }

        /**
         * Format for display. If a feature,
         * Featurename (chromosome:start-end)
         * eg EGFR (chr7:55,054,218-55,242,525)
         * <p/>
         * Otherwise, just locus
         *
         * @return
         */
        String getLongName() {
            if (this.type == ResultType.FEATURE) {
                return feature.getName() + " (" + this.getCoordinates() + ")";
            } else {
                return this.getLocus();
            }
        }

        public ResultType getType() {
            return type;
        }

        public String getChr() {
            return chr;
        }

        public int getStart() {
            return start;
        }

        public int getEnd() {
            return end;
        }

        //May be null
        @ForTesting
        public NamedFeature getFeature() {
            return feature;
        }
    }

    /**
     * Get a list of search results from the provided objects,
     * which must be NamedFeature objects.
     *
     * @param objects
     * @return
     */
    public static List<SearchResult> getResults(List<NamedFeature> objects) {
        List<SearchResult> results = new ArrayList<SearchResult>(objects.size());
        for (NamedFeature f : objects) {
            results.add(new SearchCommand.SearchResult(f));
        }
        return results;
    }


}
TOP

Related Classes of org.broad.igv.ui.action.SearchCommand$SearchResult

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.