Package gem

Source Code of gem.CrossPlatformMapper

package gem;

import gem.parser.TabDelimitedFileParser;
import gem.util.FileUtil;
import gem.util.Summary;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;

/**
* @author Ozgun Babur
*/
public class CrossPlatformMapper implements Constants
{
  public static Map<String, Gene> fetchGenes(Collection<String> ids, String filename) throws Throwable
  {
    Map<String, double[]> sourceMap = readCrossPlatExp(filename, null);
    Map<String, List<Gene>> geneMap = new HashMap<String, List<Gene>>();

    for (String id : ids)
    {
      String sym = Triplet.getGeneToSymbolMap().get(id);
      if (Triplet.getSymbolToGeneMap().containsKey(id))
      {
        sym = id;
        id = Triplet.getSymbolToGeneMap().get(id);
      }

      if (sourceMap.containsKey(id) || (sym != null && sourceMap.containsKey(sym)))
      {
        if (!geneMap.containsKey(id)) geneMap.put(id, new ArrayList<Gene>());

        double[] val = sourceMap.get(id);
        if (val == null) val = sourceMap.get(sym);
        if (sym == null) sym = id;
        Gene gene = new Gene(id, sym, val.length);
        gene.value = val;
        geneMap.get(id).add(gene);
      }
    }

    Map<String, Gene> resultMap = new HashMap<String, Gene>();

    for (List<Gene> geneList : geneMap.values())
    {
//      double maxvar = -1;
//      Gene maxGene = null;
//
//      for (Gene gene : geneList)
//      {
//        double var = gene.calcVariance();
//        if (var > maxvar)
//        {
//          maxvar = var;
//          maxGene = gene;
//        }
//      }
//      assert maxGene != null;
//
//      resultMap.put(maxGene.geneid, maxGene);

//      Gene gene = getAverage(geneList);
      assert geneList.size() == 1; // so below getmax code is unfunctional
      Gene gene = getMax(geneList);
      resultMap.put(gene.geneid, gene);
    }

    return resultMap;
  }

  public static Map<String, Gene> fetchGenes(Set<String> ids, String filename, String platform) throws Throwable
  {
    Map<String, List<double[]>> id2vals = readCross(platform, filename, ids);
    Map<String, double[]> id2val = selectMostVaried(id2vals);
    Map<String, Gene> id2gene = convertValuesToGenes(id2val);
    return id2gene;
  }

  private static Gene getAverage(List<Gene> genes)
  {
    assert !genes.isEmpty();
    if (genes.size() == 1) return genes.get(0);
    Gene first = genes.get(0);

    Gene gene = new Gene(first.geneid, ".|.", first.value.length);

    for (int i = 0; i < gene.value.length; i++)
    {
      double avg = 0;
      for (Gene g : genes)
      {
        avg += g.value[i];
      }
      avg /= genes.size();
      gene.value[i] = avg;
    }
    return gene;
  }

  private static Gene getMax(List<Gene> genes)
  {
    assert !genes.isEmpty();
    if (genes.size() == 1) return genes.get(0);

    double max = -1e10;
    Gene maxG = null;

    for (Gene gene : genes)
    {
      double sum = 0;
      for (int i = 0; i < gene.value.length; i++)
      {
        sum += gene.value[i];
      }
      if (sum > max)
      {
        max = sum;
        maxG = gene;
      }
    }

    return maxG;
  }

  public static Map<String, double[]> readCrossPlatExp(String filename, Set<String> ids) throws Throwable
  {
    // Optimized for niki's data. Should be updated for other formats.

    Map<String, List<double[]>> map = new HashMap<String, List<double[]>>();
    BufferedReader reader = new BufferedReader(new FileReader(filename));

    for (String line = reader.readLine(); line != null; line = reader.readLine())
    {
      if (line.startsWith("#") || line.startsWith("!") || line.startsWith("^") ||
        line.length() == 0 || line.startsWith("\"G") ||
        (line.startsWith("\"I") && !line.startsWith("\"ILMN")) ||
        line.startsWith("\"A") || line.startsWith("ID")) continue;

      String id = line.substring(0, line.indexOf("\t"));
      if (id.startsWith("\"")) id = id.substring(1, id.lastIndexOf("\""));

      if (ids != null && !ids.contains(id)) continue;

      if (line.startsWith("\""))
      {
        line = line.substring(line.indexOf("\"\t") + 2);
      }
      else
      {
        line = line.substring(line.indexOf("\t") + 1);
      }

      if (line.startsWith("\"")) line = line.substring(1, line.lastIndexOf("\""));

      // I am not handling missing values anymore
//      if (line.endsWith("\t")) line = line + "null";
     
      String[] tokens = line.split("\t");

      double[] val = new double[tokens.length];

      for (int i = 0; i < tokens.length; i++)
      {
        if (tokens[i].equals("null") || tokens[i].equals(""))
        {
          val[i] = Double.NaN;
        }
        else
        {
          val[i] = Double.parseDouble(tokens[i]);
        }
      }

      if (!map.containsKey(id)) map.put(id, new ArrayList<double[]>());
      map.get(id).add(val);
    }
    reader.close();

    Map<String, double[]> result = new HashMap<String,double[]>();

    for (String id : map.keySet())
    {
      result.put(id, chooseOne(map.get(id)));
    }

    return result;
  }

  private static double[] chooseOne(List<double[]> list)
  {
    double max = -1e10;
    double[] chosen = null;

    for (double[] v : list)
    {
      double var = Summary.variance(v);
      if (var > max)
      {
        max = var;
        chosen = v;
      }
    }
    return chosen;
  }

  public static void associateAndClean(List<Triplet> trips, String filename, String platform) throws Throwable
  {
    Set<String> genes = getIDs(trips);
    Map<String, List<double[]>> id2vals = readCross(platform, filename, genes);
    Map<String, double[]> id2val = selectMostVaried(id2vals);
    Map<String, Gene> id2gene = convertValuesToGenes(id2val);

    if (!id2gene.containsKey("367"))
    {
      Gene ar = new Gene("367", "AR", id2val.values().iterator().next().length);
      id2gene.put("367", ar);
    }

    genesMap = new HashMap<String, Gene>();
    associate(trips, id2gene);
  }

  private static Map<String, double[]> selectFirst(Map<String, List<double[]>> id2vals)
  {
    Map<String, double[]> map = new HashMap<String,double[]>();

    for (String key : id2vals.keySet())
    {
      map.put(key, id2vals.get(key).iterator().next());
    }
    return map;
  }

  private static Map<String, double[]> selectMostVaried(Map<String, List<double[]>> id2vals)
  {
    Map<String, double[]> map = new HashMap<String,double[]>();

    for (String key : id2vals.keySet())
    {
      double max = 0;
      double[] v = null;
      for (double[] vals : id2vals.get(key))
      {
        double var = Summary.variance(vals);
        if (var > max)
        {
          max = var;
          v = vals;
        }
      }
      if (v != null) map.put(key, v);
    }
    return map;
  }

  public static void associateAndClean(List<Triplet> trips, String filename) throws Throwable
  {
    Set<String> genes = getIDs(trips);
    Map<String, Gene> map = fetchGenes(genes, filename);

    genesMap = new HashMap<String, Gene>();
    associate(trips, map);
  }

  private static void associate(List<Triplet> trips, Map<String, Gene> map)
  {
    Iterator<Triplet> iter = trips.iterator();
    while (iter.hasNext())
    {
      Triplet t = iter.next();

//      t.M = copy(map.get(t.modulator), t.mod_id);
//      t.F = copy(map.get(t.factor), t.fac_id);
//      t.T = copy(map.get(t.target), t.tar_id);
      t.M = map.get(t.modulator);
      t.F = map.get(t.factor);
      t.T = map.get(t.target);

      if (t.M == null || t.F == null || t.T == null) iter.remove();
    }
  }

  private static Set<String> getIDs(List<Triplet> trips)
  {
    Set<String> genes = new HashSet<String>();
    for (Triplet t : trips)
    {
      t.backFromURLToIDs();
      genes.add(t.modulator);
      genes.add(t.factor);
      genes.add(t.target);
    }
    return genes;
  }

  /**
   * Cache used by {@code copy}: maps an id to the gene copy created for it. Replaced with a
   * fresh map by the {@code associateAndClean} methods; it is null until one of them has run.
   */
  static Map<String, Gene> genesMap;

  static Gene copy(Gene g, String id)
  {
    if (g == null) return null;

    if (genesMap.containsKey(id)) return genesMap.get(id);

    Gene gene = new Gene(g.geneid, id, g.value.length);
    System.arraycopy(g.value, 0, gene.value, 0, g.value.length);
    genesMap.put(id, gene);
    return gene;
  }

  public static Map<String, List<double[]>> readCross(String platfile, String datafile,
    Set<String> egIDs) throws Throwable
  {
    TabDelimitedFileParser parser = new TabDelimitedFileParser(platfile);

    String[] egCols = new String[]{"ENTREZ_GENE_ID", "Entrez_Gene_ID", "GENE_SYMBOL", "GENE"};
    String egCol = null;
    for (String col : egCols)
    {
      if (FileUtil.columnsLineContains(platfile, col))
      {
        egCol = col;
        break;
      }
    }

    Map<String, Set<String>> eg2id = parser.getOneToManyMap(egCol, "ID");

    Map<String, String> g2s = Triplet.getGeneToSymbolMap();
    Set<String> idSet = new HashSet<String>();
    for (String eg : egIDs)
    {
      if (g2s.containsKey(eg) && eg2id.containsKey(g2s.get(eg)))
      {
        eg = g2s.get(eg);
      }
      if (eg2id.containsKey(eg)) idSet.addAll(eg2id.get(eg));
    }

    Map<String, double[]> id2val = readCrossPlatExp(datafile, idSet);

    Map<String, List<double[]>> eg2vals = new HashMap<String, List<double[]>>();

    for (String eg : egIDs)
    {
      String egg = eg;
      if (g2s.containsKey(eg) && eg2id.containsKey(g2s.get(eg)))
      {
        egg = g2s.get(eg);
      }
      if (eg2id.containsKey(egg))
      {
        for (String id : eg2id.get(egg))
        {
          if (id2val.containsKey(id))
          {
            if (!eg2vals.containsKey(eg)) eg2vals.put(eg, new ArrayList<double[]>());
            eg2vals.get(eg).add(id2val.get(id));
          }
        }
      }
    }
    return eg2vals;
  }

  public static void associateWithGSE6919(List<Triplet> trips, String platform) throws Throwable
  {
    Set<String> genes = getIDs(trips);

    Map<String, double[]> id2val = readGSE6919_exps(platform, genes);

    Map<String, Gene> id2gene = convertValuesToGenes(id2val);

    Gene ar = new Gene("367", "367|367", id2val.values().iterator().next().length);
    id2gene.put("367", ar);

    Iterator<Triplet> iter = trips.iterator();
    while (iter.hasNext())
    {
      Triplet t = iter.next();

      t.M = id2gene.get(t.modulator);
      t.F = id2gene.get(t.factor);
      t.T = id2gene.get(t.target);

      if (t.M == null || t.F == null || t.T == null) iter.remove();
    }
  }

  private static Map<String, Gene> convertValuesToGenes(Map<String, double[]> id2val)
  {
    Map<String, Gene> id2gene = new HashMap<String, Gene>();

    for (String id : id2val.keySet())
    {
      double[] val = id2val.get(id);
      String sym = Triplet.getGeneToSymbolMap().get(id);
      if (sym == null) sym = id;
      Gene gene = new Gene(id, sym, val.length);
      id2gene.put(id, gene);
      System.arraycopy(val, 0, gene.value, 0, val.length);
    }
    return id2gene;
  }

  public static Map<String, double[]> readGSE6919_exps(String platform, Set<String> ids) throws Throwable
  {
    Map<String, List<double[]>> m = readCross("resource/expdata/GSE6919/" + platform + ".txt",
      "resource/expdata/GSE6919/GSE6919-" + platform + "_series_matrix.txt", ids);

    for (String id : m.keySet())
    {
      for (double[] v : m.get(id))
      {
        for (int i = 0; i < v.length; i++)
        {
          v[i] = Math.log(v[i]);
        }
      }
    }

    boolean[] posN = getPosInGSE6919(platform, true);
    boolean[] posC = getPosInGSE6919(platform, false);

    Map<String, double[]> map = selectMostChanged(m, posN, posC);
    return map;
  }

  public static Map<String, double[]> selectMostChanged(Map<String, List<double[]>> map,
    boolean[] posNormal, boolean[] posCancer)
  {
    Map<String, double[]> mc = new HashMap<String,double[]>();

    for (String id : map.keySet())
    {
      double[] most = null;
      double max = -1;

      for (double[] v : map.get(id))
      {
        Gene g = new Gene("", "", 0);
        g.value = v;
        double ch = Math.abs(CellTypeMatcher.getChangeBetweenTissues(
          g, posNormal, posCancer) - .5);

        if (ch > max)
        {
          max = ch;
          most = v;
        }
      }

      assert most != null;

      mc.put(id, most);
    }
    return mc;
  }

  public static List<String> getGSE6919_names(String platform) throws IOException
  {
    List<String> names = null;
    BufferedReader reader = new BufferedReader(new FileReader("resource/expdata/GSE6919/GSE6919-" + platform + "_series_matrix.txt"));

    for (String line = reader.readLine(); line != null; line = reader.readLine())
    {
      if (line.startsWith("!") || line.length() == 0) continue;

      if (line.startsWith("\"ID"))
      {
        String[] tokens = line.substring(line.indexOf("\"\t") + 2).split("\t");
        names = new ArrayList<String>(tokens.length);
        for (String token : tokens)
        {
          names.add(token.substring(1, token.length() - 1));
        }

        break;
      }
    }

    reader.close();
    return names;
  }

  public static Set<String> getGSE6919Normal()
  {
    return getGSE6919Select("Normal", "free");
  }

  public static Set<String> getGSE6919Cancer()
  {
    return getGSE6919Select("umor samples");
  }

  public static Set<String> getGSE6919Select(String... words)
  {
    Set<String> set = new HashSet<String>();

    TabDelimitedFileParser parser = new TabDelimitedFileParser(
      "resource/expdata/GSE6919/exptypes.txt");

    Map<String, String> map = parser.getOneToOneMap("ID", "Description");

    for (String exp : map.keySet())
    {
      String desc = map.get(exp);

      boolean containsAll = true;
      for (String word : words)
      {
        if (!desc.contains(word))
        {
          containsAll = false;
          break;
        }
      }
      if (containsAll) set.add(exp);
    }
    return set;
  }

  public static boolean[] getPosInGSE6919(String platform, boolean normal) throws IOException
  {
    List<String> names = getGSE6919_names(platform);

    Set<String> select = normal ? getGSE6919Normal() : getGSE6919Cancer();

    boolean[] pos = new boolean[names.size()];

    for (int i = 0; i < pos.length; i++)
    {
      pos[i] = select.contains(names.get(i));
    }
    return pos;
  }
}
TOP

Related Classes of gem.CrossPlatformMapper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.