Package gem

Source Code of gem.Gene$ComparableGene

package gem;

import java.io.*;
import java.util.*;

import com.sun.deploy.util.ArrayUtil;
import gem.util.ArrayUtils;
import gem.util.Progress;
import gem.util.Histogram;
import gem.parser.HGNCParser;

/**
* A Gene with its values in microarray experiments.
*
* @author Ozgun Babur
*         Date: Apr 17, 2008
*         Time: 5:45:33 PM
*/
public class Gene implements Constants, Cloneable, Comparable
{
  /**
   * Entrez Gene ID.
   */
  public String geneid;

  /**
   * GenBank and spot ID in the microarray.
   */
  public String id;

  /**
   * Discretized status array.
   */
  public int[] status;

  /**
   * Expression values array.
   */
  public double[] value;

  /**
   * Constructor with IDs and experiment dataset size.
   *
   * @param geneid
   * @param id
   * @param expSize
   */
  public Gene(String geneid, String id, int expSize)
  {
    this.geneid = geneid;
    this.id = id;
    this.value = new double[expSize];
  }

  /**
   * Constructor with gene ID and experiment values.
   *
   * @param geneid
   */
  public Gene(String geneid, double[] value)
  {
    this.geneid = geneid;
    this.id = Triplet.getGeneToSymbolMap().get(geneid);
    this.value = value;
  }

  /**
   * Constructor with the row from the data file. The row is split on tab characters and the
   * resulting terms are handed to {@link #Gene(String[])}.
   *
   * @param line tab separated data row
   */
  public Gene(String line)
  {
    this(line.split("\t"));
  }

  /**
   * Constructor with the data row.
   *
   * @param terms
   */
  public Gene(String[] terms)
  {
    this.geneid = terms[0];
    this.id = terms[1];

    this.value = new double[terms.length - 2];

    for (int i = 2; i < terms.length; i++)
    {
      String s = terms[i];

      String[] tuple = s.split(" ");

      if (tuple[1].equals("NaN"))
      {
        this.value[i-2] = Double.NaN;
      }
      else
      {
        this.value[i-2] = Double.parseDouble(tuple[1]);
      }
    }
  }

  /**
   * Constructor that loads expression data only on the specified indices.
   *
   * @param terms
   * @param inds
   */
  public Gene(String[] terms, int[] inds)
  {
    this.geneid = terms[0];
    this.id = terms[1];

    this.value = new double[inds.length];

    for (int i = 0; i < inds.length; i++)
    {
      String s = terms[inds[i]+2];

      String[] tuple = s.split(" ");

      if (tuple[1].equals("NaN"))
      {
        this.value[i] = Double.NaN;
      }
      else
      {
        this.value[i] = Double.parseDouble(tuple[1]);
      }
    }
  }

  protected Object clone() throws CloneNotSupportedException
  {
    Gene o = (Gene) super.clone();

    int size = this.value.length;
    o.value = new double[size];
    System.arraycopy(this.value,  0, o.value,  0, size);
    if (this.status != null)
    {
      o.status = new int[size];
      System.arraycopy(this.status,  0, o.status,  0, size);
    }

    return o;
  }

  /**
   * Gets GenBank ID of the gene.
   * @return
   */
  public String getGenBank()
  {
    return id.substring(0, id.indexOf("|"));
  }

  /**
   * Rank orders values and discretizes them symmetrically: delegates to
   * {@link #rankAdjustStatus(double, double)} with headRatio as the low rank ratio and
   * 1 - headRatio as the high rank ratio. For example, 1/3 puts the low threshold at one third
   * and the high threshold at two thirds of the sorted values.
   *
   * @param headRatio rank ratio (fraction of the experiment count) of the low threshold
   * @return threshold values [low, high]
   */
  public double[] rankAdjustStatus(double headRatio)
  {
    return rankAdjustStatus(headRatio, 1 - headRatio);
  }

  /**
   * Rank orders values, selects highest 1/ headRatio as high, and lowest 1 / (1 - tailRatio) as
   * low.
   *
   * @param headRatio
   * @param tailRatio
   * @return threshold values [low, high]
   */
  public double[] rankAdjustStatus(double headRatio, double tailRatio)
  {
    if (status == null)
    {
      status = new int[getExpSize()];
    }

    double[] v = new double[value.length];
    System.arraycopy(value, 0, v, 0, v.length);

    Arrays.sort(v);

    double low = v[((int) Math.round(v.length * headRatio))];
    double high = v[((int) Math.round(v.length * tailRatio))];

    thrAdjustStatus(low, high);
   
    return new double[] {low, high};
  }

  public double[] rankAdjustStatus(double headRatio, double tailRatio, boolean[] ignore, int tc)
  {
    if (status == null)
    {
      status = new int[getExpSize()];
    }

    double[] v = new double[value.length - tc];
    int j = 0;
    for (int i = 0; i < value.length; i++)
    {
      if (!ignore[i]) v[j++] = value[i];
    }
    assert j == v.length;

    Arrays.sort(v);

    double low = v[((int) Math.round(v.length * headRatio))];
    double high = v[((int) Math.round(v.length * tailRatio))];

    thrAdjustStatus(low, high, ignore);

    return new double[] {low, high};
  }

  /**
   * Adjusts status values according to the given threshold values for the value array.
   *
   * @param low
   * @param high
   */
  public void thrAdjustStatus(double low, double high)
  {
    if (status == null)
    {
      status = new int[getExpSize()];
    }

    for (int i = 0; i < value.length; i++)
    {
      if (value[i] < low) status[i] = ABSENT;
      else if (value[i] > high) status[i] = PRESENT;
      else status[i] = MARGINAL;
    }
  }

  public void thrAdjustStatus(double low, double high, boolean[] ignore)
  {
    if (status == null)
    {
      status = new int[getExpSize()];
    }

    for (int i = 0; i < value.length; i++)
    {
      if (ignore[i]) status[i] = MARGINAL;
      else
      {
        if (value[i] < low) status[i] = ABSENT;
        else if (value[i] > high) status[i] = PRESENT;
        else status[i] = MARGINAL;
      }
    }
  }

  /**
   * Gets number of ABSENT and PRESENT in the status array.
   *
   * @return
   */
  public int[] getAPCount()
  {
    int[] cnt = new int[2];

    for (int i : status)
    {
      if (i == ABSENT) cnt[0]++;
      else if (i == PRESENT) cnt[1]++;
    }
    return cnt;
  }

  /**
   * Tells whether the status at the given experiment index is PRESENT. The status array must
   * already be set.
   *
   * @param index experiment index
   * @return true if the status at the index equals PRESENT
   */
  boolean presentAt(int index)
  {
    return status[index] == PRESENT;
  }

  /**
   * Gets the experiment indexes at which the status array of this gene equals the given
   * condition. Delegates to {@link #getIndexesOfCondition(int[], int)}.
   *
   * @param condition status value to look for
   * @return indexes where the status equals the condition, in increasing order
   */
  public int[] getIndexesOfCondition(int condition)
  {
    return getIndexesOfCondition(status, condition);
  }

  /**
   * Calculates the presence ratio of this gene in the given tissue.
   *
   * @param pos position array of the tissue
   * @return presence ratio
   */
  public double calcPresenceOnTissue(boolean[] pos)
  {
    assert pos.length == status.length;

    int poscnt = 0;
    int tot = 0;
    for (int i = 0; i < pos.length; i++)
    {
      if (!pos[i]) continue;
      if (status[i] == PRESENT) poscnt++;
      else  if (status[i] == ABSENT) tot++;
    }

    tot += poscnt;
    return tot == 0 ? -1 : poscnt / (double) tot;
  }

  public int[] getStatusCounts(boolean[] pos)
  {
    int[] cnt = new int[3];

    for (int i = 0; i < status.length; i++)
    {
      if (!pos[i]) continue;

      cnt[status[i] == PRESENT ? 2 : status[i] == ABSENT ? 0 : 1] ++;
    }
    return cnt;
  }

  /**
   * This method is used for finding the F=1 subset in the specific tissue.
   * @param pos position array of the tissue
   * @return subset where F=1
   */
  public boolean[] getPresentSubset(boolean[] pos)
  {
    boolean[] p = new boolean[pos.length];

    for (int i = 0; i < p.length; i++)
    {
      p[i] = pos[i] && status[i] == PRESENT;
    }
    return p;
  }

  /**
   * Counts the occurance of specified value in the int array.
   *
   * @param ints
   * @param condition
   * @return
   */
  public static int countCondition(int[] ints, int condition)
  {
    int size = 0;
    for (int c : ints)
    {
      if (c == condition) size++;
    }
    return size;
  }

  /**
   * Gets the positions of the specified value in the int array.
   *
   * @param ints
   * @param condition
   * @return
   */
  public static int[] getIndexesOfCondition(int[] ints, int condition)
  {

    int[] indexes = new int[countCondition(ints, condition)];

    int j =0;
    for (int i = 0; i < ints.length; i++)
    {
      if (ints[i] == condition)
      {
        indexes[j++] = i;
      }
    }
    return indexes;
  }

  /**
   * Gene as a string for recording.
   *
   * @return
   */
  public String toString()
  {
    String s = geneid + "\t" + id;

    for (int i = 0; i< value.length; i++)
    {
      s += "\t" + (Double.isNaN(value[i]) ? "NaN" : fmt.format(value[i]));
    }

    return s;
  }

  public String getPrintable()
  {
    return Triplet.getGeneToSymbolMap().get(geneid) + "|" + id;
  }

  /**
   * Crops experiment data to the specified indexes.
   *
   * @param inds
   */
  public void cropExps(int[] inds)
  {
    double[] vals = new double[inds.length];

    for (int i = 0; i < inds.length; i++)
    {
      vals[i] = value[inds[i]];
    }

    value = vals;
  }

  /**
   * Crops experiment data to the specified indexes.
   *
   * @param pos
   */
  public void cropExps(boolean[] pos)
  {
    int[] inds = new int[ArrayUtils.countTrue(pos)];
    int j = 0;
    for (int i = 0; i < pos.length; i++)
    {
      if (pos[i]) inds[j++] = i;
    }
    cropExps(inds);
  }

  /**
   * Records gene set with expression values.
   *
   * @param set
   * @param filename
   */
  public static void writeGenes(Collection<Gene> set, String filename)
  {
    try
    {
      Progress p = new Progress(set.size());
      BufferedWriter writer = new BufferedWriter(new FileWriter(filename));

      for (Gene gene : set)
      {
        writer.write(gene + "\n");
        p.tick();
      }

      writer.close();
    }
    catch (IOException e)
    {
      e.printStackTrace();
    }
  }

  /**
   * Reads all genes in the given expression file, without filtering by gene ID. Delegates to
   * {@link #readGenes(String, Set)} with a null ID set.
   *
   * @param filename path of the expression file
   * @return map from gene ID to the genes read with that ID
   */
  public static Map<String, List<Gene>> readGenes(String filename)
  {
    return readGenes(filename, null);
  }

  public static Map<String, List<Gene>> readGenes(Set<String> ids, double minvar, String... filename)
  {
    Map<String, List<Gene>> map = readGenes(filename[0], ids);

    for (int i = 1; i < filename.length; i++)
    {
      map = merge(map, readGenes(filename[i], ids), true);
    }

    Set<String> toRem = new HashSet<String>();
    for (String key : map.keySet())
    {
      List<Gene> list = map.get(key);
      Iterator<Gene> iter = list.iterator();

      while(iter.hasNext())
      {
        Gene gene = iter.next();

        if (gene.calcVariance() < minvar) iter.remove();
      }
      if (list.isEmpty()) toRem.add(key);
    }

    for (String key : toRem)
    {
      map.remove(key);
    }

    return map;
  }

  /**
   * Reads expression data.
   *
   * @param ids
   * @param minvar
   * @param filename
   * @return
   */
  public static Map<String, Gene> readGenesWithID(Set<String> ids, double minvar,
    String... filename)
  {
    Map<String, Gene> map = readGenesWithID(filename[0], ids);

    for (int i = 1; i < filename.length; i++)
    {
      map = merge(map, readGenesWithID(filename[i], ids));
    }

    if (minvar > 0)
    {
      Set<String> toRem = new HashSet<String>();
      for (String key : map.keySet())
      {
        Gene gene = map.get(key);

        if (gene.calcVariance() < minvar) toRem.add(key);
      }

      for (String key : toRem)
      {
        map.remove(key);
      }
    }
    return map;
  }

  /**
   * Reads expression data.
   *
   * @param filename
   * @param ids
   * @return
   */
  public static Map<String, List<Gene>> readGenes(String filename, Set<String> ids)
  {
    Map<String, List<Gene>> map = null;

    try
    {
      map = new HashMap<String, List<Gene>>();
      System.out.print("Reading experiments (" + filename + ") ... ");
      BufferedReader reader = new BufferedReader(new FileReader(filename));

      String line;
      while ((line = reader.readLine()) != null)
      {
        if (line.length() > 0)
        {
          if (ids != null)
          {
            String id = line.substring(0, line.indexOf("\t"));
            if (!ids.contains(id)) continue;
          }

          Gene g = new Gene(line);

          if (!map.containsKey(g.geneid))
          {
            map.put(g.geneid, new ArrayList<Gene>());
          }

          map.get(g.geneid).add(g);
        }
      }

      reader.close();
      System.out.println("ok");
    }
    catch (IOException e)
    {
      e.printStackTrace();
    }
    return map;
  }

  public static Map<String, Gene> readGenesWithID(String filename, Set<String> ids)
  {
    Map<String, Gene> map = null;

    try
    {
      map = new HashMap<String, Gene>();
      System.out.print("Reading experiments (" + filename + ") ... ");
      BufferedReader reader = new BufferedReader(new FileReader(filename));

      String line;
      while ((line = reader.readLine()) != null)
      {
        if (line.length() > 0)
        {
          if (ids != null)
          {
            int strt = line.indexOf("\t") + 1;
            String id = line.substring(strt, line.indexOf("\t", strt));
            if (!ids.contains(id)) continue;
          }

          Gene g = new Gene(line);

          map.put(g.id, g);
        }
      }

      reader.close();
      System.out.println("ok");
    }
    catch (IOException e)
    {
      e.printStackTrace();
    }
    return map;
  }

  public static List<Gene> readGene(String id, String filename)
  {
    try
    {
      List<Gene> list = new ArrayList<Gene>();
      BufferedReader reader = new BufferedReader(new FileReader(filename));

      String line;
      while ((line = reader.readLine()) != null)
      {
        if (line.length() > 0)
        {
          if (line.substring(0, line.indexOf("\t")).equals(id))
          {
            list.add(new Gene(line));
          }
        }
      }

      reader.close();
      return list;
    }
    catch (IOException e)
    {
      e.printStackTrace();
    }
    return null;
  }

  public static void keepMostVaried(Map<String, List<Gene>> map)
  {
    for (String key : map.keySet())
    {
      List<Gene> geneList = map.get(key);

      double maxVar = -1;
      Gene most = null;

      for (Gene gene : geneList)
      {
        double var = gene.calcVariance();
        if (var > maxVar)
        {
          maxVar = var;
          most = gene;
        }
      }
      Iterator<Gene> iter = geneList.iterator();
      while (iter.hasNext())
      {
        Gene gene = iter.next();
        if (gene != most) iter.remove();
      }
    }
  }

  /**
   * Calculates the variance of expression values.
   *
   * @return
   */
  public double calcVariance()
  {
    double mean = calcMean();

    double var = 0;

    for (double v : value)
    {
      double d = v - mean;
      var += d * d;
    }
    return var / value.length;
  }

  /**
   * Calculates the variance of expression values at the specified positions.
   *
   * @return
   */
  public double calcVariance(boolean[] pos)
  {
    double mean = calcMean();

    double var = 0;

    int total = 0;
    for (int i = 0; i < value.length; i++)
    {
      if (pos[i] && !Double.isNaN(value[i]))
      {
        double d = value[i] - mean;
        var += d * d;
        total++;
      }
    }
    return total == 0 ? Double.NaN : var / total;
  }

  /**
   * Calculates the mean of expression values.
   *
   * @return
   */
  public double calcMean()
  {
    double mean = 0;
    int cnt = 0;
    for (double v : value)
    {
      if (!Double.isNaN(v))
      {
        mean += v;
        cnt++;
      }
    }
    mean /= cnt;
    return cnt == 0 ? Double.NaN : mean;
  }

  /**
   * Calculates the mean of expression values.
   *
   * @return
   */
  public double calcMean(boolean[] pos)
  {
    double mean = 0;
    int cnt = 0;
    for (int i = 0; i < value.length; i++)
    {
      if (pos[i] && !Double.isNaN(value[i]))
      {
        mean += value[i];
        cnt++;
      }
    }
    mean /= cnt;
    return cnt == 0 ? Double.NaN : mean;
  }

  /**
   * Looks up the gene ID of this gene in Triplet.getGeneToSymbolMap().
   *
   * @return the mapped value (the gene symbol, going by the map's name), or null if the map
   * has no entry for the gene ID
   */
  public String getSymbol()
  {
    return Triplet.getGeneToSymbolMap().get(geneid);
  }

  public static List<Gene> sortAndFilterWithVariation(Collection<Gene> genes,
    int size)
  {
    ComparableGene[] comp = new ComparableGene[genes.size()];

    int i = 0;
    for (Gene gene : genes)
    {
      comp[i++] = new ComparableGene(gene);
    }

    Arrays.sort(comp);

    List<Gene> list = new ArrayList<Gene>();

    i = 0;
    for (ComparableGene cg : comp)
    {
      list.add(cg.gene);
      if (++i == size) break;
    }
    return list;
  }

  public Gene getNegative()
  {
    Gene neg = new Gene(geneid, id, getExpSize());
    neg.status = new int[getExpSize()];
    System.arraycopy(value, 0, neg.value, 0, getExpSize());
    System.arraycopy(status, 0, neg.status, 0, getExpSize());
    for (int i = 0; i < neg.status.length; i++)
    {
      neg.status[i] *= -1;
    }
    return neg;
  }

  public int compareTo(Object o)
  {
    Gene gene = (Gene) o;
    return getSymbol().compareTo(gene.getSymbol());
  }

  static class ComparableGene implements Comparable
  {
    Gene gene;
    Double var;

    ComparableGene(Gene gene)
    {
      this.gene = gene;
      this.var = gene.calcVariance();
    }

    public int compareTo(Object o)
    {
      ComparableGene cg = (ComparableGene) o;
      return cg.var.compareTo(var);
    }
  }

  /**
   * Checks of the given gene is in experiment data.
   *
   * @param map
   */
  public static void reportExistence(Map<String, List<Gene>> map)
  {
//    String[] genes = new String[]{"E2F7", "MICAL1", "MTA1", "SYTL2", "ZNF277", "ZNF333", "ZNF488",
//      "CYP26B1", "PLA2G2F", "TGM2", "YME1L1", "DRD3", "GABRE", "MUC4", "OPCML", "UNC5C",
//      "C2orf1", "PON2", "GPR15", "PTGDR", "CD109", "AGR2", "ARR3", "C4orf1", "CADPS", "DEPDC4",
//      "FLJ32810", "TMEPAI"};

    String[] genes = new String[]{"SYTL2"};

    Map<String, String> s2id = HGNCParser.getSymbolToGeneMap();

    for (String gene : genes)
    {
      String id = s2id.get(gene);

      if (id == null)
      {
        System.out.println("No gene ID: " + gene);
        continue;
      }

      if (!map.containsKey(id))
      {
        System.out.println("Absent in experiments: " + gene + " gene id = " + id);
      }
      else
      {
        System.out.println("Gene exists = " + gene);
      }
    }
  }

  public static List<Triplet> compileRandomTriplets(Collection<List<Gene>> genes, int size)
  {
    List<Gene> list = new ArrayList<Gene>();

    for (List<Gene> glist : genes)
    {
      list.addAll(glist);
    }
    return compileRandomTriplets(list, size);
  }

  public static List<Triplet> compileRandomTriplets(List<Gene> genes, int size)
  {
    Random rand = new Random();
    List<Triplet> trips = new ArrayList<Triplet>();

    while (trips.size() < size)
    {
      int r = rand.nextInt(genes.size());
      int f = rand.nextInt(genes.size());
      int t = rand.nextInt(genes.size());

      if (r != f && f != t && r != t)
      {
        trips.add(new Triplet(genes.get(r), genes.get(f), genes.get(t)));
      }
    }
    return trips;
  }

  /**
   * Merges experiments for two gene sets that contain identical genes but different experiments.
   *
   * @param map1
   * @param map2
   * @param nothing
   * @return
   */
  public static Map<String, List<Gene>> merge(
    Map<String, List<Gene>> map1, Map<String, List<Gene>> map2, boolean nothing)
  {
    int size1 = map1.values().iterator().next().iterator().next().value.length;
    int size2 = map2.values().iterator().next().iterator().next().value.length;

    for (String key : map1.keySet())
    {
      assert map2.containsKey(key) : "map2 does not contain key = " + key;

      List<Gene> list1 = map1.get(key);
      List<Gene> list2 = map2.get(key);

      assert list1.size() == list2.size() : "List sizes not equal. List1 = " + list1.size() +
        " List2 = " + list2.size();

      for(int i = 0; i < list1.size(); i++)
      {
        Gene g1 = list1.get(i);
        Gene g2 = list2.get(i);

        double[] val = g1.value;

        g1.value = new double[size1 + size2];

        System.arraycopy(val, 0, g1.value, 0, size1);
        System.arraycopy(g2.value, 0, g1.value, size1, size2);
      }
    }
    return map1;
  }

  /**
   * Merges experiments for two gene sets that contain identical genes but different experiments.
   *
   * @param map1
   * @param map2
   * @return
   */
  public static Map<String, Gene> merge(
    Map<String, Gene> map1, Map<String, Gene> map2)
  {
    int size1 = map1.values().iterator().next().value.length;
    int size2 = map2.values().iterator().next().value.length;

    for (String key : map1.keySet())
    {
      assert map2.containsKey(key) : "map2 does not contain key = " + key;

      Gene g1 = map1.get(key);
      Gene g2 = map2.get(key);

      double[] val = g1.value;

      g1.value = new double[size1 + size2];

      System.arraycopy(val, 0, g1.value, 0, size1);
      System.arraycopy(g2.value, 0, g1.value, size1, size2);
    }
    return map1;
  }

  /**
   * Reads M-F-T Entrez gene IDs, matches to the genes in the experiments and creates new triplets
   * accordingly. Since there is generally more than one gene for each ID, multiple matched
   * triplets are created per initial triplet.
   *
   * @param map
   * @param trips
   * @return
   */
  public static List<Triplet> associateExpsToTrips(Map<String, List<Gene>> map,
    List<Triplet> trips)
  {
    List<Triplet> list = new ArrayList<Triplet>();

    for (Triplet trip : trips)
    {
      List<Gene> regs = map.get(trip.modulator);
      List<Gene> facts = map.get(trip.factor);
      List<Gene> targs = map.get(trip.target);

      if (regs == null || facts == null || targs == null) continue;

      for (Gene reg : regs)
      {
        for (Gene fact : facts)
        {
          for (Gene targ : targs)
          {
            Triplet t = new Triplet(reg, fact, targ);
            list.add(t);
          }
        }
      }
    }
    return list;
  }


  public Gene clone(String newID, String newGBID)
  {
    Gene cln = new Gene(newID, newGBID, getExpSize());
    if (status != null)
    {
      cln.status = new int[getExpSize()];
      System.arraycopy(status, 0, cln.status, 0, getExpSize());
    }
    System.arraycopy(value, 0, cln.value, 0, getExpSize());
    return cln;
  }

  public static int getCall(String sign)
  {
    if (sign.equals("A")) return ABSENT;
    else if (sign.equals("P")) return PRESENT;
    else
    {
      assert sign.equals("M") : "sign = " + sign;
      return MARGINAL;
    }
  }

  /**
   * Gets the number of experiments, which is the length of the value array.
   *
   * @return length of the value array
   */
  public int getExpSize()
  {
    return value.length;
  }

  /**
   * Given the list of triplets, all other genes are replaced with modulators.
   *
   * @return all possible triplets
   */
  public static List<Triplet> prepareAllModTriplets(List<Triplet> trips,
    Map<String, List<Gene>> geneMap)
  {
    List<String> factars = new ArrayList<String>();
    List<String> used = new ArrayList<String>();

    for (Triplet t : trips)
    {
      factars.add(t.factor + "\t" + t.target);
      used.add(t.factor);
      used.add(t.target);
      used.add(t.modulator);
    }

    List<String> regs = new ArrayList<String>(geneMap.keySet());
    regs.removeAll(used);

    System.out.println("other regs size = " + regs.size());

    trips.clear();

    for (String reg : regs)
    {
      for (String factar : factars)
      {
        String[] ft = factar.split("\t");

        trips.add(new Triplet(reg, ft[0], ft[1]));
      }
    }

    trips = Gene.associateExpsToTrips(geneMap, trips);

    return trips;
  }

  /**
   * Randomizes order of experiments. This breaks dependencies between genes.
   */
  public void randomize()
  {
    for (int k = 0; k < 1; k++)
    {
      for (int i = 0; i < value.length; i++)
      {
        int j = (int) (Math.random() * value.length);

        if (status != null)
        {
          int temp = status[i];
          status[i] = status[j];
          status[j] = temp;
        }

        double tmp = value[i];
        value[i] = value[j];
        value[j] = tmp;
      }
    }
  }

  /**
   * Randomizes order of experiments. This breaks dependencies between genes.
   */
  public static void randomize(Collection<List<Gene>> genes)
  {
    System.out.print("Randomizing experiments ... ");

    for (Collection<Gene> col : genes)
    {
      for (Gene gene : col)
      {
        gene.randomize();
      }
    }
    System.out.println(" ok");
  }

  public static List<Gene> sortGenes(List<GeneWrap> wraped)
  {
    Collections.sort(wraped);
    List<Gene> list = new ArrayList<Gene>(wraped.size());
    for (GeneWrap w : wraped)
    {
      list.add(w.gene);
    }
    return list;
  }

  private static class GeneWrap implements Comparable
  {
    Gene gene;
    Double val;

    private GeneWrap(Gene gene, Double val)
    {
      this.gene = gene;
      this.val = val;
    }

    public int compareTo(Object o)
    {
      return ((GeneWrap) o).val.compareTo(val);
    }
  }

  public static List<Triplet> replaceWithRandomModulator(List<Triplet> trips, Map<String,
    List<Gene>> genes)
  {
    Random rand = new Random();
    List<String> ids = new ArrayList<String>(genes.keySet());

    Set<String> regs = new HashSet<String>();
    Set<String> tars = new HashSet<String>();

    String fac = null;
    for (Triplet t : trips)
    {
      regs.add(t.modulator);
      tars.add(t.target);
      fac = t.factor;
    }

    int size = regs.size();
    regs.clear();

    while (regs.size() < size)
    {
      String rReg = ids.get(rand.nextInt(ids.size()));
      regs.add(rReg);
    }

    List<Triplet> tt = new ArrayList<Triplet>();
    for (String reg : regs)
    {
      for (String tar : tars)
      {
        tt.add(new Triplet(reg, fac, tar));
      }
    }
    return tt;
  }

  /**
   * Prints a histogram of expression values on the console.
   */
  public void printValueHisto()
  {
    Histogram h = new Histogram(1);
    for (double v : value)
    {
      h.count(v);
    }
    h.printDensity();
  }

  public static List<Gene> fetchGeneFromExpo(String geneName)
  {
    Set<String> set = new HashSet<String>();
    String geneid = Triplet.getSymbolToGeneMap().get(geneName);
    set.add(geneid);
    Map<String, List<Gene>> map = Gene.readGenes(set, 0,
      "resource/experiments_expO_1.txt", "resource/experiments_expO_2.txt");
//      "resource/expop/expop_1.txt", "resource/expop/expop_2.txt", "resource/expop/expop_3.txt");
     
    return map.get(geneid);
  }

  public static void printVarianceInExpo(String geneName)
  {
    List<Gene> list = fetchGeneFromExpo(geneName);
    for (Gene gene : list)
    {
      System.out.println(gene.id);
//      System.out.println(gene.geneid + " -- " + gene.id + " -- " + gene.calcVariance());
      gene.printValueHisto();
    }
  }

  public static void printTissuePresence(String geneName) throws Throwable
  {
    List<Gene> list = fetchGeneFromExpo(geneName);
    List<String> cells = CellTypeMatcher.getExpoTissues(10);

    System.out.print("-----\n" + geneName);
    for (Gene gene : list)
    {
      gene.rankAdjustStatus(1 / 3D);
      System.out.print("\t" + gene.id);
    }
    System.out.println();
    for (String cell : cells)
    {
      System.out.print(cell);
      boolean[] pos = CellTypeMatcher.getTissueHitArrayForExpO(cell);

      for (Gene gene : list)
      {
        double ratio = gene.calcPresenceOnTissue(pos);

//        int[] cnt = gene.getStatusCounts(pos);
//        String s = cnt[0] + "|" + cnt[1] + "|" + cnt[2];
//        System.out.print("\t" + s);

        System.out.print("\t" + fmt.format(ratio));
      }
      System.out.println("");
    }
  }

  static void printMostVariedTargetsInTissue() throws Throwable
  {
    List<Triplet> trips = Triplet.readTripsAndAssociate("result/All_fdr0.05_var1.0.xls",
      "resource/experiments_expO_1.txt""resource/experiments_expO_2.txt");
    List<GeneWrap> w = new ArrayList<GeneWrap>();
    boolean[] pos = CellTypeMatcher.getTissueHitArrayForExpO("breast");
    Set<Gene> rem = new HashSet<Gene>();
    for (Triplet t : trips)
    {
      if (!rem.contains(t.T))
      {
        w.add(new GeneWrap(t.T, t.T.calcVariance(pos)));
        rem.add(t.T);
      }
    }
    List<Gene> tars = sortGenes(w);
    for (int i = 0; i < 10; i++)
    {
      Gene g = tars.get(i);
      System.out.println(Triplet.getGeneToSymbolMap().get(g.geneid) + "\t" + g.id + "\t" + g.calcVariance(pos));
    }
  }

  static void printIsoformCorrelationOfSome()
  {
//    List<String> symbols = Arrays.asList("ESR1", "ERG", "PPARA", "ETS2", "MAF", "SNRPN", "FABP1", "VDR", "HNF4A", "EPHB2", "RBPMS", "NR2F6", "TFAP2A");
    List<String> symbols = Arrays.asList("ESR1");
    Set<String> ids = new HashSet<String>();
    for (String sym : symbols)
    {
      ids.add(Triplet.getSymbolToGeneMap().get(sym));
    }
    Map<String, List<Gene>> map = Gene.readGenes(ids, 1, "resource/experiments_expO_1.txt", "resource/experiments_expO_2.txt");

    Histogram h = new Histogram(0.1);
    for (String id : ids)
    {
      List<Gene> genes = map.get(id);
      for (int i = 0; i < genes.size()-1; i++)
      {
        for (int k = i+1; k < genes.size(); k++)
        {
          Gene g1 = genes.get(i);
          Gene g2 = genes.get(k);
          if (g1.status==null) g1.rankAdjustStatus(1D/3);
          if (g2.status==null) g2.rankAdjustStatus(1D/3);
          h.count(Difference.calcPairwiseCoefficient(g1, g2));
        }
      }
    }
    h.print();
  }

  static void printGenesInTrips()
  {
    Set<String> ids = Triplet.readGeneIDs("resource/factor-trips/AR.txt");
    Map<String, String> g2s = Triplet.getGeneToSymbolMap();
    for (String id : ids)
    {
      if (g2s.containsKey(id)) System.out.println(g2s.get(id));
    }
  }
 
  public Histogram getDistribution(boolean[] pos, double range)
  {
    Histogram h = new Histogram(range);
    for (int i = 0; i < value.length; i++)
    {
      if (pos == null || pos[i]) h.count(value[i]);
    }
    return h;
  }

  public void takeLog()
  {
    for (int i = 0; i < value.length; i++)
    {
      value[i] = Math.log(value[i]) / LOG2;
    }
  }

  public static List<Gene> sortWithSym(Collection<Gene> genes)
  {
    class Holder implements Comparable
    {
      Gene g;

      Holder(Gene g)
      {
        this.g = g;
      }

      @Override
      public int compareTo(Object o)
      {
        Holder h = (Holder) o;
        return g.getSymbol().compareTo(h.g.getSymbol());
      }
    }
    List<Holder> hold = new ArrayList<Holder>();
    for (Gene g : genes)
    {
      hold.add(new Holder(g));
    }
    Collections.sort(hold);
    List<Gene> sorted = new ArrayList<Gene>();
    for (Holder h : hold)
    {
      sorted.add(h.g);
    }
    return sorted;

  }

  public static Map<String, Gene> getSymMap(Collection<Gene> genes)
  {
    Map<String, Gene> map = new HashMap<String, Gene>();
    for (Gene gene : genes)
    {
      map.put(gene.getSymbol(), gene);
    }
    return map;
  }
 
 
  /**
   * Ad hoc entry point: prints the tissue presence table of the hard-coded gene symbol "AR"
   * (see {@link #printTissuePresence(String)}). The command line arguments are not used.
   *
   * @param args ignored
   * @throws Throwable whatever printTissuePresence throws
   */
  public static void main(String[] args) throws Throwable
  {
    String gene = "AR";
//    printVarianceInExpo(gene);
    printTissuePresence(gene);
  }
}
TOP

Related Classes of gem.Gene$ComparableGene

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.