Package gem

Source Code of gem.Triplet

package gem;

import gem.parser.HGNCParser;

import java.io.*;
import java.util.*;

import gem.util.Histogram;
import gem.util.Histogram2D;
import gem.util.TermCounter;
import gem.util.Pearson;

/**
* Modulator - factor - target triplet for GEM analysis.
*
* @author Ozgun Babur
*/
public class Triplet implements Constants
{
  /**
   * Map from Entrez Gene ID to HGNC Gene Symbol
   */
  protected static Map<String, String> geneToSymbolMap;

  /**
   * Map from HGNC Gene Symbol to Entrez Gene ID
   */
  protected static Map<String, String> symbolToGeneMap;

  /**
   * Modulation category of the triplet.
   */
  public String categ;

  /**
   * Entrez Gene ID of modulator
   */
  public String modulator;

  /**
   * Entrez Gene ID of factor.
   */
  public String factor;

  /**
   * Entrez Gene ID of target.
   */
  public String target;

  /**
   * GenBank ID of modulator.
   */
  public String mod_id;

  /**
   * GenBank ID of factor.
   */
  public String fac_id;

  /**
   * GenBank ID of target.
   */
  public String tar_id;

  /**
   * Modulator gene.
   */
  public Gene M;

  /**
   * Factor gene.
   */
  public Gene F;

  /**
   * Target gene.
   */
  public Gene T;

  /**
   * Counts of 8 bins (M-F-T statuses) that is used in GEM calculation.
   */
  public int[] cnt = new int[8];

  /**
   * Counts of 8 bins for a given tissue type.
   */
  public int[] cnt_tiss;

  /**
   * P-value of gamma.
   */
  public double pvalGamma;

  /**
   * P-value of betaM.
   */
  public double pvalBetaM;

  /**
   * Value stored under the name "dcmi". Reading it from the data line and writing it in
   * toString() are both commented out in this class.
   * NOTE(review): the definition of dcmi is not visible in this file -- confirm.
   */
  public double dcmi;
  /**
   * P-value belonging to dcmi; used as the criterion by orderPvalDcmi and filterToPvalDcmi.
   */
  public double pvalDcmi;

  /**
   * Constructor with Entrez Gene IDs.
   *
   * @param modulator
   * @param factor
   * @param target
   */
  public Triplet(String modulator, String factor, String target)
  {
    this.modulator = modulator;
    this.factor = factor;
    this.target = target;
  }

  /**
   * Contructor with genes.
   *
   * @param M
   * @param F
   * @param T
   */
  public Triplet(Gene M, Gene F, Gene T)
  {
    this.M = M;
    this.F = F;
    this.T = T;

    this.modulator = M.geneid;
    this.factor = F.geneid;
    this.target = T.geneid;
    this.mod_id = M.id;
    this.fac_id = F.id;
    this.tar_id = T.id;
  }

  /**
   * Constructor with the data line from the triplet file.
   *
   * @param line
   */
  public Triplet(String line)
  {
    String[] terms = line.split("\t");

    int i = 0;
    try
    {
      if (terms.length >= 3)
      {
        this.modulator = terms[i++];
        this.factor = terms[i++];
        this.target = terms[i++];
      }

      if (terms.length >= 20)
      {
        this.mod_id = terms[i++];
        this.fac_id = terms[i++];
        this.tar_id = terms[i++];

        this.cnt[0] = Integer.parseInt(terms[i++]);
        this.cnt[4] = Integer.parseInt(terms[i++]);
        this.cnt[1] = Integer.parseInt(terms[i++]);
        this.cnt[5] = Integer.parseInt(terms[i++]);
        this.cnt[2] = Integer.parseInt(terms[i++]);
        this.cnt[6] = Integer.parseInt(terms[i++]);
        this.cnt[3] = Integer.parseInt(terms[i++]);
        this.cnt[7] = Integer.parseInt(terms[i++]);

        i += 13;

        this.pvalGamma = Double.parseDouble(terms[i++]);
//        this.dcmi = Double.parseDouble(terms[i++]);
//        this.pvalDcmi = Double.parseDouble(terms[i++]);
        this.categ = terms[i++];
      }
    }
    catch (RuntimeException e)
    {
      System.out.println(line);
      throw e;
    }
  }

  /**
   * Column names of the triplet file.
   *
   * @return
   */
  public static String getColNames()
  {
    return "Modulator Gene Symbol\tFactor Gene Symbol\tTarget Gene Symbol\t" +
      "Modulator GenBank ID\tFactor GenBank ID\tTarget GenBank ID\t" +
      "f000\tf001\tf010\tf011\tf100\tf101\tf110\tf111\t" +
      "p00\tp01\tp10\tp11\t" +
      "alpha_F\tpval of alpha_F\t" +
      "beta_F\tpval of beta_F\t" +
      "beta_M\tpval of beta_M\t" +
      "alpha_F + beta_M\tpval of alpha_F + beta_M\t" +
      "gamma\tpval of gamma\t" +
//      "dcmi\tpval of dcmi\t" +
      "Mode of action"
      ;
  }

  /**
   * Used for writing the triplet in a text file.
   *
   * @return
   */
  public String toString()
  {
    String s =  getGenes();

    for (int i = 0; i< 4; i++)
    {
      s += "\t" + cnt[i] + "\t" + cnt[i + 4];
    }

    for (int i = 0; i < 4; i++)
    {
      s += "\t" + cnt[i+4] / (cnt[i] + (double) cnt[i+4]);
    }

    int[] n = Difference.calcTotals(cnt);
    double[] p = Difference.calcProportions(cnt, n);

    s += "\t" + Difference.calcAlphaF(p);
    s += "\t" + Difference.calcAlphaFpval(cnt, n);
    s += "\t" + Difference.calcBetaF(p);
    s += "\t" + Difference.calcBetaFpval(cnt, n);
    s += "\t" + Difference.calcBetaM(p);
    s += "\t" + Difference.calcBetaMpval(cnt, n);
    s += "\t" + Difference.calcAlphaFplusBetaM(p);
    s += "\t" + Difference.calcAlphaFplusBetaMpval(cnt, n);
    s += "\t" + Difference.calcGamma(p);
    s += "\t" + Difference.calcGammaPval(cnt);
//    s += "\t" + dcmi + "\t" + pvalDcmi;

    s += "\t" + categ;

//    s += "\n\t\t\t\t\t";
//    for (int i = 0; i< 4; i++)
//    {
//      s += "\t" + cnt_tiss[i] + "\t" + cnt_tiss[i + 4];
//    }
   
    return s;
  }

  public String getProportionsInString()
  {
    String s = "";
    for (int i = 0; i < 4; i++)
    {
      s += " " + fmt.format(cnt[i+4] / (cnt[i] + (double) cnt[i+4]));
    }
    return s;
  }

  public String getCoeffInString()
  {
    String s = "";

    double[] p = Difference.calcProportions(cnt, Difference.calcTotals(cnt));
    s += " " + fmt.format(Difference.calcAlphaF(p));
    s += " " + fmt.format(Difference.calcAlphaM(p));
    s += " " + fmt.format(Difference.calcBetaF(p));
    s += " " + fmt.format(Difference.calcBetaM(p));
    return s;
  }

  public String getGenes()
  {
    return modulator + "\t" + factor + "\t" + target + "\t" +
      mod_id + "\t" + fac_id + "\t" + tar_id;
  }

  public String getMod_id()
  {
    return mod_id.substring(0, mod_id.indexOf("|"));
  }

  public String getFac_id()
  {
    return fac_id.substring(0, fac_id.indexOf("|"));
  }

  public String getTar_id()
  {
    return tar_id.substring(0, tar_id.indexOf("|"));
  }

  public String getEnrezGeneOnlySignature()
  {
    return modulator + factor + target;
  }
 
  public String getGeneSymbols()
  {
    if (getGeneToSymbolMap().containsKey(modulator))
    {
      return getGeneToSymbolMap().get(modulator) + "\t" +
        getGeneToSymbolMap().get(factor) + "\t" +
        getGeneToSymbolMap().get(target);
    }
    else
    {
      return modulator + "\t" + factor + "\t" + target;
    }
  }

  public static Set<Gene> collectGenes(Collection<Triplet> trips)
  {
    Set<Gene> genes = new HashSet<Gene>();
    for (Triplet t : trips)
    {
      genes.add(t.M);
      genes.add(t.F);
      genes.add(t.T);
    }
    return genes;
  }

  public String getMSym()
  {
    return getGeneToSymbolMap().get(modulator);
  }

  public String getFSym()
  {
    return getGeneToSymbolMap().get(factor);
  }

  public String getTSym()
  {
    return getGeneToSymbolMap().get(target);
  }

  public boolean isDebug()
  {
    return mod_id.equals("NM_007295") && fac_id.equals("M73069") && tar_id.equals("NM_001099");
  }

  /**
   * Checks if the depdendency type in this triplet is a logical-and.
   *
   * @return
   */
  public boolean isLogicalAND()
  {
    return TripletClassifier.isLogicalAND(this);
  }

  public boolean isModulation()
  {
    return categ.startsWith("A") || categ.startsWith("I") || categ.startsWith("E") || categ.startsWith("X");
  }

  public void backFromURLToIDs()
  {
    modulator = extractGeneIDFromURL(modulator);
    factor = extractGeneIDFromURL(factor);
    target = extractGeneIDFromURL(target);
    mod_id = extractGBIDFromURL(mod_id);
    fac_id = extractGBIDFromURL(fac_id);
    tar_id = extractGBIDFromURL(tar_id);
  }

  public void backFromURLToSymbol()
  {
    modulator = modulator.substring(modulator.indexOf(",\"") + 2, modulator.lastIndexOf("\""));
    factor = factor.substring(factor.indexOf(",\"") + 2, factor.lastIndexOf("\""));
    target = target.substring(target.indexOf(",\"") + 2, target.lastIndexOf("\""));
    mod_id = extractGBIDFromURL(mod_id);
    fac_id = extractGBIDFromURL(fac_id);
    tar_id = extractGBIDFromURL(tar_id);
  }

  public void backFromIDToSymbol()
  {
    Map<String, String> g2s = getGeneToSymbolMap();
    modulator = g2s.containsKey(modulator) ? g2s.get(modulator) : modulator;
    factor = g2s.containsKey(factor) ? g2s.get(factor) : factor;
    target = g2s.containsKey(target) ? g2s.get(target) : target;
  }

  private String extractGeneIDFromURL(String url)
  {
    if (!url.contains("uids=")) return url;
    return url.substring(url.indexOf("uids=") + 5, url.indexOf("\",\""));
  }

  private String extractGBIDFromURL(String url)
  {
    if (!url.contains(",\"")) return url;
    return url.substring(url.lastIndexOf(",\"") + 2, url.lastIndexOf("\""));
  }

  public void writeExpValues(String dir)
  {
    Map<String, String> map = HGNCParser.getGeneToSymbolMap();

    try
    {
      BufferedWriter writer = new BufferedWriter(new FileWriter(
        dir+"/"+map.get(modulator)+"_"+map.get(factor)+"_"+map.get(target)+".txt"));

      for (int i = 0; i < M.getExpSize(); i++)
      {
        writer.write(M.value[i] + "\t" + F.value[i] + "\t" + T.value[i] + "\n");
      }

      writer.close();
    }
    catch (Exception e)
    {
      e.printStackTrace();
    }
  }

  public static void recordGenes(Collection<Triplet> set, String filename)
  {
    try
    {
      BufferedWriter writer = new BufferedWriter(new FileWriter(filename));

      for (Triplet triplet : set)
      {
        writer.write(triplet.getGenes() + "\n");
      }

      writer.close();
    }
    catch (IOException e)
    {
      e.printStackTrace();
    }
  }

  public static void record(List<Triplet> trips, String filename)
  {
    trips = orderToGenes(trips);
    replaceIDsWithURL(trips);

    try
    {
      BufferedWriter writer = new BufferedWriter(new FileWriter(filename));

      writer.write(getColNames() + "\n");

      for (Triplet triplet : trips)
      {
        writer.write(triplet + "\n");
      }

      writer.close();
      System.out.println("Wrote " + trips.size() + " triplets into file " + filename);
    }
    catch (IOException e)
    {
      e.printStackTrace();
    }
  }

  public static void recordUniqueGene(List<Triplet> trips, String filename)
  {
    trips = orderToGenes(trips);
    replaceIDsWithURL(trips);

    try
    {
      BufferedWriter writer = new BufferedWriter(new FileWriter(filename));

      writer.write(getColNames() + "\n");

      int i = 0;
      Triplet prev = null;
      for (Triplet triplet : trips)
      {
        if (prev != null)
        {
          if (sameGenes(prev, triplet))
          {
            triplet = triplet.pvalGamma > prev.pvalGamma ? triplet : prev;
          }
          else
          {
            writer.write(prev + "\n");
            i++;
          }
        }
        prev = triplet;
      }
      if (prev != null)
      {
        writer.write(prev.toString());
        i++;
      }

      writer.close();
      System.out.println("Wrote " + i + " of " + trips.size() + " triplets into file " +
        filename);
    }
    catch (IOException e)
    {
      e.printStackTrace();
    }
  }
  static boolean sameGenes(Triplet t1, Triplet t2)
  {
    return t1.modulator.equals(t2.modulator) &&
      t1.factor.equals(t2.factor) &&
      t1.target.equals(t2.target);
  }

  /**
   * Writes down common triplets in the given two sets. Same triplets are alined in two lines.
   * @param set1
   * @param set2
   * @param filename
   */
  public static void record(Collection<Triplet> set1, Collection<Triplet> set2, String filename)
  {
    try
    {
      Map<String, Triplet> map = new HashMap<String, Triplet>();

      for (Triplet t : set2)
      {
        map.put(t.getGenes(), t);
      }

      BufferedWriter writer = new BufferedWriter(new FileWriter(filename));

      writer.write(getColNames() + "\n");

      int i = 0;
      for (Triplet t1 : set1)
      {
        if (map.containsKey(t1.getGenes()))
        {
          writer.write(t1 + "\n");
          writer.write(map.get(t1.getGenes()) + "\n");
          i++;
        }
      }

      writer.close();
      System.out.println("Wrote " + i + " triplets into file " + filename);
    }
    catch (IOException e)
    {
      e.printStackTrace();
    }
  }

  public static List<Triplet> readTrips(String filename)
  {
    List<Triplet> set = null;

    try
    {
      set = new ArrayList<Triplet>();
      BufferedReader reader = new BufferedReader(new FileReader(filename));

      // For header
      reader.readLine();

      String line;
      while ((line = reader.readLine()) != null)
      {
        if (line.contains("\t"))
        {
          set.add(new Triplet(line));
        }
      }

      reader.close();

    }
    catch (IOException e)
    {
      e.printStackTrace();
    }

    return set;
  }

  public static List<Triplet> readTripsAndAssociate(String filename, String ... expfile)
  {
    List<Triplet> trips = readTrips(filename);

    for (Triplet t : trips)
    {
      t.backFromURLToIDs();
    }

    Set<String> ids = getIDs(trips);

    Map<String, Gene> map = Gene.readGenesWithID(ids, 0, expfile);

    for (Gene gene : map.values())
    {
      gene.rankAdjustStatus(1D / 3);
    }

    for (Triplet t : trips)
    {
      t.M = map.get(t.mod_id);
      t.F = map.get(t.fac_id);
      t.T = map.get(t.tar_id);
    }

    return trips;
  }

  public static Set<String> readGeneIDs(String filename)
  {
    Set<String> names = new HashSet<String>();

    try
    {
      BufferedReader reader = new BufferedReader(new FileReader(filename));

      String line;
      while ((line = reader.readLine()) != null)
      {
        String[] terms = line.split("\t");

        if (terms.length > 2)
        {
          names.add(terms[0]);
          names.add(terms[1]);
          names.add(terms[2]);
        }
      }

      reader.close();
    }
    catch (Exception e)
    {
      e.printStackTrace();
    }

    return names;
  }


  public static Set<String> getGeneIDs(Collection<Triplet> trips)
  {
    Set<String> set = new HashSet<String>();

    for (Triplet t : trips)
    {
      set.add(t.modulator);
      set.add(t.factor);
      set.add(t.target);
    }

    return set;
  }

  public static Set<String> getIDs(Collection<Triplet> trips)
  {
    Set<String> set = new HashSet<String>();

    for (Triplet t : trips)
    {
      set.add(t.mod_id);
      set.add(t.fac_id);
      set.add(t.tar_id);
    }

    return set;
  }

  public static Set<String> getMFGeneIDs(Collection<Triplet> trips)
  {
    Set<String> set = new HashSet<String>();

    for (Triplet t : trips)
    {
      set.add(t.modulator);
      set.add(t.factor);
    }

    return set;
  }

  public boolean equals(Object obj)
  {
    if (obj instanceof Triplet)
    {
      Triplet t = (Triplet) obj;

      boolean eq = modulator.equals(t.modulator) &&
        factor.equals(t.factor) &&
        target.equals(t.target);

      if (!eq) return eq;
     
      if (mod_id != null && t.mod_id != null) eq = eq && mod_id.equals(t.mod_id);
      if (fac_id != null && t.fac_id != null) eq = eq && fac_id.equals(t.fac_id);
      if (tar_id != null && t.tar_id != null) eq = eq && tar_id.equals(t.tar_id);

      return eq;
    }
    return false;
  }

  private int hcd = 0;
  public int hashCode()
  {
    if (hcd == 0)
    {
      if (target != null) hcd += target.hashCode();
      if (factor != null) hcd += factor.hashCode();
      if (modulator != null) hcd += modulator.hashCode();
      if (tar_id != null) hcd += tar_id.hashCode();
      if (fac_id != null) hcd += fac_id.hashCode();
      if (mod_id != null) hcd += mod_id.hashCode();
    }
    return hcd;
  }

  public double calcFTCorr()
  {
    double[][] x = new double[2][F.value.length];

    x[0] = F.value;
    x[1] = T.value;

    return Pearson.calcCorrelation(x);
  }

  public double calcFTCorrCondM(int mCond)
  {
    int size = Pearson.frequency(M.status, mCond);
    double[][] x = new double[2][size];

    int k = 0;

    for (int i = 0; i < M.status.length; i++)
    {
      if (M.status[i] == mCond)
      {
        x[0][k] = F.value[i];
        x[1][k] = T.value[i];
        k++;
      }
    }
    assert k == size;

    return Pearson.calcCorrelation(x);
  }


  private static List<Triplet> order(List<Triplet> trips, HoldFact fact)
  {
    THolder[] holders = new THolder[trips.size()];

    int i = 0;
    for (Triplet t : trips)
    {
      holders[i++] = fact.hold(t);
    }
    Arrays.sort(holders);

    trips.clear();
    for (THolder holder : holders)
    {
      trips.add(holder.t);
    }
    return trips;
  }

  public static List<Triplet> orderRegTarg(List<Triplet> trips)
  {
    return order(trips, new HoldFact()
    {
      public THolder hold(Triplet t)
      {
        return new THolder(t)
        {
          Comparable getCriter()
          {
            return t.modulator + "|" + t.target;
          }
        };
      }
    });
  }

  public static List<Triplet> orderFactTarg(List<Triplet> trips)
  {
    return order(trips, new HoldFact()
    {
      public THolder hold(Triplet t)
      {
        return new THolder(t)
        {
          Comparable getCriter()
          {
            return t.factor + "|" + t.target;
          }
        };
      }
    });
  }

  public static List<Triplet> orderToGenes(List<Triplet> trips)
  {
    final Map<String, String> map = getGeneToSymbolMap();
    return order(trips, new HoldFact()
    {
      public THolder hold(Triplet t)
      {
        return new THolder(t)
        {
          Comparable getCriter()
          {
            String reg = map.containsKey(t.modulator) ? map.get(t.modulator) : t.modulator;
            String fac = map.containsKey(t.factor) ? map.get(t.factor) : t.factor;
            String tar = map.containsKey(t.target) ? map.get(t.target) : t.target;
            return fac + reg + tar + t.fac_id + t.mod_id + t.tar_id;
          }
        };
      }
    });
  }

  public static List<Triplet> orderTargFact(List<Triplet> trips)
  {
    return order(trips, new HoldFact()
    {
      public THolder hold(Triplet t)
      {
        return new THolder(t)
        {
          Comparable getCriter()
          {
            return t.target + "|" + t.factor;
          }
        };
      }
    });
  }

  public static List<Triplet> orderPvalGamma(List<Triplet> trips)
  {
    return order(trips, new HoldFact()
    {
      public THolder hold(Triplet t)
      {
        return new THolder(t)
        {
          Comparable getCriter()
          {
            return t.pvalGamma;
          }
        };
      }
    });
  }

  public static List<Triplet> orderModulation(List<Triplet> trips)
  {
    class Holder extends THolder
    {
      double val;
      protected Holder(Triplet t)
      {
        super(t);
        int[] n = Difference.calcTotals(t.cnt);
        double[] p = Difference.calcProportions(t.cnt, n);
        double betaM = Difference.calcBetaM(p);
        double alfaM = Difference.calcAlphaM(p);
        double gamma = Difference.calcGamma(p);

        if (betaM > 0)
        {
          if (alfaM < betaM) val = Math.min(gamma, betaM);
          else val = 0;
        }
        else
        {
          if (alfaM > betaM) val = -Math.max(gamma, betaM);
          else val = 0;
        }
      }

      Comparable getCriter()
      {
        return val;
      }
    }

    return order(trips, new HoldFact()
    {
      public THolder hold(Triplet t)
      {
        return new Holder(t);
      }
    });
  }

  public static List<Triplet> orderPvalDcmi(List<Triplet> trips)
  {
    return order(trips, new HoldFact()
    {
      public THolder hold(Triplet t)
      {
        return new THolder(t)
        {
          Comparable getCriter()
          {
            return t.pvalDcmi;
          }
        };
      }
    });
  }

  public static List<Triplet> orderPvalBetaM(List<Triplet> trips)
  {
    return order(trips, new HoldFact()
    {
      public THolder hold(Triplet t)
      {
        return new THolder(t)
        {
          Comparable getCriter()
          {
            return t.pvalBetaM;
          }
        };
      }
    });
  }

  public static List<Triplet> orderCnt(List<Triplet> trips, final int cntIndex)
  {
    return order(trips, new HoldFact()
    {
      public THolder hold(Triplet t)
      {
        return new THolder(t)
        {
          Comparable getCriter()
          {
            return t.cnt[cntIndex];
          }
        };
      }
    });
  }

  private static void prepareGeneSymbolMaps()
  {
    geneToSymbolMap = HGNCParser.getGeneToSymbolMap();
    symbolToGeneMap = new HashMap<String, String>();
     
    for (String s : geneToSymbolMap.keySet())
    {
      symbolToGeneMap.put(geneToSymbolMap.get(s), s);
    }
  }

  public static Map<String, String> getGeneToSymbolMap()
  {
    if (geneToSymbolMap == null)
    {
      prepareGeneSymbolMaps();
    }
    return geneToSymbolMap;
  }

  public static Map<String, String> getSymbolToGeneMap()
  {
    if (symbolToGeneMap == null)
    {
      prepareGeneSymbolMaps();
    }
    return symbolToGeneMap;
  }

  public Triplet createCopy()
  {
    return new Triplet(M, F, T);
  }

  /**
   * Inner class used for sorting triplets.
   */
  private static abstract class THolder implements Comparable
  {
    Triplet t;
    Comparable crt;

    protected THolder(Triplet t)
    {
      this.t = t;
      this.crt = getCriter();
    }

    abstract Comparable getCriter();

    public int compareTo(Object o)
    {
      if (o instanceof THolder)
      {
        return crt.compareTo(((THolder) o).crt);
      }
      return 0;
    }
  }

  /**
   * Factory used by order(): wraps a triplet in the THolder that supplies its sort
   * criterion.
   */
  private interface HoldFact
  {
    THolder hold(Triplet t);
  }

  //----------------------------------------------------------------------------------------------
  // Section: Accessors
  //----------------------------------------------------------------------------------------------

  public static void removeNonModulation(List<Triplet> trips)
  {
    Iterator<Triplet> iter = trips.iterator();
    while (iter.hasNext())
    {
      Triplet t = iter.next();
      if (!t.isModulation()) iter.remove();
    }
  }

  public static void removeCateg(List<Triplet> trips, String categ)
  {
    Iterator<Triplet> iter = trips.iterator();
    while (iter.hasNext())
    {
      Triplet t = iter.next();
      if (t.categ.equals(categ)) iter.remove();
    }
  }

  public static List<String> getAllFactors(Collection<Triplet> trips)
  {
    List<String> facts = new ArrayList<String>();

    for (Triplet t : trips)
    {
      if (!facts.contains(t.factor)) facts.add(t.factor);
    }
    return facts;
  }

  public static List<Triplet> selectTarget(Collection<Triplet> trips, String target, boolean entrez)
  {
    Map<String, String> map = getSymbolToGeneMap();

    target = map.containsKey(target) ? map.get(target) : target;
   
    List<Triplet> list = new ArrayList<Triplet>();

    for (Triplet t : trips)
    {
      String id = entrez ? t.target : t.tar_id;

      if (id.equals(target))
      {
        list.add(t);
      }
    }
    return list;
  }

  public static List<Triplet> selectFactor(Collection<Triplet> trips, String factor, boolean entrez)
  {
    Map<String, String> map = HGNCParser.getSymbolToGeneMap();

    factor = map.containsKey(factor) ? map.get(factor) : factor;

    List<Triplet> list = new ArrayList<Triplet>();

    for (Triplet t : trips)
    {
      String id = entrez ? t.factor : t.fac_id;

      if (id.equals(factor))
      {
        list.add(t);
      }
    }
    return list;
  }

  public static List<Triplet> selectModulator(Collection<Triplet> trips, String modulator)
  {
    Map<String, String> map = HGNCParser.getSymbolToGeneMap();

    modulator = map.containsKey(modulator) ? map.get(modulator) : modulator;

    List<Triplet> list = new ArrayList<Triplet>();

    for (Triplet t : trips)
    {
      if (t.modulator.equals(modulator))
      {
        list.add(t);
      }
    }
    return list;
  }

  public static List<Triplet> selectTargets(Collection<Triplet> trips, Collection<String> targets)
  {
    List<Triplet> list = new ArrayList<Triplet>();

    for (String target : targets)
    {
      list.addAll(selectTarget(trips, target, true));
    }
    return list;
  }

  public static List<Triplet> selectModulators(Collection<Triplet> trips,
    Collection<String> modulators)
  {
    List<Triplet> list = new ArrayList<Triplet>();

    for (String modulator : modulators)
    {
      list.addAll(selectModulator(trips, modulator));
    }
    return list;
  }

  public static List<Triplet> selectFactors(Collection<Triplet> trips, Collection<String> factors)
  {
    List<Triplet> list = new ArrayList<Triplet>();

    for (String factor : factors)
    {
      list.addAll(selectFactor(trips, factor, true));
    }
    return list;
  }

  public static void printTargets(Collection<Triplet> trips)
  {
    Map<String, String> map = HGNCParser.getGeneToSymbolMap();

    TermCounter cnt = new TermCounter();
    for (Triplet t : trips)
    {
      String sym = map.get(t.target);
      cnt.addTerm(sym != null ? sym : t.target);
    }
    cnt.print();
  }

  public static List<Triplet> filterToLogicalAND(Collection<Triplet> trips)
  {
    List<Triplet> list = new ArrayList<Triplet>();

    for (Triplet t : trips)
    {
      // If OR relations were not discarded before, keep them.
      // Just filter out other non-modulation cases.
      if (t.categ != null && t.categ.startsWith("OR")) list.add(t);

      else if (t.isLogicalAND()) list.add(t);
    }
    return list;
  }

  public static List<Triplet> filterToMonotonic(Collection<Triplet> trips)
  {
    List<Triplet> list = new ArrayList<Triplet>();

    for (Triplet t : trips)
    {
      if (!Difference.complexBetaM(t, 0.05)) list.add(t);
    }
    return list;
  }

  public static List<Triplet> filterToPvalGamma(Collection<Triplet> trips, double thr)
  {
    List<Triplet> list = new ArrayList<Triplet>();

    for (Triplet t : trips)
    {
      if (t.pvalGamma <= thr) list.add(t);
    }
    return list;
  }

  public static List<Triplet> filterToPvalDcmi(Collection<Triplet> trips, double thr)
  {
    List<Triplet> list = new ArrayList<Triplet>();

    for (Triplet t : trips)
    {
      if (t.pvalDcmi <= thr) list.add(t);
    }
    return list;
  }

  public static List<Triplet> filterToPvalBetaM(Collection<Triplet> trips, double thr)
  {
    return filterToPvalBetaM(trips, thr, false);
  }

  public static List<Triplet> filterToPvalBetaM(Collection<Triplet> trips, double thr, boolean keepor)
  {
    List<Triplet> list = new ArrayList<Triplet>();

    for (Triplet t : trips)
    {
      if (t.pvalBetaM <= thr)
      {
        list.add(t);
      }
      else if (keepor)
      {
        int[] n = Difference.calcTotals(t.cnt);
        double ampv = Difference.calcAlphaMpval(t.cnt, n);
        double afpv = Difference.calcAlphaFpval(t.cnt, n);

        if (ampv < thr && afpv < thr)
        {
          double[] p = Difference.calcProportions(t.cnt, n);
          double am = Difference.calcAlphaM(p);
          double af = Difference.calcAlphaF(p);
          double bm = Difference.calcBetaM(p);
          double bf = Difference.calcBetaF(p);

          if (am * af > 0 && Math.abs(am) > Math.abs(bm) && Math.abs(af) > Math.abs(bf))
          {
            list.add(t);
            t.categ = "OR_" + (am > 0 ? "ACTIVATION" : "INHIBITION");
          }
        }
      }
    }
    return list;
  }

  /**
   * Checks if all M-F cases present. Filters out triplets that do not have.
   *
   * @param trips
   * @return
   */
  public static List<Triplet> filterToStateExistence(Collection<Triplet> trips)
  {
    List<Triplet> list = new ArrayList<Triplet>();

    for (Triplet t : trips)
    {
      boolean allExists = true;

      for (int i = 0; i < 4; i++)
      {
        if (t.cnt[i] + t.cnt[i + 4] == 0)
        {
          allExists = false;
          break;
        }
      }

      if (allExists) list.add(t);
    }
    return list;
  }

  /**
   * Temporary filtering method for debugging.
   *
   * @param trips
   * @return
   */
  public static List<Triplet> filterTemp(Collection<Triplet> trips)
  {
    List<Triplet> list = new ArrayList<Triplet>();

    for (Triplet t : trips)
    {
      if (t.pvalGamma < 1)
        list.add(t);
    }
    return list;
  }

  /**
   * Selects a fixed number of most significant triplets.
   *
   * @param trips
   * @param size
   * @return
   */
  public static List<Triplet> filterHighestRanked(List<Triplet> trips, int size)
  {
    if (trips.size() <= size) return trips;

    orderPvalGamma(trips);

    List<Triplet> list = new ArrayList<Triplet>(size);

    for (int i = 0; i < size; i++)
    {
      list.add(trips.get(i));
    }

    System.out.println("Rank filtered first " + size + ", " +
      "lowest signif = " + trips.get(size - 1).pvalGamma);

    return list;
  }

  /**
   * Keeps only one M-F-T isoform, the one with the most significant and highest gamma.
   *
   * @param trips
   */
  public static void keepMostSignif(List<Triplet> trips)
  {
    Map<String, Double> mapPval = new HashMap<String, Double>();
    Map<String, Double> mapGamma = new HashMap<String, Double>();

    for (Triplet t : trips)
    {
      String key = t.getGeneSymbols();
      if (!mapPval.containsKey(key) || mapPval.get(key) > t.pvalGamma) mapPval.put(key, t.pvalGamma);
    }

    Iterator<Triplet> iter = trips.iterator();
    while (iter.hasNext())
    {
      Triplet t = iter.next();
      if (mapPval.get(t.getGeneSymbols()) < t.pvalGamma) iter.remove();
    }

    for (Triplet t : trips)
    {
      String key = t.getGeneSymbols();
      double gamma = Math.abs(Difference.calcGamma(t));
      if (!mapGamma.containsKey(key) || mapGamma.get(key) < gamma) mapGamma.put(key, gamma);
    }

    iter = trips.iterator();
    while (iter.hasNext())
    {
      Triplet t = iter.next();
      double gamma = Math.abs(Difference.calcGamma(t));
      if (mapGamma.get(t.getGeneSymbols()) > gamma) iter.remove();
    }
  }

  /**
   * Removes repeating M-F-T isforms, keeps only the first.
   *
   * @param trips
   */
  public static void keepFirstUnique(List<Triplet> trips)
  {
    Map<String, Triplet> map = new HashMap<String, Triplet>();

    for (Triplet t : trips)
    {
      String key = t.getGeneSymbols();
      if (!map.containsKey(key)) map.put(key, t);
    }

    trips.clear();
    trips.addAll(map.values());
  }

  /**
   * Removes any "MoA Insignificant" triplets.
   *
   * @param trips
   */
  public static void keepClassified(List<Triplet> trips)
  {
    Iterator<Triplet> iter = trips.iterator();
    while (iter.hasNext())
    {
      Triplet t = iter.next();
      if (t.categ.startsWith("MoA") || t.categ.startsWith("OR")) iter.remove();
    }
  }

  public double[] getConstants()
  {
    int[] n = Difference.calcTotals(cnt);
    double[] p = Difference.calcProportions(cnt, n);

    double am = Difference.calcAlphaM(p);
    double af = Difference.calcAlphaF(p);
    double g = Difference.calcGamma(this);
//    double ampv = Difference.calcAlphaMpval(cnt, n);
//    double afpv = Difference.calcAlphaFpval(cnt, n);
//    double gpv = Difference.calcGammaPval(this);
    return new double[]{am, af, g};
  }

  /**
   * Finds gamma pval thresold for the target FDR.
   *
   * @param trips
   * @param target_fdr targeted false discovery rate
   * @return pval threshold
   */
  public static double getPvalGammaThreshold(List<Triplet> trips, double target_fdr)
  {
    assert target_fdr > 0 && target_fdr < 1;

    trips = orderPvalGamma(trips);
    int size = trips.size();

    double thr = 0;
    int i = 0;
    for (Triplet t : trips)
    {
      i++;
      thr = t.pvalGamma;
      double fd = t.pvalGamma * size;
      double fdr = fd / i;

      if (fdr >= target_fdr)
      {
        return thr;
      }
    }
    return thr;
  }

  /**
   * Finds the betaM pval threshold for the targeted FDR.
   *
   * @param trips
   * @param target_fdr targeted false discovery rate
   * @return pval threshold
   */
  public static double getPvalBetaMThreshold(List<Triplet> trips, double target_fdr)
  {
    assert target_fdr > 0 && target_fdr < 1;

    trips = orderPvalBetaM(trips);
    int size = trips.size();

    double thr = 0;

    int i = 0;
    for (Triplet t : trips)
    {
      i++;
      thr = t.pvalBetaM;
      double fd = t.pvalBetaM * size;
      double fdr = fd / i;

      if (fdr >= target_fdr)
      {
        return thr - 0.00000001;
      }
    }
    return thr;
  }

  /**
   * Returns debug triplets.
   *
   * @param trips
   * @return
   */
  public static List<Triplet> getDebug(List<Triplet> trips)
  {
    List<Triplet> list = new ArrayList<Triplet>();

    for (Triplet t : trips)
    {
      if (t.isDebug()) list.add(t);
    }
    return list;
  }

  /**
   * Replaces Entrez Gene IDs and GenBank IDs with Excel hyperlinks for navigation.
   */
  public void replaceIDsWithURL()
  {
    mod_id = geneToSymbolMap.get(modulator);
    fac_id = geneToSymbolMap.get(factor);
    tar_id = geneToSymbolMap.get(target);
//    modulator = getGeneHyperlink(modulator);
//    factor = getGeneHyperlink(factor);
//    target = getGeneHyperlink(target);
//    mod_id = getGBHyperlink(mod_id);
//    fac_id = getGBHyperlink(fac_id);
//    tar_id = getGBHyperlink(tar_id);
  }

  /**
   * Replaces Entrez Gene IDs and GenBank IDs with Excel hyperlinks for navigation.
   */
  public static void replaceIDsWithURL(Collection<Triplet> trips)
  {
    geneToSymbolMap = Triplet.getGeneToSymbolMap();

    for (Triplet t : trips)
    {
      t.replaceIDsWithURL();
    }
  }

  /**
   * Removes URL from gene IDs.
   *
   * @param trips
   */
  public static void restoreIDs(Collection<Triplet> trips)
  {
    for (Triplet t : trips)
    {
      t.backFromURLToIDs();
    }
  }

  /**
   * Prepares a hyperlink for the associated Entrez Gene ID.
   *
   * @param geneID
   * @return
   */
  private static String getGeneHyperlink(String geneID)
  {
    getGeneToSymbolMap();

    String link = "=HYPERLINK(\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?" +
      "db=gene&cmd=Retrieve&dopt=Graphics&list_uids=" + geneID + "\",\"";

    if (geneToSymbolMap.containsKey(geneID))
    {
      link += geneToSymbolMap.get(geneID);
    }
    else
    {
      link += geneID;
    }

    return link + "\")";
  }

  /**
   * Prepares a hyperlink for the given GenBank ID.
   *
   * @param gbID
   * @return
   */
  private static String getGBHyperlink(String gbID)
  {
    return "=HYPERLINK(\"http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=" +
      gbID.substring(0, gbID.indexOf("|")) + "\",\"" + gbID + "\")";
  }

  //----------------------------------------------------------------------------------------------
  // Debug code
  //----------------------------------------------------------------------------------------------

  private static void printTarsWithMostFactors()
  {
    Map<String, Set<String>> tar2facs = new HashMap<String, Set<String>>();
    List<Triplet> trips = readTrips("result/All_fdr0.05_var1.0.xls");
    for (Triplet t : trips)
    {
      t.backFromURLToSymbol();
      if (!tar2facs.containsKey(t.target)) tar2facs.put(t.target, new HashSet<String>());
      tar2facs.get(t.target).add(t.factor);
    }

    TermCounter cnt = new TermCounter();

    for (String tar : tar2facs.keySet())
    {
      for (String fac : tar2facs.get(tar))
      {
        cnt.addTerm(tar);
      }
    }

    cnt.print();
  }

  private static void printFT()
  {
    List<Triplet> trips = readTripsAndAssociate("result/All_fdr0.05_var1.0.xls",
      "resource/experiments_expO_1.txt", "resource/experiments_expO_2.txt");

    Triplet.orderPvalGamma(trips);
    for (Triplet t : trips)
    {
      Histogram2D h1 = new Histogram2D(0.2);
      h1.setName(Triplet.getGeneToSymbolMap().get(t.F.geneid) + " -> " +
        Triplet.getGeneToSymbolMap().get(t.T.geneid) + " in " +
        Triplet.getGeneToSymbolMap().get(t.M.geneid) + "-");

      Histogram2D h2 = new Histogram2D(0.2);
      h2.setName(Triplet.getGeneToSymbolMap().get(t.F.geneid) + " -> " +
        Triplet.getGeneToSymbolMap().get(t.T.geneid) + " in " +
        Triplet.getGeneToSymbolMap().get(t.M.geneid) + "+");

      for (int i = 0; i < t.F.value.length; i++)
      {
        if (t.M.status[i] == ABSENT)
          h1.count(t.F.value[i], t.T.value[i]);
        else if (t.M.status[i] == PRESENT)
          h2.count(t.F.value[i], t.T.value[i]);
      }

      h1.plot(false);
      h2.plot(true);
    }
  }

  private static void printFTEffect()
  {
    List<Triplet> trips = readTripsAndAssociate("result/All_fdr0.05_var1.0.xls",
      "resource/experiments_expO_1.txt", "resource/experiments_expO_2.txt");

    Random r = new Random();
    Histogram h = new Histogram(0.05);
    Set<String> counted = new HashSet<String>();

    for (Triplet t : trips)
    {
      Gene g1 = t.M;
      Gene g2 = trips.get(r.nextInt(trips.size())).M;

      if (g1 == g2) continue;
     
      String s = g1.id + g2.id;
      if (!counted.contains(s))
      {
        double eff = TripletGraphMLWriter.calcPairwiseEffect(g1, g2);
        h.count(eff);
        counted.add(s);
      }
    }
    h.printDensity();
  }

  private static void printFactorPresenceInLeukemia() throws IOException
  {
    List<Triplet> trips = readTripsAndAssociate("result/Big_all_fdr0.05_var1.0.xls",
      "resource/exp_big_1.txt", "resource/exp_big_2.txt", "resource/exp_big_3.txt", "resource/exp_big_4.txt");

    Set<Gene> factors = new HashSet<Gene>();
    for (Triplet t : trips)
    {
      factors.add(t.F);
    }

    boolean[] pos = CellTypeMatcher.getLeukemiaHitArrayForBigdata();


    for (Gene F : factors)
    {
      System.out.println(F.getPrintable() + "\t" + fmt.format(F.calcPresenceOnTissue(pos)));
    }

  }

  public static void printGammaAndSupportPlot()
  {
    List<Triplet> trips = readTrips("result/All_big_fdr0.05_var1.0.xls");

    Histogram2D h = new Histogram2D(0.05);
    for (Triplet t : trips)
    {
      double gamma = Difference.calcGamma(t);
      double support = Difference.getGammaSupport(t.cnt, true);
      h.count(gamma, (2 * support) - 1);
    }
    h.takeLog();
    h.plot();
  }

  public static void printSymInFile()
  {
    Set<String> set = Triplet.readGeneIDs("resource/factor-trips/AR.txt");

    for (String s : set)
    {
      if (getGeneToSymbolMap().containsKey(s)) System.out.println(getGeneToSymbolMap().get(s));
    }
  }

  public static void main(String[] args) throws Throwable
  {
    BufferedReader reader = new BufferedReader(new FileReader("test.txt"));

    for (String line = reader.readLine(); line != null; line = reader.readLine())
    {
      Map<String, String> s2g = Triplet.getSymbolToGeneMap();
      String id = s2g.containsKey(line) ? s2g.get(line) : "";
      System.out.println(line + "\t" + id);
    }

    reader.close();
  }
}
TOP

Related Classes of gem.Triplet

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.