Package gem

Source Code of gem.GEMAllPairs

package gem;

import gem.parser.HPRDParser;
import gem.parser.TabDelimitedFileParser;
import gem.util.Kronometre;
import gem.util.Progress;
import gem.util.SetUtils;

import java.util.*;

/**
* Main class to run the GEM implementation. Make sure that the constant <code>TRIPLET_FILE</code>
* points to the hypothesis file, where each row contains Entrez Gene IDs of triplets. The false
* discovery rate is controlled by the <code>FDR</code> constant. The constant <code<MINVAR</code>
* is for filtering genes with low variance in the expression dataset. Any triplet hypothesis
* containing a gene with variance under this threshold is filtered out.
*/
public class GEMAllPairs implements Constants
{
  public static final String FACNAME = "AR";

  // These sets of M, F, and T are used for debugging, for filtering triplets to the desired
  // M, F, or T.

  private static String[] MODULATOR = new String[]{};//"NR3C1"};
  private static String[] FACTOR = new String[]{};//"AR"};
  private static String[] TARGET = new String[]{};//"NOL3"};

  /**
   * Desired false discovery rate.
   */
  private static final double FDR = 0.01;

  /**
   * Variance threshold for genes. Any triplet that contains a gene with a lower variance will be
   * filtered out.
   */
  public static final double MINVAR = 10;

  /**
   * Name of the result file.
   */
  private static String RESULT_FILE = "result/Result" +
    "_fdr" + FDR + "_var" + MINVAR + "_" + FACNAME +
    (MODULATOR.length > 0 ? "_M_" + MODULATOR[0] : "") +
    (FACTOR.length > 0 ? "_F_" + FACTOR[0] : "") +
    (TARGET.length > 0 ? "_T_" + TARGET[0] : "") +
    "_allpairs_expo.xls";

  /**
   * Method that runs the analysis.
   */
  public void run() throws Throwable
  {
    Map<String,Set<String>> map =
      HPRDParser.readFor(new HashSet<String>(Arrays.asList(FACNAME)));
    Set<String> interactors = map.get(FACNAME);

    TabDelimitedFileParser parser =
      new TabDelimitedFileParser("resource/factors/" + FACNAME + ".txt");
    Set<String> targets = parser.getColumnSet(0);

    SetUtils.removeCommon(interactors, targets);

    // Remove ETV1-AR interaction from triplets. This is a false positive entry in HPRD. We
    // noticed it while investigating our results. GEM reported ETV1 as a strong modulator of
    // AR, but we could not find any evidence in the literature. Then we investigated the
    // evidence of ETV1-AR interaction in the HPRD database, and realized that the related paper
    // in HPRD databse is actually not related to ETV1. Dr. Renu Goel from HPRD approved that
    // ETV1-AR interaction is an incorrect entry in HPRD database.
    interactors.remove("ETV1");

    interactors.add(FACNAME);

    Map<String, String> s2g = Triplet.getSymbolToGeneMap();

    Set<String> mods = new HashSet<String>();
    for (String inter : interactors) if (s2g.containsKey(inter)) mods.add(s2g.get(inter));

    Set<String> tars = new HashSet<String>();
    for (String tar : targets) if (s2g.containsKey(tar)) tars.add(s2g.get(tar));

    // If the above processing didn't leave any triplet, return
    if (mods.isEmpty() || tars.isEmpty()) return;

    Set<String> ids = new HashSet<String>(mods);
    ids.addAll(tars);

    Map<String, Gene> id2gene = ExpDataReader.readGenes(ids, "resource/expdata/expo", MINVAR, 0.25);

    Map<String, Double> t2pv = new HashMap<String, Double>();

    double pv_thr = 5.418500919305291E-4;

    List <Triplet> trips = new ArrayList<Triplet>();

    Progress p = new Progress(mods.size());

    for (String u1 : mods)
    {
      p.tick();

      Gene U1 = id2gene.get(u1);
      if (U1 == null) continue;

      for (String u2 : mods)
      {
        if (u1.compareTo(u2) < 0)
        {
          Gene U2 = id2gene.get(u2);
          if (U2 == null) continue;

          for (String tar : tars)
          {
            Gene T = id2gene.get(tar);
            if (T == null) continue;

            Triplet t = new Triplet(U1, U2, T);
            CaseCounter.count(t);
            double pv = Difference.calcGammaPval(t);

            if (pv_thr > 0)
            {
              if (pv < pv_thr) trips.add(t);
            }
            else t2pv.put(u1 + " " + u2 + " " + tar, pv);
          }
        }
      }
    }

    if (pv_thr < 0)
    {
      System.out.println("t2pv.size() = " + t2pv.size());
      List<Double> pvals = new ArrayList<Double>(t2pv.values());
      System.out.println("pvals.size() = " + pvals.size());


      // Find the gamma significance thresold for the desired FDR
      pv_thr = findThreshold(pvals, FDR);
      System.out.println("Threshold gamma significance for fdr " + FDR + " = " + pv_thr);

      // Re-create significant triplets

      for (String key : t2pv.keySet())
      {
        double pv = t2pv.get(key);
        if (pv < pv_thr)
        {
          String[] id = key.split(" ");
          Triplet t = new Triplet(id2gene.get(id[0]), id2gene.get(id[1]), id2gene.get(id[2]));
          CaseCounter.count(t);
          trips.add(t);
        }
      }

      System.out.println("Re-created significant trips. Size = " + trips.size());
    }

    System.out.println("trips.size = " + trips.size());

    // Assign modulation categories
    TripletClassifier.assignClass(trips, 0.05);


    if (!trips.isEmpty())
    {
      //Record significant modulations to the result file
      Triplet.record(trips, RESULT_FILE);
    }
  }

  private static double findThreshold(List<Double> list, double fdr)
  {
    Collections.sort(list);
    int i = 0;
    for (Double pv : list)
    {
      i++;
      double fd = pv * list.size();
      double rate = fd / i;

      if (rate > fdr) return pv;
    }
    return 1;
  }

  public static void main(String[] args) throws Throwable
  {
    Kronometre kron = new Kronometre();
    GEMAllPairs m = new GEMAllPairs();
    m.run();
    kron.stop();
    kron.print();
  }
}
TOP

Related Classes of gem.GEMAllPairs

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.