Package gem

Source Code of gem.Pair

package gem;

import gem.parser.EntrezGeneParser;
import gem.parser.HPRDParser;
import gem.parser.TabDelimitedFileParser;
import gem.util.*;
import gem.util.Point;

import java.awt.*;
import java.io.*;
import java.util.*;
import java.util.List;

import static gem.StageAnalyzer.getPos;
import static gem.StageAnalyzer.intersects;

/**
* @author Ozgun Babur
*/
public class Tester implements Constants
{
  /**
   * Entry point for ad-hoc analyses. The routine to run is chosen by
   * (un)commenting one of the calls below; only
   * {@link #findActivityInOtherTissue()} is currently active.
   *
   * @param args command line arguments; not read
   * @throws Throwable whatever the selected routine throws
   */
  public static void main(String[] args) throws Throwable
  {
//    test2();
//    printCellLineDiff();
//    printClassCounts();
//    printProstateAndLNCapPresence();
//    printPSAResults();
//    printAverages();
    findActivityInOtherTissue();
//    doGEMForBreast();
  }

  private static void test1() throws IOException
  {
    String dir = "resource/expdata/MSKCC/";
    Map<String, String> g2s = Triplet.getGeneToSymbolMap();
    BufferedReader reader = new BufferedReader(new FileReader(dir + "data.txt"));

    BufferedWriter writer = new BufferedWriter(new FileWriter("mskcc_mapping.txt"));
    writer.write("ID\tSymbol\n");
    reader.readLine();
    for (String line = reader.readLine(); line != null; line = reader.readLine())
    {
      String id = line.substring(0, line.indexOf("\t"));
      writer.write(id + "\t" + g2s.get(id) + "\n");
    }

    reader.close();
    writer.close();
  }

  private static void test2() throws Throwable
  {
    String dir = "resource/expdata/MSKCC/";

//    TabDelimitedFileParser p = new TabDelimitedFileParser(dir + "platform.txt");
//    Map<String, String> id2sym = p.getOneToOneMap("ID", "Gene Symbol");

    Map<String, String> g2s = Triplet.getGeneToSymbolMap();

//    TabDelimitedFileParser p = new TabDelimitedFileParser("resource/factors/AR-select-small.txt");
    TabDelimitedFileParser p = new TabDelimitedFileParser("resource/NuclearReceptors.txt");
    Set<String> syms = p.getColumnSet(0);

    BufferedReader reader = new BufferedReader(new FileReader(dir + "data.txt"));

    int i = 0;
    for (String line = reader.readLine(); line != null; line = reader.readLine())
    {
      String id = line.substring(0, line.indexOf("\t"));

      if (g2s.containsKey(id) && syms.contains(g2s.get(id)))
//      if (id2sym.containsKey(id) && syms.contains(id2sym.get(id)))
      {
        System.out.print(i + ",");
      }
      i++;
    }

    reader.close();
  }

  public static double corRand(double x, double cor, Random r)
  {
    boolean negative = cor < 0;
    if (negative) cor = -cor;
    double num = (cor * x) + (Math.sqrt(1 - (cor * cor)) * r.nextDouble());
    if (negative) num = 1-num;
    return num;
  }

  private static List<String> getSortedNames(Set<String> modIDs)
  {
    Set<String> modNamesSet = new HashSet<String>();
    for (String modid : modIDs)
    {
      modNamesSet.add(Triplet.getGeneToSymbolMap().get(modid));
    }
    List<String> modNames = new ArrayList<String>(modNamesSet);
    Collections.sort(modNames);
    return modNames;
  }

  private static void test4() throws IOException
  {
    String dir = "resource/expdata/philip/";
    String id = "16621";
    Set<String> ids = new HashSet<String>(Arrays.asList(id));
    Map<String, Gene> geneMap = ExpDataReader.readGenes(ids, dir, 0, 0);
    Gene g = geneMap.get(id);
    boolean[][] pos = StageAnalyzer.getPos(dir);

    Histogram h = new Histogram(4);
    for (int i = 0; i < g.value.length; i++)
    {
      if (pos[4][i]) h.count(g.value[i]);
    }
    h.print();
  }

  private static void test5() throws IOException
  {
    File dir  = new File("mapping");
    BufferedWriter writer = new BufferedWriter(new FileWriter("resource/human2mouse_symbols.txt"));
    writer.write("Human\tMouse");

    for (File file : dir.listFiles())
    {
      BufferedReader reader = new BufferedReader(new FileReader(file));

      String hum = null;
      String mou = null;

      for (String line = reader.readLine(); !line.equals("  <tbody>"); line = reader.readLine());

      for (String line = reader.readLine(); line != null; line = reader.readLine())
      {
        if (line.startsWith("        <a href=\"http://www.ncbi.nlm.nih.gov/sites/entrez?db=gene&amp;cmd=retrieve&amp;dopt=full_report&amp;list_uids="))
        {
          assert hum == null || mou == null;
          String sym = line.substring(line.lastIndexOf("\">") + 2, line.lastIndexOf("<"));
          String id = line.substring(line.lastIndexOf("=") + 1, line.lastIndexOf("\""));
          if (hum == null)
          {
            assert mou == null;
            hum = id;
          }
          else
          {
            assert mou == null;
            mou = id;
          }
        }
        else if (line.startsWith("    </tr>"))
        {
          assert hum != null && mou != null;

          writer.write("\n" + hum + "\t" + mou);
        }
        else if (line.startsWith("    <tr"))
        {
          hum = null;
          mou = null;
        }
      }

      reader.close();
    }

    writer.close();

  }

  private static void test3() throws IOException
  {
    TabDelimitedFileParser parser = new TabDelimitedFileParser("resource/factors/AR_andr.txt");
    Map<String, String> score = parser.getOneToOneMap("Target", "Score");

    parser = new TabDelimitedFileParser("resource/factors/AR-select.txt");
    Set<String> select = parser.getColumnSet(0);

    List<String> up = new ArrayList<String>();
    List<String> dw = new ArrayList<String>();

    for (String s : select)
    {
      if (score.get(s).startsWith("-")) dw.add(s);
      else up.add(s);
    }
    System.out.println("up.size() = " + up.size());
    System.out.println("dw.size() = " + dw.size());

    Collections.sort(up);
    Collections.sort(dw);

    for (String s : up)
    {
      System.out.println(s);
    }
    System.out.println();
    for (String s : dw)
    {
      System.out.println(s);
    }
  }


  // Number of targets per modulator
  private static void test7() throws IOException
  {
    List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.05_var10.0_AR_expo.xls");
    Triplet.removeNonModulation(trips);

    Map<String, Set<Triplet>> groups = new HashMap<String, Set<Triplet>>();

    for (Triplet t : trips)
    {
      t.backFromURLToIDs();
      if (!groups.containsKey(t.modulator)) groups.put(t.modulator, new HashSet<Triplet>());
      groups.get(t.modulator).add(t);
    }

//    printClassCounts(groups);

    Histogram h = new Histogram(50);
    for (String modid : groups.keySet())
    {
      int size = groups.get(modid).size();
      h.count(size);
//      if (size > 19)
      {
        System.out.println(Triplet.getGeneToSymbolMap().get(modid) + "\t" + size);
      }
    }
    h.print();
  }

  // Similarity of modulators
  private static void test8() throws IOException
  {
    List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.01_var10.0_AR_expo.xls");
    Triplet.removeNonModulation(trips);

    Map<String, Set<String>> positive = new HashMap<String, Set<String>>();
    Map<String, Set<String>> negative = new HashMap<String, Set<String>>();

    for (Triplet t : trips)
    {
      t.backFromURLToIDs();
      if (!positive.containsKey(t.modulator)) positive.put(t.modulator, new HashSet<String>());
      if (!negative.containsKey(t.modulator)) negative.put(t.modulator, new HashSet<String>());
      if (Difference.calcGamma(t) < 0)
      {
        negative.get(t.modulator).add(t.target);
      }
      else
      {
        positive.get(t.modulator).add(t.target);
      }
    }

    class Pair implements Comparable
    {
      String m1;
      String m2;

      Integer similarity;
      Integer opposition;

      Double simScore;
      Double oppScore;

      int m1TarNum;

      Pair(String m1, String m2, Integer similarity, Integer opposition, int m1TarNum)
      {
        this.m1 = m1;
        this.m2 = m2;
        this.similarity = similarity;
        this.opposition = opposition;
        this.m1TarNum = m1TarNum;
        simScore = similarity / (double) m1TarNum;
        oppScore = opposition / (double) m1TarNum;
      }

      public int compareTo(Object o)
      {
        Pair p = (Pair) o;
        return p.getScore().compareTo(getScore());
      }

      @Override
      public String toString()
      {
        return m1 + "\t" + m2 + "\t" + fmt.format(simScore) + "\t" +
          fmt.format(oppScore);
      }

      public Double getScore()
      {
        if (similarity >= opposition) return simScore;
        else return -oppScore;
      }
    }

    List<Pair> pairs = new ArrayList<Pair>();
    Set<String> processed = new HashSet<String>();
    for (String mod1 : positive.keySet())
    {
      String symb1 = Triplet.getGeneToSymbolMap().get(mod1);
      for (String mod2 : positive.keySet())
      {
        if (mod1.equals(mod2) || processed.contains(mod1 + mod2)) continue;

        String symb2 = Triplet.getGeneToSymbolMap().get(mod2);

        int poscomm = SetUtils.countCommon(positive.get(mod1), positive.get(mod2));
        int negcomm = SetUtils.countCommon(negative.get(mod1), negative.get(mod2));
        int diff1 = SetUtils.countCommon(positive.get(mod1), negative.get(mod2));
        int diff2 = SetUtils.countCommon(negative.get(mod1), positive.get(mod2));

        pairs.add(new Pair(symb1, symb2, poscomm + negcomm, diff1 + diff2,
          positive.get(mod1).size() + negative.get(mod2).size()));
        processed.add(mod1 + mod2);
      }
    }
    Collections.sort(pairs);

    for (Pair p : pairs)
    {
//      System.out.println(p);
    }

    BufferedWriter writer = new BufferedWriter(new FileWriter("temp.graphml"));
    GraphML.writeHeader(writer);

    double thr = 0.35;
    processed.clear();
    for (Pair p : pairs)
    {
      if (p.simScore > thr)
      {
        if (!processed.contains(p.m1))
        {
          writer.write(GraphML.createNodeData(p.m1, p.m1, Color.WHITE, 0, true));
          processed.add(p.m1);
        }
        if (!processed.contains(p.m2))
        {
          writer.write(GraphML.createNodeData(p.m2, p.m2, Color.WHITE, 0, true));
          processed.add(p.m2);
        }

        int c = (int) (256 - (p.simScore * 256 * 2));
        if (c < 0) c = 0;
        writer.write(GraphML.createEdgeData(p.m1, p.m2, new Color(c, c, 250), true, true));
      }
//      if (p.opposition > thr)
//      {
//        int c = (int) (256 - (p.oppScore * 256));
//        if (c < 0) c = 0;
//        writer.write(GraphML.createEdgeData(p.m1, p.m2, new Color(255, c, c), true, true));
//      }
    }

    GraphML.writeFooter(writer);
    writer.close();
  }

  private static void printClassCounts()
  {
    List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.05_var10.0_AR_expo_select.xls");
    Map<String, Set<Triplet>> map = new HashMap<String, Set<Triplet>>();
//    trips = filterToSign(trips, -1);

//    Iterator<Triplet> iter = trips.iterator();
//    while (iter.hasNext())
//    {
//      Triplet t =  iter.next();
//      t.backFromURLToIDs();
//      if (!t.getTSym().equals("KLK3")) iter.remove();
//    }
    for (Triplet t : trips)
    {
      t.backFromURLToIDs();
      if (!map.containsKey(t.target)) map.put(t.target, new HashSet<Triplet>());
      map.get(t.target).add(t);
    }
    printClassCounts(map);
  }

  private static List<Triplet> filterToSign(List<Triplet> trips, int sign)
  {
    TabDelimitedFileParser parser = new TabDelimitedFileParser("resource/factors/AR_andr.txt");
    Map<String, String> score = parser.getOneToOneMap("Target", "Score");

    List<Triplet> list = new ArrayList<Triplet>();

    for (Triplet t : trips)
    {
      int s = score.get(t.getTSym()).startsWith("-") ? -1 : 1;
      if (s == sign) list.add(t);
    }
    return list;
  }

  // Text form of histo-plot
  private static void printClassCounts(Map<String, Set<Triplet>> map)
  {
    String[] types = new String[]{
      ENHANCES_ACTIVATION, ATTENUATES_ACTIVATION, INVERTS_ACTIVATION,    XOR_ACTIVATE, OR_ACTIVATE, FMOD_ACTIVATE,
      ENHANCES_INHIBITION, ATTENUATES_INHIBITION, INVERTS_INHIBITION,   XOR_INHIBIT, OR_INHIBIT, FMOD_INHIBIT,
      MOA_INSIGNIFICANT};

    TabDelimitedFileParser p = new TabDelimitedFileParser("resource/factors/AR_andr_small.txt");
    Map<String, String> score = p.getOneToOneMap("Target", "Score");

    List<String> names = getSortedNames(map.keySet());
    TermCounter tc = new TermCounter();

    for (String name : names)
    {
      String id = Triplet.getSymbolToGeneMap().get(name);
      Set<Triplet> trips = map.get(id);

      int[] cnt = new int[types.length];
      for (Triplet t : trips)
      {
        for (int i = 0; i < types.length; i++)
        {
          if (t.categ.equals(types[i]))
          {
            cnt[i]++;
            break;
          }
        }
      }
     
      double up = cnt[0] + cnt[1] + cnt[2] + cnt[3] + cnt[4] + cnt[5];
      double down = cnt[6] + cnt[7] + cnt[8] + cnt[9] + cnt[10] + cnt[11];

      int sign = score.get(name).startsWith("-") ? -1 : 1;
      int sign2 = up > down ? 1 : -1;
      String status = (sign2 * sign > 0) ? "good" : "bad";
      tc.addTerm(status);

      System.out.println(name + "\t" + cnt[0] + "\t" + cnt[1] + "\t" + cnt[2] + "\t\t" + cnt[3] + "\t" + cnt[4] + "\t" + cnt[5] + "\t\t" + status +
        "\n\t" + cnt[6] + "\t" + cnt[7] + "\t" + cnt[8] + "\t\t" + cnt[9] + "\t" + cnt[10] + "\t" + cnt[11] +"\t\t" + cnt[12] + "\n");
    }
    tc.print();
  }

  // Gene symbols consulted, within this file, only by the commented-out
  // "tarsUnex" diagnostics inside calcDependencyMatrix(List, double).
  static Set<String> lowSet = new HashSet<String>(Arrays.asList("AHR", "CASP1", "CDK6", "EFCAB6", "FHL2", "FLNA", "GRIP1", "IFI16", "NR3C1", "PAK6", "RUNX1", "TCF4", "TGFB1I1"));

  public static void calcDependencyMatrix() throws IOException
  {
    List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.01_var10.0_AR_expo.xls");
    for (Triplet t : trips) t.backFromURLToIDs();
    trips = ExpDataReader.associate(trips, "resource/expdata/expo", 0, 0);
    for (Triplet t : trips) CaseCounter.adjustStatus(t);
    calcDependencyMatrix(trips, 0.01);
  }
  /**
   * Trips should contain only one F. These must be the result triplets that were filtered by the
   * parameter gammaThr.
   */
  public static void calcDependencyMatrix(List<Triplet> trips, double pvalThr) throws IOException
  {
    Triplet.removeNonModulation(trips);

    List<Gene> mods = new ArrayList<Gene>();
    Map<Gene, Integer> sizes = new HashMap<Gene, Integer>();

    for (Triplet t : trips)
    {
      if (!mods.contains(t.M)) mods.add(t.M);

      if (!sizes.containsKey(t.M)) sizes.put(t.M, 1);
      else sizes.put(t.M, sizes.get(t.M) + 1);
    }

    Collections.sort(mods);
    int[][] dep = new int[mods.size()][mods.size()];

//    Map<Gene, Set<Gene>> tarsUnex = new HashMap<Gene, Set<Gene>>();

    for (Triplet trip : trips)
    {
      double val_orig = Difference.calcModulation(trip);
      double pval_orig = Difference.calcModulationPval(trip);

      if (pval_orig > pvalThr) continue;

      for (Gene mod : mods)
      {
        if (mod == trip.M) continue;

        Gene neg = mod.getNegative();

        Triplet t = new Triplet(trip.M, neg, trip.T);
        CaseCounter.count(t);

        double val = Difference.calcModulation(t);
        if (val * val_orig < 0) continue;

        double pval = Difference.calcModulationPval(t);
        if (pval < pvalThr)
        {
          dep[mods.indexOf(trip.M)][mods.indexOf(mod)] ++;

//          if (lowSet.contains(mod.getSymbol()))
//          {
//            if (!tarsUnex.containsKey(t.M)) tarsUnex.put(t.M, new HashSet<Gene>());
//
//            tarsUnex.get(t.M).add(trip.T);
//          }
        }
      }
    }

//    System.out.println("-----------");
//    for (Gene mod : tarsUnex.keySet())
//    {
//      double ratio = tarsUnex.get(mod).size() / (double) sizes.get(mod);
//      System.out.println(mod.getSymbol() + "\t" + tarsUnex.get(mod).size() + "\t" + sizes.get(mod) + "\t" + ratio);
//    }
//    System.out.println("-----------");
//    if (true) return;

    double[][] rat = new double[dep.length][dep.length];

    for (int i = 0; i < dep.length; i++)
    {
      int size = sizes.get(mods.get(i));
      for (int j = 0; j < dep.length; j++)
      {
        if (i == j) continue;

        rat[i][j] = dep[i][j] / (double) size;
      }
    }

    for (Gene mod : mods)
    {
      System.out.print("\t" + mod.getSymbol());
    }
    for (int i = 0; i < rat.length; i++)
    {
      System.out.print("\n" + mods.get(i).getSymbol());

      for (int j = 0; j < rat.length; j++)
      {
        System.out.print("\t" + fmt.format(rat[i][j]));
      }
    }

    BufferedWriter writer = new BufferedWriter(new FileWriter("dependency_positive.graphml"));
    GraphML.writeHeader(writer);

    double thr = 0.5;

    for (Gene mod : mods)
    {
      String sym = mod.getSymbol();

      int i = 0;
      Color color = i == 1 ? new Color(200, 200, 255) : i == 2 ? new Color(255, 200, 200) : Color.WHITE;

      writer.write(GraphML.createNodeData(sym, sym, color, i, true));
    }

    for (int i = 0; i < rat.length; i++)
    {
      for (int j = 0; j < rat.length; j++)
      {
        if (rat[i][j] > thr)
        {
          int c = (int) (256 - (((rat[i][j]-thr) / (1 - thr)) * 256)) - 100;
          if (c < 0) c = 0;
          Color edgeColor = new Color(c, 200, c); // green
//          Color edgeColor = new Color(200, c, c); // red
          writer.write(GraphML.createEdgeData(mods.get(i).getSymbol(), mods.get(j).getSymbol(), edgeColor, true, true));

        }
      }
    }
    GraphML.writeFooter(writer);
    writer.close();
  }


  // Symbols for which getNodeCluster returns 1.
  static Set<String> enh = new HashSet<String>(Arrays.asList("SPDEF", "PAK6", "FOXA1", "RUNX1", "TGFB1I1", "APOL2", "NSD1", "NR5A1", "PSMC3IP", "GRIP1"));
  // Symbols for which getNodeCluster returns 2 (unless they are also in enh).
  static Set<String> att = new HashSet<String>(Arrays.asList("FHL2", "IFI16", "CDK1", "NRIP1", "GAPDH", "CDC25B", "CDK6", "AHR", "TCF4", "CASP1", "DDC", "BRCA1", "PRMT1", "NR3C1", "EGFR", "PNRC1", "MDM2"));
  private static int getNodeCluster(String s)
  {
    if (enh.contains(s)) return 1;
    else if (att.contains(s)) return 2;
    else return 0;
  }

  public static void printExpressionHisto() throws Throwable
  {
    String dir = "resource/expdata/Ling/";
    List<Triplet> trips = StageAnalyzer.readTrips(dir);
    boolean[][] pos = StageAnalyzer.getPos(dir);

//    String[] cellname = new String[]{"PC3", "DU145", "LNCaP", "22Rv", "WPMY1", "VCaP", "MDAPCa2b", "HPV7", "HPV10", "RWPE1", "RWPE2", "NB11", "W99", "PWR1E", "DUCaP", "NB26"};
    String[] cellname = new String[]{"DU145", "LNCaP", "LNCaP104R", "LNCaP104S", "PC3", "Vcap"}; // from indexes 179 to 184

    Map<String, Gene> map = new HashMap<String, Gene>();

    for (Triplet t : trips)
    {
      map.put(t.getMSym(), t.M);
    }

    List<String> names = new ArrayList<String>(map.keySet());
    Collections.sort(names);

    for (String name : names)
    {
      System.out.println("\n");
      Gene gene = map.get(name);

      System.out.println(gene.getSymbol() + "\n");

      Histogram h = new Histogram(1);
      int start = 179;
      for (int i = 0; i < gene.value.length; i++)
      {
        if (i >= start) System.out.println(cellname[i - start] + "\t" + gene.value[i]);

//      if (pos[1][i])
          h.count(gene.value[i]);
      }
      System.out.println();
      h.print();
    }
  }

  public static void printMostPopularTargets()
  {
    Set<String> conf = new HashSet<String>(Arrays.asList("CDK1", "DDC", "FLNA", "GAPDH", "PRMT1", "TGFB1I1"));
    List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.05_var0.4_AR_expo.xls");
    Triplet.removeNonModulation(trips);

    System.out.println("trips.size() = " + trips.size());
   
    Set<String> tars = new HashSet<String>();
    Set<String> covd = new HashSet<String>();
    for (Triplet t : trips)
    {
      t.backFromURLToIDs();
      String s = t.getTSym();
      tars.add(s);
      if (conf.contains(t.getMSym())) covd.add(s);
    }
    System.out.println("covd = " + covd.size());
    System.out.println("tars = " + tars.size());
  }

  /**
   * Examines how two hard-coded modulators (NRIP1 and TGFB1I1) interact.
   * First, triplets (mod1, mod2, target) are built and those with a gamma
   * p-value below 0.05 are classified and printed through
   * {@link #printClassCounts(Map)}. Then, for every loaded triplet whose
   * modulator is the second one, a Quad and a QuadOnMod are counted and the
   * category distributions of the significant ones are printed.
   *
   * @throws IOException declared by the original signature
   */
  public static void printModPairInteraction() throws IOException
  {
    double thr = 0.05;
    String modName1 = "NRIP1";
    String modName2 = "TGFB1I1";
    Gene mod1 = null;
    Gene mod2 = null;
    List<Triplet> trips = DependencyCalculator.loadTrips();
    List<Gene> tars = new ArrayList<Gene>();

    // Locate the Gene objects of both modulators.
    // NOTE(review): the loop breaks as soon as both are found, so tars only
    // holds the targets of the triplets visited up to that point (and may
    // contain duplicates) - confirm that this is intended.
    for (Triplet t : trips)
    {
      tars.add(t.T);
      if (mod1 == null && t.getMSym().equals(modName1))
      {
        mod1 = t.M;
        if (mod2 != null) break;
      }
      if (mod2 == null && t.getMSym().equals(modName2))
      {
        mod2 = t.M;
        if (mod1 != null) break;
      }
    }

    List<Quad> quads1 = new ArrayList<Quad>();
    List<QuadOnMod> quads2 = new ArrayList<QuadOnMod>();
    List<Triplet> mmtrips = new ArrayList<Triplet>();
   
    // Triplets with mod1 and mod2 in the first two slots, one per collected
    // target; only those with a significant gamma are kept.
    // NOTE(review): mod1 / mod2 are still null here if a symbol was not found.
    for (Gene tar : tars)
    {
      Triplet t = new Triplet(mod1, mod2, tar);
      CaseCounter.adjustStatus(t);
      CaseCounter.count(t);
      if (Difference.calcGammaPval(t) < thr)
      {
        mmtrips.add(t);
      }
    }

    // The set holds the same Triplet objects that assignClass is called on
    // next, so the classes are assigned before printClassCounts reads them.
    HashMap<String, Set<Triplet>> map = new HashMap<String, Set<Triplet>>();
    map.put(mod1.geneid, new HashSet<Triplet>(mmtrips));
    TripletClassifier.assignClass(mmtrips, thr);
    printClassCounts(map);

    CaseCounter.adjustStatus(mod1);

    // Four-gene analysis: mod1 combined with every triplet modulated by mod2.
    for (Triplet t : trips)
    {
      if (t.getMSym().equals(modName2))
      {
        CaseCounter.adjustStatus(t);
        Quad q1 = new Quad(mod1, t.M, t.F, t.T);
        QuadOnMod q2 = new QuadOnMod(mod1, t.M, t.F, t.T);
        q1.count();
        q2.count();
        if (q1.getGammaPval() < thr)
        {
          quads1.add(q1);
        }
        if (q2.getGammaPval() < thr)
        {
          quads2.add(q2);
        }
      }
    }
    Quad.printCategDist(quads1);
    QuadOnMod.printCategDist(quads2);
  }

  public static void printGeneValues() throws IOException
  {
    String dir = "resource/expdata/GSE9633/";
    List<String> modNames = new ArrayList<String>(Arrays.asList((
      "AHR\n" +
        "AR\n" +
        "BRCA1\n" +
        "CASP1\n" +
        "CDK1\n" +
        "CDK6\n" +
        "DDC\n" +
        "EFCAB6\n" +
        "FHL2\n" +
        "FLNA\n" +
        "FOXA1\n" +
        "GAPDH\n" +
        "GRIP1\n" +
        "HIPK3\n" +
        "IFI16\n" +
        "NR0B2\n" +
        "NR3C1\n" +
        "NR5A1\n" +
        "NRIP1\n" +
        "NSD1\n" +
        "PAK6\n" +
        "PRMT1\n" +
        "PRPF6\n" +
        "RAD54L2\n" +
        "RUNX1\n" +
        "SPDEF\n" +
        "SRC\n" +
        "TCF4\n" +
        "TGFB1I1").split("\n")));

    Set<String> ids = new HashSet<String>();
    for (String name : modNames)
    {
      ids.add(Triplet.getSymbolToGeneMap().get(name));
    }
    Map<String, double[]> map = ExpDataReader.readSubset(ids, dir, 0, 0);

    System.out.println("\t" + FileUtil.getFirstLine(dir + "expnames.txt").replace("\"", ""));
    for (String name : modNames)
    {
      String id = Triplet.getSymbolToGeneMap().get(name);
      System.out.print(name);

      for (double v : map.get(id))
      {
        System.out.print("\t" + fmt.format(v));
      }
      System.out.println();
    }
  }

  private static void printCellLineDiff() throws IOException
  {
    HashSet<String> egids = new HashSet<String>();
    egids.add(Triplet.getSymbolToGeneMap().get("DAP3"));
    String dir = "resource/expdata/LNCaP/";
    Map<String, double[]> map = ExpDataReader.readSubset(egids, dir, 10, 0.25);
    double[] val = map.values().iterator().next();

    for (int i = 0; i < val.length; i++)
    {
      val[i] = Math.log(val[i]);
    }

    boolean[][] pos = StageAnalyzer.getPos(dir);

    double range = 0.5;
    Histogram h = new Histogram(range);
    Histogram hc = new Histogram(range);
    Histogram hp = new Histogram(range);

    for (int i = 0; i < val.length; i++)
    {
      h.count(val[i]);
      if (pos[8][i]) hc.count(val[i]);
      if (pos[9][i]) hp.count(val[i]);
    }

    h.printDensity();
    System.out.println("------");
    hc.printDensity();
    System.out.println("------");
    hp.printDensity();
  }

  private static void printProstateAndLNCapPresence() throws IOException
  {
    List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.01_var10.0_AR_expo_LNCaP.xls");
    List<String> modIDs = new ArrayList<String>();
    Map<String, Integer> targcnt = new HashMap<String, Integer>();
    for (Triplet t : trips)
    {
      t.backFromURLToIDs();
      if (!modIDs.contains(t.modulator)) modIDs.add(t.modulator);
      if (!targcnt.containsKey(t.modulator)) targcnt.put(t.modulator, 0);
      if (t.isModulation()) targcnt.put(t.modulator, targcnt.get(t.modulator) + 1);
    }
    String dir = "resource/expdata/LNCaP/";
    boolean[][] pos = StageAnalyzer.getPos(dir);
    Map<String, double[]> map = ExpDataReader.readSubset(new HashSet<String>(modIDs), dir, 10, 0.25);

//    String prlist = "";
//    String ablist = "";

    for (String modID : modIDs)
    {
      double[] vals = map.get(modID);
      Gene g = new Gene(modID, modID, vals.length);
      g.value = vals;
      g.rankAdjustStatus(1./3);
      String sym = Triplet.getGeneToSymbolMap().get(modID);
      double ln = g.calcPresenceOnTissue(pos[8]);
      double pr = g.calcPresenceOnTissue(pos[9]);

//      if (pr > 0.8) prlist += "\"" + sym + "\", ";
//      else if (pr < 0.2) ablist += "\"" + sym + "\", ";

      String x = (ln < 0.2 && pr > 0.8) || (ln > 0.8 && pr < 0.2) ? "X" : "";
      System.out.println( sym +
        "\t" + fmt.format(ln) + "\t" + fmt.format(pr) + "\t" + x + "\t" + targcnt.get(modID));
    }
//    System.out.println("prlist = " + prlist);
//    System.out.println("ablist = " + ablist);
   
  }

  public static void printPSAResults()
  {
    List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.01_var10.0_AR_expo.xls");
    System.out.println("trips.size() = " + trips.size());

    String psaid = Triplet.getSymbolToGeneMap().get("KLK3");

    int modcnt = 0;
    for (Triplet t : trips)
    {
      t.backFromURLToIDs();

      if (t.isModulation()) modcnt++;

      if (t.target.equals(psaid))
      {
        System.out.println(Triplet.getGeneToSymbolMap().get(t.modulator) + "\t" + t.categ);
      }
    }
    System.out.println("modcnt = " + modcnt);
  }

  public static void printAverages() throws Throwable
  {
//    TabDelimitedFileParser p = new TabDelimitedFileParser("resource/factors/AR_andr_small.txt");
//    Map<String, String> score = p.getOneToOneMap("Target", "Score");
//    List<String> modList = new ArrayList<String>(score.keySet());

    List<String> modList = new ArrayList<String>(TabDelimitedFileParser.getColumnSet(
      "resource/NuclearReceptors.txt", 0));
    Collections.sort(modList);

//    List<String> modList = new ArrayList<String>();
//    modList.add("AR");
//    modList.add("NR3C1");

    Set<String> ids = new HashSet<String>();
    for (String g : modList) ids.add(Triplet.getSymbolToGeneMap().get(g));

    String dir = "resource/expdata/expo";
    Map<String, Gene> map = ExpDataReader.readGenes(ids, dir, 0, 0);
//    Map<String, Gene> map = CrossPlatformMapper.fetchGenes(ids, dir + "/data.txt");
    boolean[][] pos = getPos(dir + "/");

    String[] expname = FileUtil.getFirstLine(dir + "/stages.txt").replace("\"", "").split("\t");

    assert expname.length == pos.length :
      "expname.length = " + expname.length + "\tpos.length = " + pos.length;

    for (int i = 0; i < expname.length; i++)
    {
      System.out.print("\t" + expname[i]);
    }
    for (String sym : modList)
    {
      Gene g = map.get(Triplet.getSymbolToGeneMap().get(sym));
      if (g == null) continue;

      System.out.print("\n" + sym);

      for (int i = 0; i < pos.length; i++)
      {
        System.out.print("\t" + CellTypeMatcher.getMeanValue(g, pos[i]));
      }
//      System.out.print("\t" + score.get(sym));
    }
    System.out.println();
  }
 
  public static void findActivityInOtherTissue() throws Throwable
  {
//    TabDelimitedFileParser p = new TabDelimitedFileParser("resource/factors/AR-select-small.txt");
    TabDelimitedFileParser p = new TabDelimitedFileParser("resource/NuclearReceptors.txt");
    Set<String> tarNames = p.getColumnSet(0);
    tarNames.add("AR");
    Set<String> tarIDs = IDUtil.covertSym2Gene(tarNames);
    String dir = "resource/expdata/expo/";
    Map<String, Gene> geneMap = ExpDataReader.readGenes(tarIDs, dir, 0, 0);
    boolean[][] pos = StageAnalyzer.getPos(dir);
    String[] stgNm = StageAnalyzer.getStageNames(dir);
    int indBr = ArrayUtils.getIndexOf(stgNm, "Breast");
    int indPr = ArrayUtils.getIndexOf(stgNm, "Prostate");

    List<Gene> genes = Gene.sortWithSym(geneMap.values());
    Gene ar = genes.get(5);
    int cnt = 0;
    tarNames.clear();
    for (Gene g : genes)
    {
      g.takeLog();
      double globMean = g.calcMean();
      double meanPr = g.calcMean(pos[indPr]);
      double meanBr = g.calcMean(pos[indBr]);
      double varBr = g.calcVariance(pos[indBr]);
      boolean select = meanBr > globMean && varBr > 1;
     
      double cor = Pearson.calcCorrelation(g.value, ar.value, pos[indBr]);

      if (select)
      {
        System.out.println(g.getSymbol() + "\t" + globMean + "\t" +
          meanPr + "\t" + meanBr + "\t" + varBr + "\t" + cor);
        cnt++;
        tarNames.add(g.getSymbol());
      }
    }
    System.out.println("cnt = " + cnt);

    p = new TabDelimitedFileParser("resource/NuclearReceptors.txt");
    Set<String> nucres = p.getColumnSet(0);
//    TripletMaker.make("AR", HPRDParser.readFor(Collections.singleton("AR")).get("AR"), tarNames,
//    TripletMaker.make("AR", nucres, tarNames,
//      "resource/tartrips/breast-AR-trips.txt");

    Map<String, Gene> symMap = Gene.getSymMap(genes);
    Gene g = symMap.get("ESR1");
    Histogram h = g.getDistribution(pos[indBr], 0.5);
    h.print();
  }
 
  public static void doGEMForBreast() throws IOException
  {
    List<Triplet> trips = Triplet.readTrips("resource/tartrips/breast-AR-trips.txt");
    String dir = "resource/expdata/expo/";
    trips = ExpDataReader.associate(trips, dir, 0, 0);
    boolean[][] pos = StageAnalyzer.getPos(dir);
    String[] stgNm = StageAnalyzer.getStageNames(dir);
    int indBr = ArrayUtils.getIndexOf(stgNm, "Breast");

    Set<Gene> genes = Triplet.collectGenes(trips);
    for (Gene gene : genes)
    {
      gene.cropExps(pos[indBr]);
    }

    CaseCounter.count(trips, 1D / 3);
    trips = Triplet.filterToStateExistence(trips);

    // Calculate gamma
    Difference.assignGammaPval(trips);

    // Find the gamma significance thresold for the desired FDR
//    double pv_thr = Triplet.getPvalGammaThreshold(trips, 0.2);
//    System.out.println("pv_thr = " + pv_thr);

    // Filter out triplets with insignificant gamma
    trips = Triplet.filterToPvalGamma(trips, 0.05);

    // Assign modulation categories
    TripletClassifier.assignClass(trips, 0.1);
    Triplet.removeCateg(trips, Triplet.MOA_INSIGNIFICANT);
    Triplet.record(trips, "result/Breast_results.txt");
  }
 
  /**
   * Gene symbols, one per array element (the literal is split on newlines).
   * NOTE(review): presumably AR targets in breast tissue, judging by the name;
   * the array is not referenced anywhere else in this file.
   */
  public static final String[] AR_TARS_BREAST = ("ATAD2\n" +
    "C1orf21\n" +
    "DHCR24\n" +
    "FAM174B\n" +
    "GNMT\n" +
    "IGF1R\n" +
    "KCNMA1\n" +
    "LIFR\n" +
    "MAK\n" +
    "ORM1\n" +
    "PMEPA1\n" +
    "SLC16A6\n" +
    "SNAI2\n" +
    "SOCS2\n" +
    "SORD\n" +
    "TMEM79\n" +
    "ZBTB16").split("\n");
}
TOP

Related Classes of gem.Pair

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.