Package gem

Source Code of gem.CellLineCorrecter

package gem;

import gem.parser.HPRDParser;
import gem.parser.TabDelimitedFileParser;
import gem.util.TermCounter;

import java.io.File;
import java.io.IOException;
import java.util.*;

/**
* @author Ozgun Babur
*/
public class CellLineCorrecter
{
  public static final double THR = 0.05;
  public static void main(String[] args) throws Throwable
  {
    printAndrogenApprovedTargets();
//    getAndrogenApprovedTrips();
//    filterSignsOfAndrRespTrips();

//    List<Triplet> trips = getCellCorrectedTrips();
//    Triplet.record(trips, "result/Result_fdr0.05_var10.0_AR_expo_select_LNCaP.xls");
  }

  public static List<Triplet> getCellCorrectedTrips() throws IOException
  {
    List<Triplet> trips = loadTrips();
    System.out.println("Initial trips size = " + trips.size());

    Set<Gene> upMods = new HashSet<Gene>();
    Set<Gene> dwMods = new HashSet<Gene>();

    for (Triplet t : trips)
    {
      if (upNames_LNCaP.contains(t.getMSym()))
      {
        upMods.add(t.M);
      }
      else if (dwNames_LNCaP.contains(t.getMSym()))
      {
        dwMods.add(t.M);
      }
    }

    for (Gene mod : new HashSet<Gene>(dwMods))
    {
      mod.rankAdjustStatus(1D/3);
      dwMods.remove(mod);
      dwMods.add(mod.getNegative());
    }

    Set<Gene> mods = new HashSet<Gene>();
    mods.addAll(upMods);
    mods.addAll(dwMods);

    List<Triplet> filtered = new ArrayList<Triplet>();

    for (Triplet t : trips)
    {
      double modVal = Difference.calcModulation(t);
      boolean contradicts = false;

      for (Gene mod : mods)
      {
        if (t.M.geneid.equals(mod.geneid)) continue;

        Triplet trip = new Triplet(t.M, mod, t.T);
        CaseCounter.count(trip);
        double mVal = Difference.calcModulation(trip);
        double pv = Difference.calcModulationPval(trip);

        if (pv < THR && modVal * mVal < 0)
        {
          contradicts = true;
          break;
        }
      }

      if (!contradicts) filtered.add(t);
    }
    System.out.println("Filtered size = " + filtered.size());
    return filtered;
  }

  public static List<Triplet> getAndrogenApprovedTrips() throws Throwable
  {
    TabDelimitedFileParser parser = new TabDelimitedFileParser("resource/factors/AR_andr.txt");
    Map<String, String> score = parser.getOneToOneMap("Target", "Score");

    String tripFile = "result/Result_fdr0.05_var10.0_AR_expo_select.xls";

    List<Triplet> trips = Triplet.readTrips(tripFile);
    for (Triplet t : trips) t.backFromURLToIDs();

    System.out.println("initial size = " + trips.size());

    List<Triplet> verified = new ArrayList<Triplet>();

    for (Triplet t : trips)
    {
      int sign = score.get(t.getTSym()).startsWith("-") ? -1 : 1;

      double pv1 = Difference.calcAlphaFpval(t);
      double pv2 = Difference.calcBetaFpval(t);

      int s1 = pv1 < THR ? Difference.calcAlphaF(t) > 0 ? 1 : -1 : 0;
      int s2 = pv2 < THR ? Difference.calcBetaF(t) > 0 ? 1 : -1 : 0;

      if (s1 == sign || s2 == sign) verified.add(t);
    }

    Triplet.record(verified, "result/Result_fdr0.05_var10.0_AR_expo_select_filtered.xls");
    System.out.println("filtered size = " + verified.size());
    return verified;
  }

  public static void printAndrogenApprovedTargets() throws Throwable
  {
//    String[] dirs = new String[]{"Ling", "vivek", "MSKCC", "GSE7868", "GSE7708", "GSE846"};
//    int[][] pairs = new int[][]{ {0, 1}, {2, 0},  {3, 2},  {0, 2}, {0, 1}, {0, 1}};

    String[] dirs = new String[]{"Ling", "GSE7868", "GSE7708", "GSE846"};
    int[][] pairs = new int[][]{ {0, 1}, {0, 2}, {0, 1}, {0, 1}};

    Map<String, Integer> count = new HashMap<String, Integer>();

    for (int i = 0; i < dirs.length; i++)
    {
      String dir = "resource/expdata/" + dirs[i] + "/";
      boolean[][] pos = StageAnalyzer.getPos(dir);
//      for (Gene gene : getARInteractors(dir))
      for (Gene gene : getARTargets(dir))
      {
        double pv = CellTypeMatcher.getChangePvalBetweenTissues(
          gene, pos[pairs[i][0]], pos[pairs[i][1]]);

        double x0 = CellTypeMatcher.getMeanValue(gene, pos[pairs[i][0]]);
        double x1 = CellTypeMatcher.getMeanValue(gene, pos[pairs[i][1]]);

        if (x0 < 20 && x1 < 20)
        {
          x0 = Math.exp(x0);
          x1 = Math.exp(x1);
        }

        double rat = x0/x1;

        if (pv < THR)// && (rat < 0.5 || rat > 2))
        {
          int ch = (int) Math.signum(CellTypeMatcher.getMeanChange(
            gene, pos[pairs[i][0]], pos[pairs[i][1]]));

          if (!count.containsKey(gene.id)) count.put(gene.id, ch);
          else count.put(gene.id, count.get(gene.id) + ch);
        }
      }
    }

    int i = 0;
    TermCounter tc = new TermCounter();
    for (String sym : count.keySet())
    {
      int score = count.get(sym);
      tc.addTerm("" + score);
      if (Math.abs(score) > 1)
      {
        System.out.println(sym + "\t" + score);
        i++;
      }
    }
    System.out.println("i = " + i);
    tc.print();
  }

  private static void filterSignsOfAndrRespTrips()
  {
    List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.05_var10.0_AR_expo_andr.xls");
    TabDelimitedFileParser parser = new TabDelimitedFileParser("resource/factors/AR_andr.txt");
    Map<String, String> score = parser.getOneToOneMap("Target", "Score");

    List<Triplet> list = new ArrayList<Triplet>();

    for (Triplet t : trips)
    {
      t.backFromURLToIDs();
      int sign = score.get(Triplet.getGeneToSymbolMap().get(t.target)).startsWith("-") ?
        -1 : 1;

      double pv1 = Difference.calcAlphaFpval(t);
      double pv2 = Difference.calcBetaFpval(t);

      int s1 = pv1 < THR ? Difference.calcAlphaF(t) > 0 ? 1 : -1 : 0;
      int s2 = pv2 < THR ? Difference.calcBetaF(t) > 0 ? 1 : -1 : 0;

      if (s1 * sign == 1 || s2 * sign == 1)
      {
        list.add(t);
      }
    }
    Triplet.record(list, "result/Result_fdr0.05_var10.0_AR_expo_andr_clean.xls");
  }

  private static Collection<Gene> getARTargets(String dir) throws Throwable
  {
//    TabDelimitedFileParser parser = new TabDelimitedFileParser("resource/factors/AR.txt");

    TabDelimitedFileParser parser = new TabDelimitedFileParser("resource/NuclearReceptors.txt");
    Set<String> targets = parser.getColumnSet(0);
//
    Set<String> ids = new HashSet<String>();

    for (String sym : targets)
    {
      if (Triplet.getSymbolToGeneMap().containsKey(sym))
        ids.add(Triplet.getSymbolToGeneMap().get(sym));
    }

//    ids.addAll(Triplet.getGeneToSymbolMap().keySet());

    Map<String, Gene> geneMap;

    File plat = new File(dir + "platform.txt");
    if (!plat.exists()) geneMap = CrossPlatformMapper.fetchGenes(ids, dir + "data.txt");
    else geneMap = CrossPlatformMapper.fetchGenes(ids, dir + "data.txt", plat.getPath());

    return geneMap.values();
  }

  private static Collection<Gene> getARInteractors(String dir) throws Throwable
  {
    Map<String, Set<String>> map = HPRDParser.readFor(new HashSet<String>(Arrays.asList("AR")));
    Set<String> ids = new HashSet<String>();
    for (String modSym : map.get("AR"))
    {
      String id = Triplet.getSymbolToGeneMap().get(modSym);
      if (id != null) ids.add(id);
    }
    File plat = new File(dir + "platform.txt");
    Map<String, Gene> geneMap;
    if (!plat.exists()) geneMap = CrossPlatformMapper.fetchGenes(ids, dir + "data.txt");
    else geneMap = CrossPlatformMapper.fetchGenes(ids, dir + "data.txt", plat.getPath());

    return geneMap.values();
  }

  static final List<String> upNames_LNCaP = Arrays.asList("AES", "AKT1", "CCNH", "CDC37", "CDK1", "CDK7", "CDK9", "DAP3", "DAXX", "DCAF6", "DDC", "FOXA1", "GAPDH", "GNB2L1", "GTF2F1", "GTF2F2", "HDAC1", "HIPK3", "IDE", "KAT5", "MYST2", "NCOR1", "NONO", "NR0B2", "NSD1", "PA2G4", "PELP1", "PIAS2", "PRMT1", "PRPF6", "RAD54L2", "RAF1", "RCHY1", "SART3", "SMAD4", "SPDEF", "STUB1", "TSG101", "UBE2I", "UBE3A", "XRCC5");
  static final List<String> dwNames_LNCaP = Arrays.asList("AHR", "APOL2", "CASP1", "CASP3", "CASP8", "CDC25B", "CDK6", "CTDSP2", "FHL2", "FLNA", "FOXO1", "GAK", "GSN", "GTF2H1", "HIF1A", "HMGB1", "IFI16", "IL6ST", "JMJD1C", "JUN", "MED14", "NCOA4", "NISCH", "NR2C1", "NR2C2", "NR3C1", "PAK6", "PIAS1", "PTEN", "RBAK", "RNASEL", "RUNX1", "SMAD1", "SP1", "TCF4", "TGFB1I1", "TGIF1", "TRIP4", "UXT", "WIPI1");

  static final List<String> upNames_prost = Arrays.asList("AES", "AKT1", "CCNH", "DCAF6", "FLNA", "FOXA1", "GTF2F1", "HDAC1", "IDE", "IL6ST", "JMJD1C", "JUN", "KAT2B", "NCOA4", "NISCH", "PIAS1", "RCHY1", "SMAD4", "SPDEF", "STAT3", "TGFB1I1");
  static final List<String> dwNames_prost = Arrays.asList("AHR", "BRCA1", "CASP1", "CASP3", "CASP8", "CDC25B", "CDK1", "CTDSP2", "DAXX", "DDC", "FOXO1", "GAPDH", "IFI16", "NR2C1", "PA2G4", "PRMT1", "PSMC3IP", "RB1", "RUNX1", "TCF4", "UBE2I", "XRCC5");

  public static List<Triplet> loadTrips() throws IOException
  {
    List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.05_var10.0_AR_expo_select.xls");
    Triplet.removeNonModulation(trips);

    for (Triplet t : trips)
    {
      t.backFromURLToIDs();
    }

    trips = ExpDataReader.associate(trips, "resource/expdata/expo", 10, 0.25);
    return trips;
  }

}
TOP

Related Classes of gem.CellLineCorrecter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.