package gem;
import gem.parser.EntrezGeneParser;
import gem.parser.HPRDParser;
import gem.parser.TabDelimitedFileParser;
import gem.util.*;
import gem.util.Point;
import java.awt.*;
import java.io.*;
import java.util.*;
import java.util.List;
import static gem.StageAnalyzer.getPos;
import static gem.StageAnalyzer.intersects;
/**
* @author Ozgun Babur
*/
public class Tester implements Constants
{
/**
 * Entry point for ad-hoc analysis runs. Currently invokes
 * {@link #findActivityInOtherTissue()}; the other analyses are left
 * commented out so that they can be switched on by hand.
 *
 * @param args command line arguments (not read)
 * @throws Throwable whatever the enabled analysis throws
 */
public static void main(String[] args) throws Throwable
{
// test2();
// printCellLineDiff();
// printClassCounts();
// printProstateAndLNCapPresence();
// printPSAResults();
// printAverages();
findActivityInOtherTissue();
// doGEMForBreast();
}
/**
 * Writes <code>mskcc_mapping.txt</code>, a two column (ID, Symbol) table
 * with one row for every data row of the MSKCC expression file. The symbol
 * is looked up in {@link Triplet#getGeneToSymbolMap()}; IDs without a
 * mapping are written with the text "null".
 *
 * Both streams are closed in <code>finally</code> blocks so that a failure
 * in the middle of the copy does not leak file handles.
 *
 * @throws IOException if the data file cannot be read or the mapping file
 * cannot be written
 */
private static void test1() throws IOException
{
    String dir = "resource/expdata/MSKCC/";
    Map<String, String> g2s = Triplet.getGeneToSymbolMap();

    BufferedReader reader = new BufferedReader(new FileReader(dir + "data.txt"));
    try
    {
        BufferedWriter writer = new BufferedWriter(new FileWriter("mskcc_mapping.txt"));
        try
        {
            writer.write("ID\tSymbol\n");

            // The first line of the data file is discarded (presumably a header row).
            reader.readLine();

            for (String line = reader.readLine(); line != null; line = reader.readLine())
            {
                // The ID is everything before the first tab.
                String id = line.substring(0, line.indexOf("\t"));
                writer.write(id + "\t" + g2s.get(id) + "\n");
            }
        }
        finally
        {
            writer.close();
        }
    }
    finally
    {
        reader.close();
    }
}
/**
 * Prints, as a comma separated list on standard output, the zero-based line
 * indexes of the MSKCC data file whose ID maps to a symbol that is listed in
 * the first column of <code>resource/NuclearReceptors.txt</code>. The very
 * first line of the file is counted as index 0.
 *
 * The reader is closed in a <code>finally</code> block so that it is
 * released even when a line cannot be parsed.
 *
 * @throws Throwable if any of the input files cannot be read
 */
private static void test2() throws Throwable
{
    String dir = "resource/expdata/MSKCC/";

    // Alternative ID -> symbol source that was used before:
    // new TabDelimitedFileParser(dir + "platform.txt").getOneToOneMap("ID", "Gene Symbol")
    Map<String, String> g2s = Triplet.getGeneToSymbolMap();

    // Alternative symbol list that was used before: resource/factors/AR-select-small.txt
    TabDelimitedFileParser p = new TabDelimitedFileParser("resource/NuclearReceptors.txt");
    Set<String> syms = p.getColumnSet(0);

    BufferedReader reader = new BufferedReader(new FileReader(dir + "data.txt"));
    try
    {
        int i = 0;
        for (String line = reader.readLine(); line != null; line = reader.readLine())
        {
            // The ID is everything before the first tab.
            String id = line.substring(0, line.indexOf("\t"));
            if (g2s.containsKey(id) && syms.contains(g2s.get(id)))
            {
                System.out.print(i + ",");
            }
            i++;
        }
    }
    finally
    {
        reader.close();
    }
}
/**
 * Mixes <code>x</code> with one draw from <code>r</code>:
 * <code>|cor| * x + sqrt(1 - cor^2) * r.nextDouble()</code>. When
 * <code>cor</code> is negative the mixed value is reflected as
 * <code>1 - value</code>.
 *
 * @param x the value to mix in
 * @param cor weight of <code>x</code>; its sign selects the reflection
 * @param r source of the random component; exactly one double is drawn
 * @return the mixed (and possibly reflected) value
 */
public static double corRand(double x, double cor, Random r)
{
    boolean reflect = cor < 0;
    double weight = reflect ? -cor : cor;

    double residual = Math.sqrt(1 - (weight * weight));
    double mixed = (weight * x) + (residual * r.nextDouble());

    return reflect ? 1 - mixed : mixed;
}
/**
 * Translates gene IDs to symbols through
 * {@link Triplet#getGeneToSymbolMap()} and returns the distinct symbols in
 * their natural (String) order.
 *
 * @param modIDs gene IDs to translate
 * @return sorted list of the distinct symbols
 */
private static List<String> getSortedNames(Set<String> modIDs)
{
    // Going through a set first collapses IDs that share a symbol.
    Set<String> distinctSymbols = new HashSet<String>();
    for (String geneID : modIDs)
    {
        String symbol = Triplet.getGeneToSymbolMap().get(geneID);
        distinctSymbols.add(symbol);
    }

    List<String> ordered = new ArrayList<String>();
    ordered.addAll(distinctSymbols);
    Collections.sort(ordered);
    return ordered;
}
/**
 * Prints a histogram of the values of gene "16621" from the "philip"
 * dataset, counting only the experiments that are flagged in row 4 of the
 * stage matrix returned by {@link StageAnalyzer#getPos(String)}.
 *
 * @throws IOException if the expression data cannot be read
 */
private static void test4() throws IOException
{
    final String dataDir = "resource/expdata/philip/";
    final String geneID = "16621";

    Set<String> wanted = new HashSet<String>();
    wanted.add(geneID);

    Gene gene = ExpDataReader.readGenes(wanted, dataDir, 0, 0).get(geneID);
    boolean[][] stagePos = StageAnalyzer.getPos(dataDir);

    Histogram histo = new Histogram(4);
    for (int exp = 0; exp < gene.value.length; exp++)
    {
        if (!stagePos[4][exp])
        {
            continue;
        }
        histo.count(gene.value[exp]);
    }
    histo.print();
}
/**
 * Scans every file in the <code>mapping</code> directory for HTML table rows
 * and writes the pairs found to
 * <code>resource/human2mouse_symbols.txt</code>.
 *
 * Within a row, the first Entrez gene link is taken as the human entry and
 * the second one as the mouse entry. The value written for each is the text
 * between the last '=' and the last '"' of the link line. One tab separated
 * pair is written when the closing row tag is reached.
 *
 * Changes compared to the previous version: a missing or unreadable
 * <code>mapping</code> directory is reported with an exception instead of a
 * NullPointerException, a file without a table body is skipped instead of
 * crashing, and both streams are closed even when parsing fails.
 *
 * @throws IOException if the directory cannot be listed, a file cannot be
 * read, or the output cannot be written
 */
private static void test5() throws IOException
{
    File dir = new File("mapping");

    // listFiles() returns null when the path is not a readable directory.
    File[] files = dir.listFiles();
    if (files == null)
    {
        throw new FileNotFoundException("Not a readable directory: " + dir.getPath());
    }

    BufferedWriter writer = new BufferedWriter(new FileWriter("resource/human2mouse_symbols.txt"));
    try
    {
        writer.write("Human\tMouse");

        for (File file : files)
        {
            BufferedReader reader = new BufferedReader(new FileReader(file));
            try
            {
                String hum = null;
                String mou = null;

                // Skip everything up to and including the table body opening line.
                // Reaching the end of the file here simply means there are no rows.
                String line = reader.readLine();
                while (line != null && !line.equals(" <tbody>"))
                {
                    line = reader.readLine();
                }

                for (line = reader.readLine(); line != null; line = reader.readLine())
                {
                    if (line.startsWith(" <a href=\"http://www.ncbi.nlm.nih.gov/sites/entrez?db=gene&cmd=retrieve&dopt=full_report&list_uids="))
                    {
                        assert hum == null || mou == null;

                        String id = line.substring(line.lastIndexOf("=") + 1, line.lastIndexOf("\""));

                        if (hum == null)
                        {
                            assert mou == null;
                            hum = id;
                        }
                        else
                        {
                            assert mou == null;
                            mou = id;
                        }
                    }
                    else if (line.startsWith(" </tr>"))
                    {
                        // End of a row: both entries must have been seen.
                        assert hum != null && mou != null;
                        writer.write("\n" + hum + "\t" + mou);
                    }
                    else if (line.startsWith(" <tr"))
                    {
                        // Start of a new row: forget the previous pair.
                        hum = null;
                        mou = null;
                    }
                }
            }
            finally
            {
                reader.close();
            }
        }
    }
    finally
    {
        writer.close();
    }
}
/**
 * Splits the targets listed in <code>AR-select.txt</code> into two groups
 * according to their "Score" in <code>AR_andr.txt</code>: scores that start
 * with '-' go to the "dw" group, all others to the "up" group. Prints both
 * group sizes, then the sorted "up" names, an empty line, and the sorted
 * "dw" names.
 *
 * @throws IOException declared for callers; raised by the file parsing
 */
private static void test3() throws IOException
{
    Map<String, String> score =
        new TabDelimitedFileParser("resource/factors/AR_andr.txt").getOneToOneMap("Target", "Score");
    Set<String> select =
        new TabDelimitedFileParser("resource/factors/AR-select.txt").getColumnSet(0);

    List<String> up = new ArrayList<String>();
    List<String> dw = new ArrayList<String>();

    for (String target : select)
    {
        boolean negativeScore = score.get(target).startsWith("-");
        List<String> bucket = negativeScore ? dw : up;
        bucket.add(target);
    }

    System.out.println("up.size() = " + up.size());
    System.out.println("dw.size() = " + dw.size());

    Collections.sort(up);
    Collections.sort(dw);

    for (int i = 0; i < up.size(); i++)
    {
        System.out.println(up.get(i));
    }
    System.out.println();
    for (int i = 0; i < dw.size(); i++)
    {
        System.out.println(dw.get(i));
    }
}
/**
 * Number of targets per modulator. Loads the result triplets, drops the
 * non-modulation ones, groups the rest by modulator, prints
 * "symbol TAB count" for every modulator and finally prints a histogram of
 * the counts.
 *
 * @throws IOException declared for callers
 */
private static void test7() throws IOException
{
    List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.05_var10.0_AR_expo.xls");
    Triplet.removeNonModulation(trips);

    Map<String, Set<Triplet>> byModulator = new HashMap<String, Set<Triplet>>();
    for (Triplet trip : trips)
    {
        trip.backFromURLToIDs();

        Set<Triplet> bucket = byModulator.get(trip.modulator);
        if (bucket == null)
        {
            bucket = new HashSet<Triplet>();
            byModulator.put(trip.modulator, bucket);
        }
        bucket.add(trip);
    }

    Histogram histo = new Histogram(50);
    for (Map.Entry<String, Set<Triplet>> entry : byModulator.entrySet())
    {
        int targetCount = entry.getValue().size();
        histo.count(targetCount);

        // A "size > 19" filter used to guard this line; every modulator is printed now.
        System.out.println(Triplet.getGeneToSymbolMap().get(entry.getKey()) + "\t" + targetCount);
    }
    histo.print();
}
/**
 * Similarity of modulators. For every ordered pair of distinct modulators
 * (m1, m2) this counts the targets they affect in the same direction
 * (similarity) and in opposite directions (opposition), where the direction
 * is the sign of {@link Difference#calcGamma}. Both counts are normalised by
 * the number of targets of m1. Pairs whose similarity score exceeds 0.35 are
 * written as edges to <code>temp.graphml</code>.
 *
 * Fixes compared to the previous version:
 * <ul>
 * <li>the normaliser is now really the target count of m1 (it used to mix
 * the positive targets of m1 with the negative targets of m2, which could
 * also be zero);</li>
 * <li>pairs are no longer de-duplicated with the concatenation
 * <code>mod1 + mod2</code>, which can collide for different ID pairs and is
 * unnecessary because every ordered pair is visited exactly once;</li>
 * <li>the output file is closed even if writing fails.</li>
 * </ul>
 *
 * @throws IOException if the graph file cannot be written
 */
private static void test8() throws IOException
{
    List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.01_var10.0_AR_expo.xls");
    Triplet.removeNonModulation(trips);

    // modulator ID -> targets with non-negative gamma / with negative gamma
    Map<String, Set<String>> positive = new HashMap<String, Set<String>>();
    Map<String, Set<String>> negative = new HashMap<String, Set<String>>();

    for (Triplet t : trips)
    {
        t.backFromURLToIDs();

        // Every modulator gets an entry in both maps so that later lookups never return null.
        if (!positive.containsKey(t.modulator)) positive.put(t.modulator, new HashSet<String>());
        if (!negative.containsKey(t.modulator)) negative.put(t.modulator, new HashSet<String>());

        if (Difference.calcGamma(t) < 0)
        {
            negative.get(t.modulator).add(t.target);
        }
        else
        {
            positive.get(t.modulator).add(t.target);
        }
    }

    /** Scores of one ordered modulator pair; sorts by descending score. */
    class Pair implements Comparable<Pair>
    {
        final String m1;
        final String m2;
        final int similarity;
        final int opposition;
        final double simScore;
        final double oppScore;

        Pair(String m1, String m2, int similarity, int opposition, int m1TarNum)
        {
            this.m1 = m1;
            this.m2 = m2;
            this.similarity = similarity;
            this.opposition = opposition;
            this.simScore = similarity / (double) m1TarNum;
            this.oppScore = opposition / (double) m1TarNum;
        }

        public int compareTo(Pair other)
        {
            return Double.compare(other.getScore(), getScore());
        }

        @Override
        public String toString()
        {
            return m1 + "\t" + m2 + "\t" + fmt.format(simScore) + "\t" +
                fmt.format(oppScore);
        }

        /** Similarity score, or the negated opposition score when opposition dominates. */
        double getScore()
        {
            return similarity >= opposition ? simScore : -oppScore;
        }
    }

    Map<String, String> g2s = Triplet.getGeneToSymbolMap();
    List<Pair> pairs = new ArrayList<Pair>();

    for (String mod1 : positive.keySet())
    {
        String symb1 = g2s.get(mod1);
        Set<String> pos1 = positive.get(mod1);
        Set<String> neg1 = negative.get(mod1);

        // At least 1: a modulator only appears here because one of its targets was recorded.
        int m1TarNum = pos1.size() + neg1.size();

        for (String mod2 : positive.keySet())
        {
            if (mod1.equals(mod2)) continue;

            String symb2 = g2s.get(mod2);
            Set<String> pos2 = positive.get(mod2);
            Set<String> neg2 = negative.get(mod2);

            int poscomm = SetUtils.countCommon(pos1, pos2);
            int negcomm = SetUtils.countCommon(neg1, neg2);
            int diff1 = SetUtils.countCommon(pos1, neg2);
            int diff2 = SetUtils.countCommon(neg1, pos2);

            pairs.add(new Pair(symb1, symb2, poscomm + negcomm, diff1 + diff2, m1TarNum));
        }
    }

    Collections.sort(pairs);

    double thr = 0.35;
    Set<String> writtenNodes = new HashSet<String>();

    BufferedWriter writer = new BufferedWriter(new FileWriter("temp.graphml"));
    try
    {
        GraphML.writeHeader(writer);

        for (Pair p : pairs)
        {
            if (p.simScore > thr)
            {
                // Nodes are emitted lazily, the first time an edge needs them.
                if (writtenNodes.add(p.m1))
                {
                    writer.write(GraphML.createNodeData(p.m1, p.m1, Color.WHITE, 0, true));
                }
                if (writtenNodes.add(p.m2))
                {
                    writer.write(GraphML.createNodeData(p.m2, p.m2, Color.WHITE, 0, true));
                }

                // The red and green channels shrink as the similarity grows, clamped at 0.
                int c = (int) (256 - (p.simScore * 256 * 2));
                if (c < 0) c = 0;

                writer.write(GraphML.createEdgeData(p.m1, p.m2, new Color(c, c, 250), true, true));
            }
        }

        GraphML.writeFooter(writer);
    }
    finally
    {
        writer.close();
    }
}
/**
 * Loads the "select" result triplets, groups them by target gene ID and
 * hands the groups to {@link #printClassCounts(Map)}.
 *
 * Two optional pre-filters were used here in the past and are currently
 * disabled: restricting the triplets with filterToSign(trips, -1) and
 * keeping only the triplets whose target symbol is KLK3.
 */
private static void printClassCounts()
{
    List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.05_var10.0_AR_expo_select.xls");

    Map<String, Set<Triplet>> byTarget = new HashMap<String, Set<Triplet>>();
    for (Triplet trip : trips)
    {
        trip.backFromURLToIDs();

        Set<Triplet> group = byTarget.get(trip.target);
        if (group == null)
        {
            group = new HashSet<Triplet>();
            byTarget.put(trip.target, group);
        }
        group.add(trip);
    }

    printClassCounts(byTarget);
}
/**
 * Returns the triplets whose target has the requested score sign. The sign
 * of a target is -1 when its "Score" entry in <code>AR_andr.txt</code>
 * starts with '-', and 1 otherwise.
 *
 * @param trips triplets to filter; the list itself is not modified
 * @param sign the sign to keep, -1 or 1
 * @return a new list holding the matching triplets in their original order
 */
private static List<Triplet> filterToSign(List<Triplet> trips, int sign)
{
    Map<String, String> score =
        new TabDelimitedFileParser("resource/factors/AR_andr.txt").getOneToOneMap("Target", "Score");

    List<Triplet> kept = new ArrayList<Triplet>();
    for (Triplet trip : trips)
    {
        boolean negativeScore = score.get(trip.getTSym()).startsWith("-");
        int tripSign = negativeScore ? -1 : 1;
        if (tripSign != sign)
        {
            continue;
        }
        kept.add(trip);
    }
    return kept;
}
/**
 * Text form of histo-plot. For every gene in the map (sorted by symbol) two
 * lines are printed: the counts of the six activation categories followed
 * by a "good"/"bad" status, then the counts of the six inhibition
 * categories followed by the count of insignificant triplets. A blank line
 * follows each gene, and a summary of the statuses is printed at the end.
 *
 * A gene is "good" when the majority direction of its triplets
 * (activation strictly more frequent than inhibition, or not) agrees with
 * the sign of its "Score" in <code>AR_andr_small.txt</code>.
 *
 * @param map gene ID to the triplets that should be counted for that gene
 */
private static void printClassCounts(Map<String, Set<Triplet>> map)
{
    final String[] categories = {
        ENHANCES_ACTIVATION, ATTENUATES_ACTIVATION, INVERTS_ACTIVATION, XOR_ACTIVATE, OR_ACTIVATE, FMOD_ACTIVATE,
        ENHANCES_INHIBITION, ATTENUATES_INHIBITION, INVERTS_INHIBITION, XOR_INHIBIT, OR_INHIBIT, FMOD_INHIBIT,
        MOA_INSIGNIFICANT};

    Map<String, String> score =
        new TabDelimitedFileParser("resource/factors/AR_andr_small.txt").getOneToOneMap("Target", "Score");
    List<String> names = getSortedNames(map.keySet());
    TermCounter tc = new TermCounter();

    for (String name : names)
    {
        Set<Triplet> trips = map.get(Triplet.getSymbolToGeneMap().get(name));

        // cnt[k] = number of triplets whose category is categories[k]
        int[] cnt = new int[categories.length];
        for (Triplet t : trips)
        {
            int k = 0;
            while (k < categories.length && !t.categ.equals(categories[k]))
            {
                k++;
            }
            if (k < categories.length)
            {
                cnt[k]++;
            }
        }

        // Indexes 0-5 are the activation categories, 6-11 the inhibition ones.
        int activating = 0;
        for (int k = 0; k < 6; k++) activating += cnt[k];
        int inhibiting = 0;
        for (int k = 6; k < 12; k++) inhibiting += cnt[k];

        boolean negativeScore = score.get(name).startsWith("-");
        boolean mostlyActivating = activating > inhibiting;
        String status = (negativeScore != mostlyActivating) ? "good" : "bad";
        tc.addTerm(status);

        StringBuilder out = new StringBuilder();
        out.append(name);
        out.append("\t").append(cnt[0]).append("\t").append(cnt[1]).append("\t").append(cnt[2]);
        out.append("\t\t").append(cnt[3]).append("\t").append(cnt[4]).append("\t").append(cnt[5]);
        out.append("\t\t").append(status);
        out.append("\n\t").append(cnt[6]).append("\t").append(cnt[7]).append("\t").append(cnt[8]);
        out.append("\t\t").append(cnt[9]).append("\t").append(cnt[10]).append("\t").append(cnt[11]);
        out.append("\t\t").append(cnt[12]).append("\n");
        System.out.println(out.toString());
    }

    tc.print();
}
/**
 * A fixed set of gene symbols. Not read by any active code in this class.
 */
static Set<String> lowSet = new HashSet<String>(Arrays.asList("AHR", "CASP1", "CDK6", "EFCAB6", "FHL2", "FLNA", "GRIP1", "IFI16", "NR3C1", "PAK6", "RUNX1", "TCF4", "TGFB1I1"));
/**
 * Convenience entry: loads the fdr0.01 result triplets, associates them
 * with the "expo" expression data, adjusts their status and runs
 * {@link #calcDependencyMatrix(List, double)} with a p-value threshold of
 * 0.01.
 *
 * @throws IOException if reading the data or writing the graph fails
 */
public static void calcDependencyMatrix() throws IOException
{
    List<Triplet> loaded = Triplet.readTrips("result/Result_fdr0.01_var10.0_AR_expo.xls");
    for (Triplet trip : loaded)
    {
        trip.backFromURLToIDs();
    }

    List<Triplet> associated = ExpDataReader.associate(loaded, "resource/expdata/expo", 0, 0);
    for (Triplet trip : associated)
    {
        CaseCounter.adjustStatus(trip);
    }

    calcDependencyMatrix(associated, 0.01);
}
/**
 * Builds a modulator-by-modulator dependency matrix, prints it, and writes
 * the strong dependencies to <code>dependency_positive.graphml</code>.
 *
 * Trips should contain only one F. These must be the result triplets that
 * were filtered by the parameter gammaThr.
 *
 * For every triplet (M, F, T) whose modulation p-value is not above
 * <code>pvalThr</code>, and for every other modulator X, a triplet
 * (M, negative of X, T) is counted and evaluated. Cell [M][X] is incremented
 * when that triplet's modulation does not have the opposite sign of the
 * original one and its p-value is below <code>pvalThr</code>. Each row is
 * then divided by the number of triplets of M, and cells above 0.5 become
 * graph edges.
 *
 * Compared to the previous version, the output file is closed even when
 * writing fails, and row/column indexes are no longer recomputed with a
 * linear search inside the innermost loop.
 *
 * @param trips result triplets; non-modulation triplets are removed from
 * this list in place
 * @param pvalThr p-value threshold applied to the modulation tests
 * @throws IOException if the graph file cannot be written
 */
public static void calcDependencyMatrix(List<Triplet> trips, double pvalThr) throws IOException
{
    Triplet.removeNonModulation(trips);

    // Distinct modulators and the number of triplets each one appears in.
    List<Gene> mods = new ArrayList<Gene>();
    Map<Gene, Integer> sizes = new HashMap<Gene, Integer>();

    for (Triplet t : trips)
    {
        if (!mods.contains(t.M)) mods.add(t.M);

        if (!sizes.containsKey(t.M)) sizes.put(t.M, 1);
        else sizes.put(t.M, sizes.get(t.M) + 1);
    }

    Collections.sort(mods);

    int[][] dep = new int[mods.size()][mods.size()];

    for (Triplet trip : trips)
    {
        double val_orig = Difference.calcModulation(trip);
        double pval_orig = Difference.calcModulationPval(trip);

        if (pval_orig > pvalThr) continue;

        // mods holds each modulator once, so the row index is fixed for the whole inner loop.
        int row = mods.indexOf(trip.M);

        for (int col = 0; col < mods.size(); col++)
        {
            Gene mod = mods.get(col);
            if (mod == trip.M) continue;

            Gene neg = mod.getNegative();
            Triplet t = new Triplet(trip.M, neg, trip.T);
            CaseCounter.count(t);

            // Skip when the modulation flips sign relative to the original triplet.
            double val = Difference.calcModulation(t);
            if (val * val_orig < 0) continue;

            double pval = Difference.calcModulationPval(t);
            if (pval < pvalThr)
            {
                dep[row][col]++;
            }
        }
    }

    // Normalise every row by the number of triplets of the row's modulator.
    double[][] rat = new double[dep.length][dep.length];
    for (int i = 0; i < dep.length; i++)
    {
        int size = sizes.get(mods.get(i));
        for (int j = 0; j < dep.length; j++)
        {
            if (i == j) continue;
            rat[i][j] = dep[i][j] / (double) size;
        }
    }

    // Tab separated matrix: header row of symbols, then one row per modulator.
    for (Gene mod : mods)
    {
        System.out.print("\t" + mod.getSymbol());
    }
    for (int i = 0; i < rat.length; i++)
    {
        System.out.print("\n" + mods.get(i).getSymbol());
        for (int j = 0; j < rat.length; j++)
        {
            System.out.print("\t" + fmt.format(rat[i][j]));
        }
    }

    double thr = 0.5;

    BufferedWriter writer = new BufferedWriter(new FileWriter("dependency_positive.graphml"));
    try
    {
        GraphML.writeHeader(writer);

        for (Gene mod : mods)
        {
            String sym = mod.getSymbol();

            // NOTE(review): the cluster index is fixed at 0, so every node is drawn white.
            // getNodeCluster(sym) looks like the intended source - confirm before enabling.
            int cluster = 0;
            Color color = cluster == 1 ? new Color(200, 200, 255) :
                cluster == 2 ? new Color(255, 200, 200) : Color.WHITE;

            writer.write(GraphML.createNodeData(sym, sym, color, cluster, true));
        }

        for (int i = 0; i < rat.length; i++)
        {
            for (int j = 0; j < rat.length; j++)
            {
                if (rat[i][j] > thr)
                {
                    // Stronger dependencies get a more saturated green; clamped at 0.
                    int c = (int) (256 - (((rat[i][j] - thr) / (1 - thr)) * 256)) - 100;
                    if (c < 0) c = 0;

                    Color edgeColor = new Color(c, 200, c); // green
                    writer.write(GraphML.createEdgeData(mods.get(i).getSymbol(),
                        mods.get(j).getSymbol(), edgeColor, true, true));
                }
            }
        }

        GraphML.writeFooter(writer);
    }
    finally
    {
        writer.close();
    }
}
/** Gene symbols for which getNodeCluster(String) returns 1. */
static Set<String> enh = new HashSet<String>(Arrays.asList("SPDEF", "PAK6", "FOXA1", "RUNX1", "TGFB1I1", "APOL2", "NSD1", "NR5A1", "PSMC3IP", "GRIP1"));
/** Gene symbols for which getNodeCluster(String) returns 2, unless they are also in enh. */
static Set<String> att = new HashSet<String>(Arrays.asList("FHL2", "IFI16", "CDK1", "NRIP1", "GAPDH", "CDC25B", "CDK6", "AHR", "TCF4", "CASP1", "DDC", "BRCA1", "PRMT1", "NR3C1", "EGFR", "PNRC1", "MDM2"));
/**
 * Maps a gene symbol to its cluster index.
 *
 * @param s gene symbol
 * @return 1 if the symbol is in <code>enh</code>, otherwise 2 if it is in
 * <code>att</code>, otherwise 0
 */
private static int getNodeCluster(String s)
{
    if (enh.contains(s))
    {
        return 1;
    }
    return att.contains(s) ? 2 : 0;
}
/**
 * For every modulator of the "Ling" triplets (sorted by symbol) prints the
 * symbol, the values of the experiments from index 179 onwards labelled
 * with the cell line names below, and a histogram over all of the gene's
 * values.
 *
 * @throws Throwable if the triplets or the stage data cannot be read
 */
public static void printExpressionHisto() throws Throwable
{
    String dir = "resource/expdata/Ling/";
    List<Triplet> trips = StageAnalyzer.readTrips(dir);

    // The stage matrix is still loaded although the filter that used it is disabled.
    StageAnalyzer.getPos(dir);

    // Labels for the experiments at indexes 179 to 184.
    final String[] cellname = {"DU145", "LNCaP", "LNCaP104R", "LNCaP104S", "PC3", "Vcap"};
    final int firstCellLine = 179;

    // One gene per modulator symbol; a later triplet replaces an earlier one.
    Map<String, Gene> bySymbol = new HashMap<String, Gene>();
    for (Triplet trip : trips)
    {
        bySymbol.put(trip.getMSym(), trip.M);
    }

    List<String> symbols = new ArrayList<String>(bySymbol.keySet());
    Collections.sort(symbols);

    for (String symbol : symbols)
    {
        System.out.println("\n");
        Gene gene = bySymbol.get(symbol);
        System.out.println(gene.getSymbol() + "\n");

        Histogram histo = new Histogram(1);
        for (int exp = 0; exp < gene.value.length; exp++)
        {
            if (exp >= firstCellLine)
            {
                System.out.println(cellname[exp - firstCellLine] + "\t" + gene.value[exp]);
            }
            histo.count(gene.value[exp]);
        }

        System.out.println();
        histo.print();
    }
}
/**
 * Prints the number of modulation triplets, the number of distinct target
 * symbols that are modulated by at least one of six fixed modulators
 * ("covd"), and the number of distinct target symbols overall ("tars").
 */
public static void printMostPopularTargets()
{
    Set<String> conf = new HashSet<String>(Arrays.asList("CDK1", "DDC", "FLNA", "GAPDH", "PRMT1", "TGFB1I1"));

    List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.05_var0.4_AR_expo.xls");
    Triplet.removeNonModulation(trips);
    System.out.println("trips.size() = " + trips.size());

    Set<String> allTargets = new HashSet<String>();
    Set<String> coveredTargets = new HashSet<String>();

    for (Triplet trip : trips)
    {
        trip.backFromURLToIDs();

        String targetSym = trip.getTSym();
        allTargets.add(targetSym);

        if (conf.contains(trip.getMSym()))
        {
            coveredTargets.add(targetSym);
        }
    }

    System.out.println("covd = " + coveredTargets.size());
    System.out.println("tars = " + allTargets.size());
}
/**
 * Examines how the two modulators NRIP1 and TGFB1I1 interact, using the
 * triplets from DependencyCalculator.loadTrips() and a threshold of 0.05.
 *
 * First, triplets built from (mod1, mod2, target) are counted and classified
 * and their class counts are printed. Then, for every loaded triplet whose
 * modulator is TGFB1I1, a Quad and a QuadOnMod are built together with mod1
 * and their category distributions are printed.
 *
 * @throws IOException declared for callers
 */
public static void printModPairInteraction() throws IOException
{
double thr = 0.05;
String modName1 = "NRIP1";
String modName2 = "TGFB1I1";
Gene mod1 = null;
Gene mod2 = null;
List<Triplet> trips = DependencyCalculator.loadTrips();
List<Gene> tars = new ArrayList<Gene>();
// Locate the Gene objects of the two modulators.
// NOTE(review): targets are collected only from the triplets visited before
// both modulators have been found (the loop breaks at that point), and a
// target is added once per visited triplet - confirm that this partial,
// possibly repetitive list is intended.
for (Triplet t : trips)
{
tars.add(t.T);
if (mod1 == null && t.getMSym().equals(modName1))
{
mod1 = t.M;
if (mod2 != null) break;
}
if (mod2 == null && t.getMSym().equals(modName2))
{
mod2 = t.M;
if (mod1 != null) break;
}
}
// NOTE(review): mod1 or mod2 stays null when its symbol does not occur in
// the loaded triplets; the code below does not check for that.
List<Quad> quads1 = new ArrayList<Quad>();
List<QuadOnMod> quads2 = new ArrayList<QuadOnMod>();
List<Triplet> mmtrips = new ArrayList<Triplet>();
// Keep the (mod1, mod2, target) triplets whose gamma p-value is below thr.
for (Gene tar : tars)
{
Triplet t = new Triplet(mod1, mod2, tar);
CaseCounter.adjustStatus(t);
CaseCounter.count(t);
if (Difference.calcGammaPval(t) < thr)
{
mmtrips.add(t);
}
}
// The kept triplets are copied into the set before assignClass runs on the
// list; both hold the same Triplet objects.
HashMap<String, Set<Triplet>> map = new HashMap<String, Set<Triplet>>();
map.put(mod1.geneid, new HashSet<Triplet>(mmtrips));
TripletClassifier.assignClass(mmtrips, thr);
printClassCounts(map);
CaseCounter.adjustStatus(mod1);
// Second pass: quads of mod1 with every triplet modulated by modName2.
for (Triplet t : trips)
{
if (t.getMSym().equals(modName2))
{
CaseCounter.adjustStatus(t);
Quad q1 = new Quad(mod1, t.M, t.F, t.T);
QuadOnMod q2 = new QuadOnMod(mod1, t.M, t.F, t.T);
q1.count();
q2.count();
if (q1.getGammaPval() < thr)
{
quads1.add(q1);
}
if (q2.getGammaPval() < thr)
{
quads2.add(q2);
}
}
}
Quad.printCategDist(quads1);
QuadOnMod.printCategDist(quads2);
}
/**
 * Prints a tab separated table of the GSE9633 expression values of a fixed
 * list of genes. The first row holds the experiment names (first line of
 * <code>expnames.txt</code> with the double quotes removed, preceded by a
 * tab); every following row holds a gene symbol and its formatted values.
 *
 * @throws IOException if the expression data cannot be read
 */
public static void printGeneValues() throws IOException
{
    String dir = "resource/expdata/GSE9633/";

    // One symbol per line; split below into the list of genes to print.
    String symbolBlock =
        "AHR\n" +
        "AR\n" +
        "BRCA1\n" +
        "CASP1\n" +
        "CDK1\n" +
        "CDK6\n" +
        "DDC\n" +
        "EFCAB6\n" +
        "FHL2\n" +
        "FLNA\n" +
        "FOXA1\n" +
        "GAPDH\n" +
        "GRIP1\n" +
        "HIPK3\n" +
        "IFI16\n" +
        "NR0B2\n" +
        "NR3C1\n" +
        "NR5A1\n" +
        "NRIP1\n" +
        "NSD1\n" +
        "PAK6\n" +
        "PRMT1\n" +
        "PRPF6\n" +
        "RAD54L2\n" +
        "RUNX1\n" +
        "SPDEF\n" +
        "SRC\n" +
        "TCF4\n" +
        "TGFB1I1";
    List<String> modNames = new ArrayList<String>(Arrays.asList(symbolBlock.split("\n")));

    Set<String> ids = new HashSet<String>();
    for (String symbol : modNames)
    {
        ids.add(Triplet.getSymbolToGeneMap().get(symbol));
    }

    Map<String, double[]> valuesByID = ExpDataReader.readSubset(ids, dir, 0, 0);

    System.out.println("\t" + FileUtil.getFirstLine(dir + "expnames.txt").replace("\"", ""));

    for (String symbol : modNames)
    {
        String id = Triplet.getSymbolToGeneMap().get(symbol);
        System.out.print(symbol);

        double[] values = valuesByID.get(id);
        for (int exp = 0; exp < values.length; exp++)
        {
            System.out.print("\t" + fmt.format(values[exp]));
        }
        System.out.println();
    }
}
/**
 * Prints three density histograms of the natural log of the DAP3 values in
 * the LNCaP dataset, separated by "------" lines: over all experiments,
 * over the experiments flagged in stage row 8, and over those flagged in
 * stage row 9.
 *
 * @throws IOException if the expression or stage data cannot be read
 */
private static void printCellLineDiff() throws IOException
{
    HashSet<String> egids = new HashSet<String>();
    egids.add(Triplet.getSymbolToGeneMap().get("DAP3"));

    String dir = "resource/expdata/LNCaP/";
    Map<String, double[]> subset = ExpDataReader.readSubset(egids, dir, 10, 0.25);

    // Only one gene was requested, so the first value array is taken.
    // The array is log-transformed in place.
    double[] logVal = subset.values().iterator().next();
    for (int exp = 0; exp < logVal.length; exp++)
    {
        logVal[exp] = Math.log(logVal[exp]);
    }

    boolean[][] pos = StageAnalyzer.getPos(dir);

    double range = 0.5;
    Histogram all = new Histogram(range);
    Histogram row8 = new Histogram(range);
    Histogram row9 = new Histogram(range);

    for (int exp = 0; exp < logVal.length; exp++)
    {
        double v = logVal[exp];
        all.count(v);
        if (pos[8][exp])
        {
            row8.count(v);
        }
        if (pos[9][exp])
        {
            row9.count(v);
        }
    }

    all.printDensity();
    System.out.println("------");
    row8.printDensity();
    System.out.println("------");
    row9.printDensity();
}
/**
 * For every modulator of the LNCaP result triplets (in order of first
 * appearance) prints one tab separated line: symbol, presence value on
 * stage row 8, presence value on stage row 9, an "X" when one of the two is
 * below 0.2 while the other is above 0.8, and the number of modulation
 * triplets of that modulator.
 *
 * @throws IOException if the expression or stage data cannot be read
 */
private static void printProstateAndLNCapPresence() throws IOException
{
    List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.01_var10.0_AR_expo_LNCaP.xls");

    // Insertion-ordered set: keeps each modulator once, in order of first appearance.
    Set<String> modIDs = new LinkedHashSet<String>();
    Map<String, Integer> targcnt = new HashMap<String, Integer>();

    for (Triplet trip : trips)
    {
        trip.backFromURLToIDs();
        modIDs.add(trip.modulator);

        Integer soFar = targcnt.get(trip.modulator);
        int count = soFar == null ? 0 : soFar;
        if (trip.isModulation())
        {
            count++;
        }
        targcnt.put(trip.modulator, count);
    }

    String dir = "resource/expdata/LNCaP/";
    boolean[][] pos = StageAnalyzer.getPos(dir);
    Map<String, double[]> valuesByID = ExpDataReader.readSubset(new HashSet<String>(modIDs), dir, 10, 0.25);

    for (String modID : modIDs)
    {
        double[] vals = valuesByID.get(modID);

        Gene gene = new Gene(modID, modID, vals.length);
        gene.value = vals;
        gene.rankAdjustStatus(1.0 / 3);

        String sym = Triplet.getGeneToSymbolMap().get(modID);
        double ln = gene.calcPresenceOnTissue(pos[8]);
        double pr = gene.calcPresenceOnTissue(pos[9]);

        boolean opposite = (ln < 0.2 && pr > 0.8) || (ln > 0.8 && pr < 0.2);
        String x = opposite ? "X" : "";

        System.out.println(sym + "\t" + fmt.format(ln) + "\t" + fmt.format(pr) + "\t" + x +
            "\t" + targcnt.get(modID));
    }
}
/**
 * Prints the number of result triplets, then "modulator symbol TAB
 * category" for every triplet whose target is KLK3, and finally the number
 * of triplets that are modulations.
 */
public static void printPSAResults()
{
    List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.01_var10.0_AR_expo.xls");
    System.out.println("trips.size() = " + trips.size());

    String psaid = Triplet.getSymbolToGeneMap().get("KLK3");

    int modulations = 0;
    for (Triplet trip : trips)
    {
        trip.backFromURLToIDs();

        if (trip.isModulation())
        {
            modulations++;
        }

        if (!trip.target.equals(psaid))
        {
            continue;
        }
        System.out.println(Triplet.getGeneToSymbolMap().get(trip.modulator) + "\t" + trip.categ);
    }

    System.out.println("modcnt = " + modulations);
}
/**
 * Prints a tab separated table with one row per gene symbol from the first
 * column of <code>resource/NuclearReceptors.txt</code> (sorted) and one
 * column per row of the "expo" stage matrix. Each cell is the value of
 * {@link CellTypeMatcher#getMeanValue} for that gene and stage row. Symbols
 * without expression data are left out.
 *
 * @throws Throwable if any of the input files cannot be read
 */
public static void printAverages() throws Throwable
{
    List<String> modList = new ArrayList<String>(TabDelimitedFileParser.getColumnSet(
        "resource/NuclearReceptors.txt", 0));
    Collections.sort(modList);

    Set<String> ids = new HashSet<String>();
    for (String symbol : modList)
    {
        ids.add(Triplet.getSymbolToGeneMap().get(symbol));
    }

    String dir = "resource/expdata/expo";
    Map<String, Gene> genesByID = ExpDataReader.readGenes(ids, dir, 0, 0);

    boolean[][] pos = getPos(dir + "/");
    String[] expname = FileUtil.getFirstLine(dir + "/stages.txt").replace("\"", "").split("\t");

    assert expname.length == pos.length :
        "expname.length = " + expname.length + "\tpos.length = " + pos.length;

    // Header row.
    for (String stageName : expname)
    {
        System.out.print("\t" + stageName);
    }

    // One row per gene that has data.
    for (String symbol : modList)
    {
        Gene gene = genesByID.get(Triplet.getSymbolToGeneMap().get(symbol));
        if (gene != null)
        {
            System.out.print("\n" + symbol);
            for (boolean[] stageRow : pos)
            {
                System.out.print("\t" + CellTypeMatcher.getMeanValue(gene, stageRow));
            }
        }
    }
    System.out.println();
}
/**
 * Looks for genes that are active in breast tissue. The genes are those in
 * the first column of <code>resource/NuclearReceptors.txt</code> plus AR.
 * All values are log-transformed; a gene is selected when its breast mean
 * is above its global mean and its breast variance is above 1. For every
 * selected gene one tab separated line is printed: symbol, global mean,
 * prostate mean, breast mean, breast variance and the Pearson correlation
 * with AR over the breast samples. The number of selected genes and the
 * breast distribution of ESR1 are printed at the end.
 *
 * Fixes compared to the previous version:
 * <ul>
 * <li>AR is looked up by its symbol instead of being taken from the
 * hard-coded position 5 of the sorted gene list, which silently pointed to
 * a different gene whenever the input list changed;</li>
 * <li>all genes are log-transformed before any correlation is computed, so
 * the AR values are on the same scale for every gene regardless of the
 * iteration order;</li>
 * <li>a missing AR or ESR1 gene is reported with an explicit message;</li>
 * <li>the unused second parse of the nuclear receptor file was removed.</li>
 * </ul>
 *
 * The selected symbols are collected in <code>tarNames</code>; they used to
 * feed a TripletMaker.make(...) call that wrote
 * <code>resource/tartrips/breast-AR-trips.txt</code> and is currently
 * disabled.
 *
 * @throws Throwable if the input files cannot be read or a required gene is
 * missing from the expression data
 */
public static void findActivityInOtherTissue() throws Throwable
{
    // Alternative input that was used before: resource/factors/AR-select-small.txt
    TabDelimitedFileParser p = new TabDelimitedFileParser("resource/NuclearReceptors.txt");
    Set<String> tarNames = p.getColumnSet(0);
    tarNames.add("AR");
    Set<String> tarIDs = IDUtil.covertSym2Gene(tarNames);

    String dir = "resource/expdata/expo/";
    Map<String, Gene> geneMap = ExpDataReader.readGenes(tarIDs, dir, 0, 0);

    boolean[][] pos = StageAnalyzer.getPos(dir);
    String[] stgNm = StageAnalyzer.getStageNames(dir);
    int indBr = ArrayUtils.getIndexOf(stgNm, "Breast");
    int indPr = ArrayUtils.getIndexOf(stgNm, "Prostate");

    List<Gene> genes = Gene.sortWithSym(geneMap.values());
    Map<String, Gene> symMap = Gene.getSymMap(genes);

    Gene ar = symMap.get("AR");
    if (ar == null)
    {
        throw new IllegalStateException("No expression data found for AR in " + dir);
    }

    // Transform everything up front so that ar.value is already in log scale
    // for every correlation computed below.
    for (Gene g : genes)
    {
        g.takeLog();
    }

    int cnt = 0;
    tarNames.clear();

    for (Gene g : genes)
    {
        double globMean = g.calcMean();
        double meanPr = g.calcMean(pos[indPr]);
        double meanBr = g.calcMean(pos[indBr]);
        double varBr = g.calcVariance(pos[indBr]);

        boolean select = meanBr > globMean && varBr > 1;

        double cor = Pearson.calcCorrelation(g.value, ar.value, pos[indBr]);

        if (select)
        {
            System.out.println(g.getSymbol() + "\t" + globMean + "\t" +
                meanPr + "\t" + meanBr + "\t" + varBr + "\t" + cor);

            cnt++;
            tarNames.add(g.getSymbol());
        }
    }
    System.out.println("cnt = " + cnt);

    Gene esr1 = symMap.get("ESR1");
    if (esr1 == null)
    {
        throw new IllegalStateException("No expression data found for ESR1 in " + dir);
    }

    Histogram h = esr1.getDistribution(pos[indBr], 0.5);
    h.print();
}
/**
 * Runs the analysis pipeline on the breast samples of the "expo" dataset
 * for the triplets in <code>resource/tartrips/breast-AR-trips.txt</code>
 * and records the result in <code>result/Breast_results.txt</code>.
 *
 * Steps, in order: associate triplets with expression data, crop every gene
 * to the breast experiments, count cases, filter to state existence, assign
 * gamma p-values, keep triplets that pass the 0.05 gamma p-value filter,
 * assign modulation categories with 0.1, remove the insignificant category,
 * record.
 *
 * @throws IOException if reading the inputs or writing the result fails
 */
public static void doGEMForBreast() throws IOException
{
    List<Triplet> loaded = Triplet.readTrips("resource/tartrips/breast-AR-trips.txt");

    String dir = "resource/expdata/expo/";
    List<Triplet> associated = ExpDataReader.associate(loaded, dir, 0, 0);

    boolean[][] pos = StageAnalyzer.getPos(dir);
    String[] stageNames = StageAnalyzer.getStageNames(dir);
    int breastRow = ArrayUtils.getIndexOf(stageNames, "Breast");

    // Restrict every gene of the triplets to the breast experiments.
    Set<Gene> genes = Triplet.collectGenes(associated);
    for (Gene gene : genes)
    {
        gene.cropExps(pos[breastRow]);
    }

    CaseCounter.count(associated, 1D / 3);
    List<Triplet> withStates = Triplet.filterToStateExistence(associated);

    // Calculate gamma
    Difference.assignGammaPval(withStates);

    // Filter out triplets with insignificant gamma. An FDR derived threshold
    // (Triplet.getPvalGammaThreshold(trips, 0.2)) was used here before.
    List<Triplet> significant = Triplet.filterToPvalGamma(withStates, 0.05);

    // Assign modulation categories
    TripletClassifier.assignClass(significant, 0.1);

    Triplet.removeCateg(significant, Triplet.MOA_INSIGNIFICANT);
    Triplet.record(significant, "result/Breast_results.txt");
}
/**
 * A fixed list of gene symbols, stored as one newline separated literal and
 * split into an array when the class is initialised. Not read by any code
 * in this class.
 * NOTE(review): going by the name these are AR targets in breast tissue -
 * confirm the origin of the list.
 */
public static final String[] AR_TARS_BREAST = ("ATAD2\n" +
"C1orf21\n" +
"DHCR24\n" +
"FAM174B\n" +
"GNMT\n" +
"IGF1R\n" +
"KCNMA1\n" +
"LIFR\n" +
"MAK\n" +
"ORM1\n" +
"PMEPA1\n" +
"SLC16A6\n" +
"SNAI2\n" +
"SOCS2\n" +
"SORD\n" +
"TMEM79\n" +
"ZBTB16").split("\n");
}