package gem;
import gem.parser.TabDelimitedFileParser;
import gem.util.Binomial;
import gem.util.GraphML;
import java.awt.*;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.*;
import java.util.List;
public class ActivityPredicter implements Constants
{
// Change-type selector: Group.calculate compares the target change with the modulator term
// (Difference.calcBetaM / calcBetaMpval).
public static final int FAC_PRES_MOD_CH = 0;
// Change-type selector: Group.calculate compares the target change with the factor term
// (calcAlphaF for modulators listed in DW_MODS, calcBetaF otherwise).
public static final int FAC_CH_MOD_FIX = 1;
// The change type this class passes to Group.calculate.
public static final int CH_TYPE = FAC_PRES_MOD_CH;
// Threshold that the p-values in this class are compared against.
public static final double THR = 0.05;
// Modulator symbols for which the alpha-F term is used instead of the beta-F term.
// NOTE(review): the meaning of "dw" is defined in CellLineCorrecter - confirm there.
private static final List<String> DW_MODS = CellLineCorrecter.dwNames_LNCaP;
/**
 * Entry point. Loads the stage masks and triplets from a hard-coded data directory, scores
 * every modulator group between stage 1 and stage 2, and prints the results to standard
 * output. Also writes the dependency graph via {@code createDependencyGraph}.
 *
 * @param args ignored
 * @throws Throwable whatever the analysis steps raise is propagated unchanged
 */
public static void main(String[] args) throws Throwable
{
    // Alternative dataset that has been used with this driver: "resource/expdata/GSE27682/"
    final String dataDir = "resource/expdata/AR-GR/";
    final boolean[][] stageMasks = StageAnalyzer.getPos(dataDir);
    final List<Triplet> triplets = StageAnalyzer.readTrips(dataDir);

    // Optional diagnostics that can be enabled here while debugging:
    //   triplets = filterTars(triplets);
    //   printModPresence(triplets, stageMasks);

    final List<Group> modulatorGroups = groupTriplets(triplets);

    // Stages being compared. An alternative is a union of stages:
    //   StageAnalyzer.getUnion(stageMasks, 0, 1) versus StageAnalyzer.getUnion(stageMasks, 4, 5)
    final boolean[] pos1 = stageMasks[1];
    final boolean[] pos2 = stageMasks[2];

    for (Group current : modulatorGroups)
    {
        current.calculate(pos1, pos2, CH_TYPE, null);
        System.out.println(current);
        // Optional: System.out.println(getClassCounts(current.actTrips));
        // Optional: System.out.println(getClassCounts(current.inhTrips));
    }

    // Optional: printTarChangeStat(triplets, pos1, pos2);
    printAgreement(modulatorGroups);
    System.out.println();

    // Report the expression change of a few genes of interest, in this order.
    final String[] reportedSymbols = {"AR", "KLK3", "FOXA1"};
    for (String symbol : reportedSymbols)
    {
        printExpChange(getGene(triplets, symbol), pos1, pos2);
    }
    System.out.println();

    createDependencyGraph(modulatorGroups, pos1, pos2);
    predictFactorChange(triplets, pos1, pos2);
}
/**
 * Returns the triplets whose target symbol appears in the set that
 * {@code getColumnSet(3)} yields for the tab-delimited file "resource/factors/temp".
 *
 * @param trips triplets to filter; not modified
 * @return a new list holding the retained triplets in their original order
 */
private static List<Triplet> filterTars(List<Triplet> trips)
{
    Set<String> allowedTargets =
        new TabDelimitedFileParser("resource/factors/temp").getColumnSet(3);

    List<Triplet> kept = new ArrayList<Triplet>();
    for (Triplet candidate : trips)
    {
        if (!allowedTargets.contains(candidate.getTSym())) continue;
        kept.add(candidate);
    }
    return kept;
}
/**
 * Looks up a gene by symbol among the genes referenced by the given triplets.
 * Within each triplet the modulator is checked first, then the factor, then the target;
 * the first match in list order wins.
 *
 * @param trips  triplets to search
 * @param symbol gene symbol to look for
 * @return the matching gene, or {@code null} when no triplet references the symbol
 */
private static Gene getGene(List<Triplet> trips, String symbol)
{
    Iterator<Triplet> cursor = trips.iterator();
    while (cursor.hasNext())
    {
        Triplet trip = cursor.next();
        if (trip.getMSym().equals(symbol))
        {
            return trip.M;
        }
        if (trip.getFSym().equals(symbol))
        {
            return trip.F;
        }
        if (trip.getTSym().equals(symbol))
        {
            return trip.T;
        }
    }
    return null;
}
/**
 * Partitions the triplets by their {@code modulator} key, creating one {@code Group} per
 * distinct key (built from the first triplet seen for it).
 *
 * @param trips triplets to partition
 * @return the groups, sorted by Group's natural ordering (which follows Group.sortType)
 */
private static List<Group> groupTriplets(List<Triplet> trips)
{
    Map<String, Group> byModulator = new HashMap<String, Group>();
    for (Triplet trip : trips)
    {
        Group bucket = byModulator.get(trip.modulator);
        if (bucket == null)
        {
            bucket = new Group(trip.M);
            byModulator.put(trip.modulator, bucket);
        }
        bucket.trips.add(trip);
    }

    List<Group> sorted = new ArrayList<Group>(byModulator.values());
    Collections.sort(sorted);
    return sorted;
}
/**
 * Counts, over the distinct target genes of the triplets, how many change significantly
 * (p-value below THR) between the two sample sets, split by the sign of the mean change,
 * and prints both counts.
 *
 * @param trips triplets whose targets are examined
 * @param pos1  sample mask of the first condition
 * @param pos2  sample mask of the second condition
 */
private static void printTarChangeStat(List<Triplet> trips, boolean[] pos1, boolean[] pos2)
{
    Set<Gene> seenTargets = new HashSet<Gene>();
    int upCount = 0;
    int downCount = 0;

    for (Triplet trip : trips)
    {
        // Set.add returns false when the target has already been handled.
        if (!seenTargets.add(trip.T)) continue;

        double changePval = CellTypeMatcher.getChangePvalBetweenTissues(trip.T, pos1, pos2);
        if (!(changePval < THR)) continue;

        if (CellTypeMatcher.getMeanChange(trip.T, pos1, pos2) > 0)
        {
            upCount++;
        }
        else
        {
            downCount++;
        }
    }

    System.out.println("Upregulated targets = " + upCount);
    System.out.println("Downregulated targets = " + downCount);
}
/**
 * Activity prediction is performed for each modulator as if they were independent; however,
 * modulators share many target genes, so they are not. If excluding the targets that A has in
 * common with B changes the activity call for A, we say the prediction on A depends on B.
 *
 * <p>The method writes the dependency graph to "temp.graphml" (relative to the working
 * directory): one node per group whose current {@code activity} is not NOTR and one edge
 * A --> B per detected dependency. Each dependency is also printed, followed by a summary
 * table sorted by dependency counts.
 *
 * <p>Side effects: the groups' {@code depends}, {@code dependants} and {@code overlaps} sets are
 * filled, every changed group is recalculated with no ignored targets before returning, and
 * {@code Group.sortType} is left set to SORT_TO_DEPENDENCY.
 *
 * @param groups groups whose {@code activity} has already been computed by the caller
 * @param pos1   sample mask of the first condition
 * @param pos2   sample mask of the second condition
 * @throws IOException if the graph file cannot be created or written
 */
private static void createDependencyGraph(List<Group> groups, boolean[] pos1, boolean[] pos2) throws IOException
{
    List<Group> changed = new ArrayList<Group>();

    BufferedWriter writer = new BufferedWriter(new FileWriter("temp.graphml"));
    try
    {
        GraphML.writeHeader(writer);

        for (Group group : groups)
        {
            if (group.activity != NOTR)
            {
                changed.add(group);
                writer.write(GraphML.createNodeData(group.mod, group.mod, Color.WHITE, 0, true));
            }
        }

        for (Group g1 : changed)
        {
            // Baseline call for g1 with all of its targets included.
            g1.calculate(pos1, pos2, CH_TYPE, null);
            int activity = g1.activity;

            for (Group g2 : changed)
            {
                if (g1 == g2) continue;

                // Re-score g1 without the targets it shares with g2.
                g1.calculate(pos1, pos2, CH_TYPE, g1.getCommonTars(g2));

                if (g1.activity != activity)
                {
                    g1.depends.add(g2);
                    g2.dependants.add(g1);
                    System.out.println(g1.mod + "\t-->\t" + g2.mod);
                    writer.write(GraphML.createEdgeData(g1.mod, g2.mod, Color.GRAY, true, true));
                }
            }
        }

        fillOverlaps(changed);
        GraphML.writeFooter(writer);
    }
    finally
    {
        // Release the file handle even when a write or a calculation fails.
        writer.close();
    }

    // The loop above leaves each group scored against its last comparison partner;
    // restore the unrestricted scores before reporting.
    for (Group g : changed)
    {
        g.calculate(pos1, pos2, CH_TYPE, null);
    }

    Group.sortType = SORT_TO_DEPENDENCY;
    Collections.sort(changed);

    System.out.println("\nModulator\tDepending\tDependants\tMutual");
    for (Group group : changed)
    {
        int match = group.activity * group.expression;
        String mStr = match == 1 ? "+" : match == -1 ? "-" : "";
        System.out.println(group.mod + "\t" + group.depends.size() + "\t" +
            group.dependants.size() + "\t" + group.overlaps.size() + "\t" + mStr);
    }
}
/**
 * Reclassifies mutual dependencies. For every group, each other group that is present in
 * both its {@code depends} and its {@code dependants} set is moved out of those two sets and
 * into its {@code overlaps} set.
 *
 * @param groups groups whose dependency sets are updated in place
 */
public static void fillOverlaps(List<Group> groups)
{
    for (Group owner : groups)
    {
        // Iterate over a snapshot because owner.depends is modified inside the loop.
        Set<Group> snapshot = new HashSet<Group>(owner.depends);
        for (Group other : snapshot)
        {
            boolean mutual = owner.dependants.contains(other);
            if (!mutual) continue;

            owner.overlaps.add(other);
            owner.depends.remove(other);
            owner.dependants.remove(other);
        }
    }
}
/**
 * Prints one tab-separated line describing how a gene's expression differs between two
 * sample sets: symbol, mean value in {@code pos1}, mean value in {@code pos2}, mean change,
 * and the p-value of the change.
 *
 * @param gene gene to report; must not be {@code null}
 * @param pos1 sample mask of the first condition
 * @param pos2 sample mask of the second condition
 */
public static void printExpChange(Gene gene, boolean[] pos1, boolean[] pos2)
{
    double pv = CellTypeMatcher.getChangePvalBetweenTissues(gene, pos1, pos2);
    double mch = CellTypeMatcher.getMeanChange(gene, pos1, pos2);
    double before = CellTypeMatcher.getMeanValue(gene, pos1);
    double after = CellTypeMatcher.getMeanValue(gene, pos2);

    System.out.println(gene.getSymbol() + "\t" + fmt.format(before) + "\t" + fmt.format(after) +
        "\t" + fmt.format(mch) + "\t" + pv);
}
/**
 * Prints the presence fraction, per sample set, of every distinct modulator gene in the
 * triplets plus the factor gene of the first triplet.
 *
 * @param trips triplets supplying the genes; must contain at least one element
 * @param pos   one sample mask per printed column
 */
private static void printModPresence(List<Triplet> trips, boolean[][] pos)
{
    List<Gene> genes = new ArrayList<Gene>();
    for (Triplet trip : trips)
    {
        Gene modulator = trip.M;
        if (genes.contains(modulator)) continue;
        genes.add(modulator);
    }

    // The factor of the first triplet is appended unconditionally.
    Gene factor = trips.iterator().next().F;
    genes.add(factor);

    Collections.sort(genes);
    printGenePresence(genes, pos);
}
/**
 * Prints one line per gene: its symbol followed by a tab-separated, formatted presence
 * fraction for each sample mask in {@code pos}.
 *
 * @param genes genes to report, printed in iteration order
 * @param pos   sample masks, one per column
 */
private static void printGenePresence(Collection<Gene> genes, boolean[][] pos)
{
    for (Gene current : genes)
    {
        System.out.print(current.getSymbol());
        for (int column = 0; column < pos.length; column++)
        {
            double presence = getPresence(current, pos[column]);
            System.out.print("\t" + fmt.format(presence));
        }
        System.out.println();
    }
}
/**
 * Computes the fraction of selected samples in which the gene is PRESENT, out of those in
 * which it is either PRESENT or ABSENT. Samples with any other status are not counted.
 * If the gene has no status array yet, {@code rankAdjustStatus(1/3)} is invoked first.
 *
 * @param gene gene to evaluate
 * @param pos  mask selecting the samples to consider
 * @return present / (present + absent); NaN when neither status occurs in the selection
 */
private static double getPresence(Gene gene, boolean[] pos)
{
    if (gene.status == null)
    {
        gene.rankAdjustStatus(1.0 / 3);
    }

    int presentCount = 0;
    int absentCount = 0;
    for (int sample = 0; sample < pos.length; sample++)
    {
        if (pos[sample])
        {
            if (gene.status[sample] == PRESENT)
            {
                presentCount++;
            }
            else if (gene.status[sample] == ABSENT)
            {
                absentCount++;
            }
        }
    }

    int counted = absentCount + presentCount;
    return presentCount / (double) counted;
}
/**
 * Predicts the direction of the factor's activity change from the targets that change
 * significantly between the two sample sets.
 *
 * <p>Triplets are bucketed by target. For each significantly changed target, the factor term
 * of every triplet is examined (alpha-F when the modulator is listed in DW_MODS, beta-F
 * otherwise); only terms with p-value below THR count. A target is skipped when its
 * significant terms disagree in sign or when it has none. Otherwise it votes "act" if the
 * term's sign matches the direction of the target's mean change and "inh" if not. The two
 * tallies, the binomial p-value and the resulting status (0, 1 or -1) are printed.
 *
 * @param trips triplets to analyse
 * @param pos1  sample mask of the first condition
 * @param pos2  sample mask of the second condition
 */
private static void predictFactorChange(List<Triplet> trips, boolean[] pos1, boolean[] pos2)
{
    System.out.println();

    // Bucket the triplets by their target gene.
    Map<Gene, Set<Triplet>> byTarget = new HashMap<Gene, Set<Triplet>>();
    for (Triplet trip : trips)
    {
        Set<Triplet> bucket = byTarget.get(trip.T);
        if (bucket == null)
        {
            bucket = new HashSet<Triplet>();
            byTarget.put(trip.T, bucket);
        }
        bucket.add(trip);
    }

    int agreeing = 0;
    int opposing = 0;
    for (Map.Entry<Gene, Set<Triplet>> entry : byTarget.entrySet())
    {
        Gene target = entry.getKey();

        // Only targets that change significantly take part in the vote.
        double targetPval = CellTypeMatcher.getChangePvalBetweenTissues(target, pos1, pos2);
        if (!(targetPval < THR)) continue;
        double meanChange = CellTypeMatcher.getMeanChange(target, pos1, pos2);

        int sign = 0;
        boolean conflicting = false;
        boolean anyHit = false;
        for (Triplet trip : entry.getValue())
        {
            boolean useAlpha = DW_MODS.contains(trip.getMSym());
            double termPval = useAlpha ? Difference.calcAlphaFpval(trip) : Difference.calcBetaFpval(trip);
            double term = useAlpha ? Difference.calcAlphaF(trip) : Difference.calcBetaF(trip);

            if (!(termPval < THR)) continue;

            // A significant term pointing the other way makes this target unusable.
            if (term * sign < 0)
            {
                conflicting = true;
                break;
            }
            sign = (int) Math.signum(term);
            anyHit = true;
        }

        if (conflicting || !anyHit) continue;

        if (sign * meanChange > 0)
        {
            agreeing++;
        }
        else
        {
            opposing++;
        }
    }

    double pval = Binomial.getPval(agreeing, opposing);
    int status;
    if (pval > THR)
    {
        status = 0;
    }
    else if (agreeing > opposing)
    {
        status = 1;
    }
    else
    {
        status = -1;
    }

    System.out.println("Factor chg\t" + agreeing + "\t" + opposing + "\t" + fmt.format(pval) +
        "\t" + status);
}
/**
 * Prints how often the predicted activity change and the observed expression change of the
 * groups coincide: the number of groups with a non-zero activity, with a non-zero expression,
 * with both, the value (actch * expch) / groups.size() labelled "expected", and the number
 * of groups where the two signs agree or disagree.
 *
 * @param groups groups whose {@code activity} and {@code expression} are already computed
 */
static void printAgreement(List<Group> groups)
{
    int activityChanged = 0;
    int expressionChanged = 0;
    int bothChanged = 0;
    int sameSign = 0;
    int oppositeSign = 0;

    for (Group current : groups)
    {
        activityChanged += (current.activity != 0) ? 1 : 0;
        expressionChanged += (current.expression != 0) ? 1 : 0;

        int product = current.activity * current.expression;
        if (product == 0) continue;

        bothChanged++;
        if (product == 1)
        {
            sameSign++;
        }
        else if (product == -1)
        {
            oppositeSign++;
        }
    }

    double expected = (activityChanged * expressionChanged) / (double) groups.size();

    System.out.println("\n---------");
    System.out.println("actch = " + activityChanged);
    System.out.println("expch = " + expressionChanged);
    System.out.println("bothch = " + bothChanged);
    System.out.println("expected = " + expected);
    System.out.println("agree = " + sameSign);
    System.out.println("disagree = " + oppositeSign);
    System.out.println("---------\n");
}
// Values for Group.sortType, which selects the ordering used by Group.compareTo.
// Order groups by their modulator symbol.
public static final int SORT_TO_NAME = 0;
// Order groups by the sizes of their depends / dependants / overlaps sets.
public static final int SORT_TO_DEPENDENCY = 1;
/**
 * The triplets that share one modulator, together with the activity/expression calls derived
 * from them by {@link #calculate} and the dependency bookkeeping filled in by
 * {@code createDependencyGraph}.
 *
 * <p>The natural ordering is controlled by the static {@link #sortType} switch and is
 * therefore shared by all instances.
 */
static class Group implements Comparable<Group>
{
    /** Ordering used by {@link #compareTo}: SORT_TO_NAME or SORT_TO_DEPENDENCY. */
    static int sortType = SORT_TO_NAME;

    /** Symbol of the modulator, looked up from the gene id at construction time. */
    String mod;
    /** The modulator gene. */
    Gene M;
    /** All triplets of this modulator. */
    Set<Triplet> trips;
    /** Triplets counted towards {@link #actCnt} by the last {@link #calculate} call. */
    Set<Triplet> actTrips;
    /** Triplets counted towards {@link #inactCnt} by the last {@link #calculate} call. */
    Set<Triplet> inhTrips;
    int actCnt;
    int inactCnt;
    /** Binomial p-value of {@link #actCnt} versus {@link #inactCnt}. */
    double pval;
    /** Expression call for the modulator gene: UP, DOWN or NOTR. */
    int expression;
    /** Activity call for the modulator: UP, DOWN or NOTR. */
    int activity;
    /** Groups this group's activity call depends on. */
    Set<Group> depends;
    /** Groups whose activity call depends on this group. */
    Set<Group> dependants;
    /** Groups with a dependency in both directions (see {@code fillOverlaps}). */
    Set<Group> overlaps;

    /**
     * Creates an empty group for the given modulator gene.
     *
     * @param m the modulator gene
     */
    Group(Gene m)
    {
        M = m;
        mod = Triplet.getGeneToSymbolMap().get(M.geneid);
        trips = new HashSet<Triplet>();
        actTrips = new HashSet<Triplet>();
        inhTrips = new HashSet<Triplet>();
        depends = new HashSet<Group>();
        dependants = new HashSet<Group>();
        overlaps = new HashSet<Group>();
    }

    /**
     * Compares by modulator symbol, or, when {@link #sortType} is SORT_TO_DEPENDENCY, by
     * ascending number of dependencies, then descending number of dependants, then
     * descending number of overlaps.
     */
    public int compareTo(Group other)
    {
        if (sortType == SORT_TO_DEPENDENCY)
        {
            int c = compareInts(depends.size(), other.depends.size());
            if (c == 0) c = compareInts(other.dependants.size(), dependants.size());
            if (c == 0) c = compareInts(other.overlaps.size(), overlaps.size());
            return c;
        }
        return mod.compareTo(other.mod);
    }

    /** Three-way comparison of two ints without boxing. */
    private static int compareInts(int a, int b)
    {
        return a < b ? -1 : (a == b ? 0 : 1);
    }

    /**
     * Recomputes the counts, {@link #pval}, {@link #activity} and {@link #expression} of this
     * group for the change between the two sample sets.
     *
     * <p>A triplet contributes only if its target changes significantly (p-value below THR)
     * and the selected triplet term is significant (p-value not above THR). It is counted as
     * "act" when the term and the target's mean change have the same sign, otherwise as "inh".
     *
     * @param pos1       sample mask of the first condition
     * @param pos2       sample mask of the second condition
     * @param type       FAC_PRES_MOD_CH to use the beta-M term, or FAC_CH_MOD_FIX to use the
     *                   factor term (alpha-F for DW_MODS modulators, beta-F otherwise)
     * @param ignoreTars target keys to leave out, or {@code null} to use every triplet
     */
    void calculate(boolean[] pos1, boolean[] pos2, int type, Set<String> ignoreTars)
    {
        actCnt = 0;
        inactCnt = 0;
        actTrips.clear();
        inhTrips.clear();

        for (Triplet t : trips)
        {
            if (ignoreTars != null && ignoreTars.contains(t.target)) continue;

            double pv = CellTypeMatcher.getChangePvalBetweenTissues(t.T, pos1, pos2);
            if (pv < THR)
            {
                double mch = CellTypeMatcher.getMeanChange(t.T, pos1, pos2);

                // For any other 'type' both stay 0, so the triplet is counted as "inh".
                double val = 0;
                double termPval = 0;

                if (type == FAC_PRES_MOD_CH)
                {
                    val = Difference.calcBetaM(t);
                    termPval = Difference.calcBetaMpval(t);
                }
                else if (type == FAC_CH_MOD_FIX)
                {
                    if (DW_MODS.contains(t.getMSym()))
                    {
                        val = Difference.calcAlphaF(t);
                        termPval = Difference.calcAlphaFpval(t);
                    }
                    else
                    {
                        val = Difference.calcBetaF(t);
                        termPval = Difference.calcBetaFpval(t);
                    }
                }

                if (termPval > THR) continue;

                if (mch * val > 0)
                {
                    actCnt++;
                    actTrips.add(t);
                }
                else
                {
                    inactCnt++;
                    inhTrips.add(t);
                }
            }
        }

        pval = Binomial.getPval(actCnt, inactCnt);

        if (pval >= THR) activity = NOTR;
        else if (actCnt > inactCnt) activity = UP;
        else activity = DOWN;

        // Expression call of the modulator gene itself.
        double pv = CellTypeMatcher.getChangePvalBetweenTissues(M, pos1, pos2);
        if (pv < THR)
        {
            double mch = CellTypeMatcher.getMeanChange(M, pos1, pos2);
            if (mch > 0) expression = UP;
            else expression = DOWN;
        }
        else expression = NOTR;
    }

    /**
     * Tab-separated summary: symbol, act count, inh count, formatted p-value, activity call,
     * expression call.
     */
    @Override
    public String toString()
    {
        return mod + "\t" + actCnt + "\t" + inactCnt + "\t" + fmt.format(pval) + "\t" +
            activity + "\t" + expression;
    }

    /**
     * @param g the group to compare with
     * @return a new set with the target keys present in both this group and {@code g}
     */
    public Set<String> getCommonTars(Group g)
    {
        Set<String> tars = getTarSymbs();
        Set<String> g2tars = g.getTarSymbs();
        tars.retainAll(g2tars);
        return tars;
    }

    /**
     * @param g the group to compare with
     * @return a new set with the target keys of this group that {@code g} does not have
     */
    public Set<String> getUncommonTars(Group g)
    {
        Set<String> tars = getTarSymbs();
        Set<String> g2tars = g.getTarSymbs();
        tars.removeAll(g2tars);
        return tars;
    }

    /**
     * @return a new, modifiable set with the {@code target} key of every triplet in this group
     */
    public Set<String> getTarSymbs()
    {
        Set<String> tars = new HashSet<String>();
        for (Triplet t : trips)
        {
            tars.add(t.target);
        }
        return tars;
    }
}
/**
 * Text form of a histogram over triplet categories. Counts the triplets per entry of TYPES
 * and lays the twelve counts out in two lines of six, each line split into two tab-separated
 * groups of three by a double tab.
 *
 * @param trips triplets to classify by their {@code categ}
 * @return the two-line, tab-separated count table
 */
private static String getClassCounts(Set<Triplet> trips)
{
    int[] counts = new int[12];
    for (Triplet trip : trips)
    {
        int slot = 0;
        while (slot < TYPES.length && !trip.categ.equals(TYPES[slot]))
        {
            slot++;
        }
        // Triplets whose category is not listed in TYPES are not counted.
        if (slot < TYPES.length)
        {
            counts[slot]++;
        }
    }

    StringBuilder table = new StringBuilder();
    for (int i = 0; i < 12; i++)
    {
        if (i == 6)
        {
            table.append("\n");
        }
        else if (i % 6 == 3)
        {
            table.append("\t\t");
        }
        else if (i != 0)
        {
            table.append("\t");
        }
        table.append(counts[i]);
    }
    return table.toString();
}
// Values of Group.activity and Group.expression. The sign convention is relied upon:
// the product of two calls is 1 when they agree and -1 when they are opposite.
// No significant change.
public static final int NOTR = 0;
// Significant change in the positive direction.
public static final int UP = 1;
// Significant change in the negative direction.
public static final int DOWN = -1;
// Triplet categories in the order getClassCounts reports them: the six *_ACTIVATION/ACTIVATE
// categories first, then the six *_INHIBITION/INHIBIT categories.
public static final String[] TYPES = new String[]{
ENHANCES_ACTIVATION, ATTENUATES_ACTIVATION, INVERTS_ACTIVATION, XOR_ACTIVATE, OR_ACTIVATE, FMOD_ACTIVATE,
ENHANCES_INHIBITION, ATTENUATES_INHIBITION, INVERTS_INHIBITION, XOR_INHIBIT, OR_INHIBIT, FMOD_INHIBIT};
}