package gem;
import gem.parser.HGNCParser;
import java.io.*;
import java.util.*;
import gem.util.Histogram;
import gem.util.Histogram2D;
import gem.util.TermCounter;
import gem.util.Pearson;
/**
* Modulator - factor - target triplet for GEM analysis.
*
* @author Ozgun Babur
*/
public class Triplet implements Constants
{
/**
* Map from Entrez Gene ID to HGNC Gene Symbol. Loaded lazily by
* {@link #getGeneToSymbolMap()}; it is null until that method (or
* {@link #getSymbolToGeneMap()}) has been called.
*/
protected static Map<String, String> geneToSymbolMap;
/**
* Map from HGNC Gene Symbol to Entrez Gene ID. Built as the inverse of
* {@link #geneToSymbolMap}; null until the maps have been prepared.
*/
protected static Map<String, String> symbolToGeneMap;
/**
* Modulation category of the triplet. Stays null until it is read from a triplet file or
* assigned by a classifier/filter.
*/
public String categ;
/**
* Entrez Gene ID of modulator
*/
public String modulator;
/**
* Entrez Gene ID of factor.
*/
public String factor;
/**
* Entrez Gene ID of target.
*/
public String target;
/**
* GenBank ID of modulator.
*/
public String mod_id;
/**
* GenBank ID of factor.
*/
public String fac_id;
/**
* GenBank ID of target.
*/
public String tar_id;
/**
* Modulator gene.
*/
public Gene M;
/**
* Factor gene.
*/
public Gene F;
/**
* Target gene.
*/
public Gene T;
/**
* Counts of the 8 bins (M-F-T statuses) that are used in GEM calculation.
* cnt[i] and cnt[i + 4] (i = 0..3) form a pair: toString() writes them as adjacent columns
* and reports cnt[i + 4] / (cnt[i] + cnt[i + 4]) as the proportion of that pair.
*/
public int[] cnt = new int[8];
/**
* Counts of 8 bins for a given tissue type. Never assigned inside this class.
*/
public int[] cnt_tiss;
/**
* P-value of gamma.
*/
public double pvalGamma;
/**
* P-value of betaM. Not read from or written to the triplet file by this class.
*/
public double pvalBetaM;
// NOTE(review): dcmi / pvalDcmi are undocumented in the original source and only appear in
// commented-out file I/O plus the Dcmi ordering/filtering helpers; presumably a score and its
// p-value -- confirm the exact meaning with the author.
public double dcmi;
public double pvalDcmi;
/**
 * Creates a triplet that is identified only by Entrez Gene IDs. GenBank IDs, gene objects,
 * category and p-values are left at their defaults.
 *
 * @param modulator Entrez Gene ID of the modulator
 * @param factor Entrez Gene ID of the factor
 * @param target Entrez Gene ID of the target
 */
public Triplet(String modulator, String factor, String target)
{
    this.target = target;
    this.factor = factor;
    this.modulator = modulator;
}
/**
 * Creates a triplet from three gene objects, copying each gene's Entrez Gene ID
 * ({@code geneid}) and GenBank ID ({@code id}) into the corresponding string fields.
 *
 * @param M modulator gene
 * @param F factor gene
 * @param T target gene
 */
public Triplet(Gene M, Gene F, Gene T)
{
    // Modulator
    this.M = M;
    this.modulator = M.geneid;
    this.mod_id = M.id;

    // Factor
    this.F = F;
    this.factor = F.geneid;
    this.fac_id = F.id;

    // Target
    this.T = T;
    this.target = T.geneid;
    this.tar_id = T.id;
}
/**
 * Creates a triplet from one tab-separated data line of the triplet file.
 * <p>
 * Column layout (0-based), matching {@link #getColNames()}: 0-2 gene IDs, 3-5 GenBank IDs,
 * 6-13 the eight bin counts stored pairwise as cnt[k], cnt[k + 4], 27 the p-value of gamma and
 * 28 the mode of action. Columns 14-26 are skipped. Lines with fewer than 3 columns leave the
 * triplet empty; lines with fewer than 20 columns only set the three gene IDs.
 * <p>
 * NOTE(review): the extended section is entered at 20 columns but reads up to column 28, so a
 * line with 20-28 columns fails with an ArrayIndexOutOfBoundsException -- confirm whether the
 * guard should be 29.
 *
 * @param line data line of the triplet file
 * @throws RuntimeException if a numeric column cannot be parsed or a column is missing; the
 * offending line is printed to standard output first
 */
public Triplet(String line)
{
    final String[] cols = line.split("\t");
    try
    {
        if (cols.length >= 3)
        {
            this.modulator = cols[0];
            this.factor = cols[1];
            this.target = cols[2];
        }
        if (cols.length >= 20)
        {
            this.mod_id = cols[3];
            this.fac_id = cols[4];
            this.tar_id = cols[5];
            // The file interleaves the pairs: cnt[0], cnt[4], cnt[1], cnt[5], ...
            for (int k = 0; k < 4; k++)
            {
                this.cnt[k] = Integer.parseInt(cols[6 + 2 * k]);
                this.cnt[k + 4] = Integer.parseInt(cols[7 + 2 * k]);
            }
            this.pvalGamma = Double.parseDouble(cols[27]);
            this.categ = cols[28];
        }
    }
    catch (RuntimeException e)
    {
        System.out.println(line);
        throw e;
    }
}
/**
 * Builds the tab-separated header line of the triplet file.
 *
 * @return the 29 column names joined by tab characters, without a trailing newline
 */
public static String getColNames()
{
    String[] names = {
        "Modulator Gene Symbol", "Factor Gene Symbol", "Target Gene Symbol",
        "Modulator GenBank ID", "Factor GenBank ID", "Target GenBank ID",
        "f000", "f001", "f010", "f011", "f100", "f101", "f110", "f111",
        "p00", "p01", "p10", "p11",
        "alpha_F", "pval of alpha_F",
        "beta_F", "pval of beta_F",
        "beta_M", "pval of beta_M",
        "alpha_F + beta_M", "pval of alpha_F + beta_M",
        "gamma", "pval of gamma",
        "Mode of action"
    };
    StringBuilder header = new StringBuilder();
    for (int k = 0; k < names.length; k++)
    {
        if (k > 0) header.append('\t');
        header.append(names[k]);
    }
    return header.toString();
}
/**
 * Renders the triplet as one tab-separated line of the triplet file: the six IDs from
 * {@link #getGenes()}, the eight bin counts written pairwise (cnt[i], cnt[i + 4]), the four
 * pair proportions, the coefficients and p-values computed by {@code Difference}, and finally
 * the category. No newline is appended.
 *
 * @return the data line for this triplet
 */
@Override
public String toString()
{
    StringBuilder row = new StringBuilder(getGenes());

    for (int k = 0; k < 4; k++)
    {
        row.append("\t").append(cnt[k]).append("\t").append(cnt[k + 4]);
    }
    for (int k = 0; k < 4; k++)
    {
        // Floating point division: an empty pair yields NaN rather than an exception.
        double ratio = cnt[k + 4] / (cnt[k] + (double) cnt[k + 4]);
        row.append("\t").append(ratio);
    }

    int[] totals = Difference.calcTotals(cnt);
    double[] props = Difference.calcProportions(cnt, totals);

    row.append("\t").append(Difference.calcAlphaF(props));
    row.append("\t").append(Difference.calcAlphaFpval(cnt, totals));
    row.append("\t").append(Difference.calcBetaF(props));
    row.append("\t").append(Difference.calcBetaFpval(cnt, totals));
    row.append("\t").append(Difference.calcBetaM(props));
    row.append("\t").append(Difference.calcBetaMpval(cnt, totals));
    row.append("\t").append(Difference.calcAlphaFplusBetaM(props));
    row.append("\t").append(Difference.calcAlphaFplusBetaMpval(cnt, totals));
    row.append("\t").append(Difference.calcGamma(props));
    row.append("\t").append(Difference.calcGammaPval(cnt));
    // The dcmi / pvalDcmi columns and the per-tissue counts are intentionally not written.
    row.append("\t").append(categ);
    return row.toString();
}
/**
 * Formats the four pair proportions cnt[i + 4] / (cnt[i] + cnt[i + 4]) with {@code fmt}.
 *
 * @return the formatted proportions, each preceded by a single space
 */
public String getProportionsInString()
{
    StringBuilder text = new StringBuilder();
    for (int k = 0; k < 4; k++)
    {
        double ratio = cnt[k + 4] / (cnt[k] + (double) cnt[k + 4]);
        text.append(" ").append(fmt.format(ratio));
    }
    return text.toString();
}
/**
 * Formats the coefficients alphaF, alphaM, betaF and betaM (in that order) with {@code fmt}.
 *
 * @return the formatted coefficients, each preceded by a single space
 */
public String getCoeffInString()
{
    int[] totals = Difference.calcTotals(cnt);
    double[] props = Difference.calcProportions(cnt, totals);
    StringBuilder text = new StringBuilder();
    text.append(" ").append(fmt.format(Difference.calcAlphaF(props)));
    text.append(" ").append(fmt.format(Difference.calcAlphaM(props)));
    text.append(" ").append(fmt.format(Difference.calcBetaF(props)));
    text.append(" ").append(fmt.format(Difference.calcBetaM(props)));
    return text.toString();
}
/**
 * Joins the three gene IDs and the three GenBank IDs with tabs. Unset fields appear as the
 * text "null".
 *
 * @return modulator, factor, target, mod_id, fac_id, tar_id separated by tabs
 */
public String getGenes()
{
    String[] parts = {modulator, factor, target, mod_id, fac_id, tar_id};
    StringBuilder joined = new StringBuilder();
    for (int k = 0; k < parts.length; k++)
    {
        if (k > 0) joined.append("\t");
        joined.append(parts[k]);
    }
    return joined.toString();
}
/**
 * Returns the modulator's GenBank ID without the part that follows the first '|'.
 * An ID that contains no '|' is returned unchanged instead of raising a
 * StringIndexOutOfBoundsException.
 *
 * @return the portion of {@code mod_id} before the first '|', or all of it if there is none
 */
public String getMod_id()
{
    int bar = mod_id.indexOf("|");
    return bar < 0 ? mod_id : mod_id.substring(0, bar);
}
/**
 * Returns the factor's GenBank ID without the part that follows the first '|'.
 * An ID that contains no '|' is returned unchanged instead of raising a
 * StringIndexOutOfBoundsException.
 *
 * @return the portion of {@code fac_id} before the first '|', or all of it if there is none
 */
public String getFac_id()
{
    int bar = fac_id.indexOf("|");
    return bar < 0 ? fac_id : fac_id.substring(0, bar);
}
/**
 * Returns the target's GenBank ID without the part that follows the first '|'.
 * An ID that contains no '|' is returned unchanged instead of raising a
 * StringIndexOutOfBoundsException.
 *
 * @return the portion of {@code tar_id} before the first '|', or all of it if there is none
 */
public String getTar_id()
{
    int bar = tar_id.indexOf("|");
    return bar < 0 ? tar_id : tar_id.substring(0, bar);
}
/**
 * Concatenates the three Entrez Gene IDs, without any separator, into a single key.
 * NOTE(review): without a separator different ID triples can produce the same key
 * (e.g. "1"+"23" and "12"+"3") -- confirm callers tolerate that.
 *
 * @return modulator, factor and target IDs glued together
 */
public String getEnrezGeneOnlySignature()
{
    StringBuilder key = new StringBuilder();
    key.append(modulator).append(factor).append(target);
    return key.toString();
}
/**
 * Returns the tab-separated gene symbols of modulator, factor and target. Each gene is
 * resolved on its own: when a gene ID has no entry in the ID-to-symbol map the ID itself is
 * used, the same fallback that {@link #backFromIDToSymbol()} applies. (Previously only the
 * modulator was checked, so an unmapped factor or target was rendered as the text "null",
 * which made distinct triplets collide when this string is used as a grouping key.)
 *
 * @return symbol-or-ID of modulator, factor and target, separated by tabs
 */
public String getGeneSymbols()
{
    Map<String, String> g2s = getGeneToSymbolMap();
    String mod = g2s.containsKey(modulator) ? g2s.get(modulator) : modulator;
    String fac = g2s.containsKey(factor) ? g2s.get(factor) : factor;
    String tar = g2s.containsKey(target) ? g2s.get(target) : target;
    return mod + "\t" + fac + "\t" + tar;
}
/**
 * Gathers the modulator, factor and target gene objects of all given triplets.
 *
 * @param trips triplets to scan
 * @return a new set holding every M, F and T reference (null included if a gene is unset)
 */
public static Set<Gene> collectGenes(Collection<Triplet> trips)
{
    Set<Gene> all = new HashSet<Gene>();
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        Triplet trip = it.next();
        all.addAll(Arrays.asList(trip.M, trip.F, trip.T));
    }
    return all;
}
/**
 * @return the gene symbol mapped to the modulator's Entrez Gene ID, or null if unmapped
 */
public String getMSym()
{
    Map<String, String> idToSymbol = getGeneToSymbolMap();
    return idToSymbol.get(modulator);
}
/**
 * @return the gene symbol mapped to the factor's Entrez Gene ID, or null if unmapped
 */
public String getFSym()
{
    Map<String, String> idToSymbol = getGeneToSymbolMap();
    return idToSymbol.get(factor);
}
/**
 * @return the gene symbol mapped to the target's Entrez Gene ID, or null if unmapped
 */
public String getTSym()
{
    Map<String, String> idToSymbol = getGeneToSymbolMap();
    return idToSymbol.get(target);
}
/**
 * Tells whether this is the hard-coded triplet singled out for debugging, identified by its
 * three GenBank IDs.
 *
 * @return true only if all three GenBank IDs match the debug triplet
 */
public boolean isDebug()
{
    if (!mod_id.equals("NM_007295")) return false;
    if (!fac_id.equals("M73069")) return false;
    return tar_id.equals("NM_001099");
}
/**
 * Checks whether the dependency type in this triplet is a logical-and, as decided by
 * {@code TripletClassifier}.
 *
 * @return the classifier's verdict for this triplet
 */
public boolean isLogicalAND()
{
    final boolean logicalAnd = TripletClassifier.isLogicalAND(this);
    return logicalAnd;
}
/**
 * Tells whether the category marks a modulation, i.e. starts with 'A', 'I', 'E' or 'X'.
 * A triplet that has not been classified yet (null category) is reported as not being a
 * modulation instead of raising a NullPointerException.
 *
 * @return true if the category begins with one of the modulation prefixes
 */
public boolean isModulation()
{
    if (categ == null || categ.length() == 0) return false;
    return "AIEX".indexOf(categ.charAt(0)) >= 0;
}
/**
 * Strips Excel hyperlink wrapping from all six ID fields, leaving the plain Entrez Gene IDs
 * and GenBank IDs. Fields that are not hyperlinks are left unchanged.
 */
public void backFromURLToIDs()
{
    // Entrez Gene IDs
    this.modulator = this.extractGeneIDFromURL(this.modulator);
    this.factor = this.extractGeneIDFromURL(this.factor);
    this.target = this.extractGeneIDFromURL(this.target);
    // GenBank IDs
    this.mod_id = this.extractGBIDFromURL(this.mod_id);
    this.fac_id = this.extractGBIDFromURL(this.fac_id);
    this.tar_id = this.extractGBIDFromURL(this.tar_id);
}
/**
 * Replaces the hyperlink text in the three gene fields with the link's display label (the
 * text between the first {@code ,"} and the last {@code "}), and strips hyperlink wrapping
 * from the three GenBank ID fields.
 */
public void backFromURLToSymbol()
{
    int from = modulator.indexOf(",\"") + 2;
    int to = modulator.lastIndexOf("\"");
    modulator = modulator.substring(from, to);

    from = factor.indexOf(",\"") + 2;
    to = factor.lastIndexOf("\"");
    factor = factor.substring(from, to);

    from = target.indexOf(",\"") + 2;
    to = target.lastIndexOf("\"");
    target = target.substring(from, to);

    mod_id = extractGBIDFromURL(mod_id);
    fac_id = extractGBIDFromURL(fac_id);
    tar_id = extractGBIDFromURL(tar_id);
}
/**
 * Replaces each of the three gene IDs with its mapped gene symbol. IDs without an entry in
 * the ID-to-symbol map are left as they are.
 */
public void backFromIDToSymbol()
{
    Map<String, String> idToSymbol = getGeneToSymbolMap();
    if (idToSymbol.containsKey(modulator))
    {
        modulator = idToSymbol.get(modulator);
    }
    if (idToSymbol.containsKey(factor))
    {
        factor = idToSymbol.get(factor);
    }
    if (idToSymbol.containsKey(target))
    {
        target = idToSymbol.get(target);
    }
}
/**
 * Pulls the gene ID out of a gene hyperlink: the text between {@code uids=} and the first
 * {@code ","}. Input without {@code uids=} is returned untouched.
 *
 * @param url hyperlink text or a plain ID
 * @return the embedded gene ID, or the input itself if it is not a hyperlink
 */
private String extractGeneIDFromURL(String url)
{
    int marker = url.indexOf("uids=");
    if (marker < 0)
    {
        return url;
    }
    int end = url.indexOf("\",\"");
    return url.substring(marker + 5, end);
}
/**
 * Pulls the display label out of a hyperlink: the text between the last {@code ,"} and the
 * last {@code "}. Input without {@code ,"} is returned untouched.
 *
 * @param url hyperlink text or a plain ID
 * @return the embedded label, or the input itself if it is not a hyperlink
 */
private String extractGBIDFromURL(String url)
{
    int labelStart = url.lastIndexOf(",\"");
    if (labelStart < 0)
    {
        return url;
    }
    int labelEnd = url.lastIndexOf("\"");
    return url.substring(labelStart + 2, labelEnd);
}
/**
 * Writes the expression values of M, F and T, one tab-separated row per experiment, into
 * {@code dir/<Msymbol>_<Fsymbol>_<Tsymbol>.txt}. Failures are reported on standard error and
 * otherwise ignored (best effort); the file handle is released even when writing fails.
 *
 * @param dir directory to create the file in
 */
public void writeExpValues(String dir)
{
    Map<String, String> map = HGNCParser.getGeneToSymbolMap();
    BufferedWriter writer = null;
    try
    {
        writer = new BufferedWriter(new FileWriter(
            dir + "/" + map.get(modulator) + "_" + map.get(factor) + "_" + map.get(target) + ".txt"));
        for (int i = 0; i < M.getExpSize(); i++)
        {
            writer.write(M.value[i] + "\t" + F.value[i] + "\t" + T.value[i] + "\n");
        }
        writer.close();
    }
    catch (Exception e)
    {
        e.printStackTrace();
    }
    finally
    {
        // Only matters when the normal close above was not reached; closing twice is harmless.
        if (writer != null)
        {
            try
            {
                writer.close();
            }
            catch (IOException ignored)
            {
                // The primary failure has already been reported above.
            }
        }
    }
}
/**
 * Writes the ID columns ({@link #getGenes()}) of every triplet to a file, one triplet per
 * line. I/O failures are printed to standard error; the file handle is released even when
 * writing fails.
 *
 * @param set triplets to record
 * @param filename file to (over)write
 */
public static void recordGenes(Collection<Triplet> set, String filename)
{
    BufferedWriter writer = null;
    try
    {
        writer = new BufferedWriter(new FileWriter(filename));
        for (Triplet triplet : set)
        {
            writer.write(triplet.getGenes() + "\n");
        }
        writer.close();
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
    finally
    {
        // Only matters when the normal close above was not reached; closing twice is harmless.
        if (writer != null)
        {
            try
            {
                writer.close();
            }
            catch (IOException ignored)
            {
                // The primary failure has already been reported above.
            }
        }
    }
}
/**
 * Writes the triplets, preceded by the header line, to a triplet file.
 * <p>
 * Side effects on the argument: the list is re-ordered in place by
 * {@link #orderToGenes(List)} and every triplet's GenBank ID fields are overwritten by
 * {@link #replaceIDsWithURL(Collection)}. I/O failures are printed to standard error; the
 * file handle is released even when writing fails.
 *
 * @param trips triplets to record
 * @param filename file to (over)write
 */
public static void record(List<Triplet> trips, String filename)
{
    trips = orderToGenes(trips);
    replaceIDsWithURL(trips);
    BufferedWriter writer = null;
    try
    {
        writer = new BufferedWriter(new FileWriter(filename));
        writer.write(getColNames() + "\n");
        for (Triplet triplet : trips)
        {
            writer.write(triplet + "\n");
        }
        writer.close();
        System.out.println("Wrote " + trips.size() + " triplets into file " + filename);
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
    finally
    {
        // Only matters when the normal close above was not reached; closing twice is harmless.
        if (writer != null)
        {
            try
            {
                writer.close();
            }
            catch (IOException ignored)
            {
                // The primary failure has already been reported above.
            }
        }
    }
}
/**
 * Writes one triplet per distinct modulator-factor-target gene combination, preceded by the
 * header line. The list is first ordered with {@link #orderToGenes(List)} so that triplets
 * with the same genes are adjacent; within such a run the triplet with the larger
 * {@code pvalGamma} is the one written.
 * <p>
 * NOTE(review): keeping the larger p-value selects the less significant isoform, and the
 * last line is written without a trailing newline unlike the other lines -- both are
 * preserved from the original behavior; confirm they are intended.
 * <p>
 * Side effects on the argument: the list is re-ordered in place and GenBank ID fields are
 * overwritten by {@link #replaceIDsWithURL(Collection)}. I/O failures are printed to standard
 * error; the file handle is released even when writing fails.
 *
 * @param trips triplets to record
 * @param filename file to (over)write
 */
public static void recordUniqueGene(List<Triplet> trips, String filename)
{
    trips = orderToGenes(trips);
    replaceIDsWithURL(trips);
    BufferedWriter writer = null;
    try
    {
        writer = new BufferedWriter(new FileWriter(filename));
        writer.write(getColNames() + "\n");
        int written = 0;
        Triplet prev = null;
        for (Triplet triplet : trips)
        {
            if (prev != null)
            {
                if (sameGenes(prev, triplet))
                {
                    // Same gene combination: carry forward the one with the larger p-value.
                    triplet = triplet.pvalGamma > prev.pvalGamma ? triplet : prev;
                }
                else
                {
                    // Run of identical genes ended: emit its representative.
                    writer.write(prev + "\n");
                    written++;
                }
            }
            prev = triplet;
        }
        if (prev != null)
        {
            writer.write(prev.toString());
            written++;
        }
        writer.close();
        System.out.println("Wrote " + written + " of " + trips.size() + " triplets into file " +
            filename);
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
    finally
    {
        // Only matters when the normal close above was not reached; closing twice is harmless.
        if (writer != null)
        {
            try
            {
                writer.close();
            }
            catch (IOException ignored)
            {
                // The primary failure has already been reported above.
            }
        }
    }
}
/**
 * Compares two triplets by their three gene ID fields only (GenBank IDs are ignored).
 *
 * @param t1 first triplet
 * @param t2 second triplet
 * @return true if modulator, factor and target are all equal
 */
static boolean sameGenes(Triplet t1, Triplet t2)
{
    if (!t1.modulator.equals(t2.modulator)) return false;
    if (!t1.factor.equals(t2.factor)) return false;
    return t1.target.equals(t2.target);
}
/**
 * Writes the triplets common to both collections, preceded by the header line. Triplets are
 * matched on {@link #getGenes()}; each match is written as two consecutive lines, first the
 * triplet from {@code set1}, then its counterpart from {@code set2}. I/O failures are printed
 * to standard error; the file handle is released even when writing fails.
 *
 * @param set1 first collection; determines the output order
 * @param set2 second collection
 * @param filename file to (over)write
 */
public static void record(Collection<Triplet> set1, Collection<Triplet> set2, String filename)
{
    BufferedWriter writer = null;
    try
    {
        Map<String, Triplet> map = new HashMap<String, Triplet>();
        for (Triplet t : set2)
        {
            map.put(t.getGenes(), t);
        }
        writer = new BufferedWriter(new FileWriter(filename));
        writer.write(getColNames() + "\n");
        int i = 0;
        for (Triplet t1 : set1)
        {
            // Values are never null here, so a single lookup replaces containsKey + get.
            Triplet t2 = map.get(t1.getGenes());
            if (t2 != null)
            {
                writer.write(t1 + "\n");
                writer.write(t2 + "\n");
                i++;
            }
        }
        writer.close();
        System.out.println("Wrote " + i + " triplets into file " + filename);
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
    finally
    {
        // Only matters when the normal close above was not reached; closing twice is harmless.
        if (writer != null)
        {
            try
            {
                writer.close();
            }
            catch (IOException ignored)
            {
                // The primary failure has already been reported above.
            }
        }
    }
}
/**
 * Reads a triplet file. The first line is treated as a header and skipped; every following
 * line that contains a tab is parsed with {@link #Triplet(String)}.
 * <p>
 * An I/O failure is printed to standard error and the triplets read so far (possibly none)
 * are returned. A malformed line makes the constructor's RuntimeException propagate. The
 * reader is closed in every case.
 *
 * @param filename triplet file to read
 * @return the parsed triplets, never null
 */
public static List<Triplet> readTrips(String filename)
{
    List<Triplet> set = new ArrayList<Triplet>();
    BufferedReader reader = null;
    try
    {
        reader = new BufferedReader(new FileReader(filename));
        // For header
        reader.readLine();
        String line;
        while ((line = reader.readLine()) != null)
        {
            if (line.contains("\t"))
            {
                set.add(new Triplet(line));
            }
        }
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
    finally
    {
        if (reader != null)
        {
            try
            {
                reader.close();
            }
            catch (IOException e)
            {
                e.printStackTrace();
            }
        }
    }
    return set;
}
/**
 * Reads a triplet file and attaches gene objects to every triplet. Hyperlinked IDs are
 * converted back to plain IDs, the genes for all GenBank IDs are loaded from the given
 * experiment files, each gene's status is adjusted with {@code rankAdjustStatus(1/3)}, and
 * M, F and T are looked up by GenBank ID (left null when a gene was not loaded).
 *
 * @param filename triplet file to read
 * @param expfile experiment files to load gene data from
 * @return the triplets with their gene objects attached
 */
public static List<Triplet> readTripsAndAssociate(String filename, String ... expfile)
{
    List<Triplet> loaded = readTrips(filename);
    restoreIDs(loaded);

    Map<String, Gene> genesById = Gene.readGenesWithID(getIDs(loaded), 0, expfile);
    for (Iterator<Gene> it = genesById.values().iterator(); it.hasNext();)
    {
        it.next().rankAdjustStatus(1D / 3);
    }

    for (Iterator<Triplet> it = loaded.iterator(); it.hasNext();)
    {
        Triplet trip = it.next();
        trip.M = genesById.get(trip.mod_id);
        trip.F = genesById.get(trip.fac_id);
        trip.T = genesById.get(trip.tar_id);
    }
    return loaded;
}
/**
 * Collects the values of the first three tab-separated columns of every line that has at
 * least three columns. No header line is skipped. Failures are printed to standard error and
 * the IDs read so far are returned; the reader is closed in every case.
 *
 * @param filename tab-separated file to read
 * @return the distinct values found in the first three columns, never null
 */
public static Set<String> readGeneIDs(String filename)
{
    Set<String> names = new HashSet<String>();
    BufferedReader reader = null;
    try
    {
        reader = new BufferedReader(new FileReader(filename));
        String line;
        while ((line = reader.readLine()) != null)
        {
            String[] terms = line.split("\t");
            if (terms.length > 2)
            {
                names.add(terms[0]);
                names.add(terms[1]);
                names.add(terms[2]);
            }
        }
    }
    catch (Exception e)
    {
        e.printStackTrace();
    }
    finally
    {
        if (reader != null)
        {
            try
            {
                reader.close();
            }
            catch (IOException e)
            {
                e.printStackTrace();
            }
        }
    }
    return names;
}
/**
 * Collects the modulator, factor and target gene ID fields of all given triplets.
 *
 * @param trips triplets to scan
 * @return a new set with every distinct gene ID
 */
public static Set<String> getGeneIDs(Collection<Triplet> trips)
{
    Set<String> ids = new HashSet<String>();
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        Triplet trip = it.next();
        ids.addAll(Arrays.asList(trip.modulator, trip.factor, trip.target));
    }
    return ids;
}
/**
 * Collects the GenBank ID fields (mod_id, fac_id, tar_id) of all given triplets.
 *
 * @param trips triplets to scan
 * @return a new set with every distinct GenBank ID
 */
public static Set<String> getIDs(Collection<Triplet> trips)
{
    Set<String> ids = new HashSet<String>();
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        Triplet trip = it.next();
        ids.addAll(Arrays.asList(trip.mod_id, trip.fac_id, trip.tar_id));
    }
    return ids;
}
/**
 * Collects the modulator and factor gene ID fields (targets excluded) of all given triplets.
 *
 * @param trips triplets to scan
 * @return a new set with every distinct modulator or factor ID
 */
public static Set<String> getMFGeneIDs(Collection<Triplet> trips)
{
    Set<String> ids = new HashSet<String>();
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        Triplet trip = it.next();
        ids.addAll(Arrays.asList(trip.modulator, trip.factor));
    }
    return ids;
}
/**
 * Two triplets are equal when their three gene IDs are equal and, for each GenBank ID that is
 * set on BOTH sides, those GenBank IDs are equal too. A GenBank ID that is null on either
 * side is not compared.
 *
 * @param obj object to compare with
 * @return true if obj is a Triplet matching this one under the rule above
 */
@Override
public boolean equals(Object obj)
{
    if (!(obj instanceof Triplet))
    {
        return false;
    }
    Triplet that = (Triplet) obj;

    if (!modulator.equals(that.modulator)
        || !factor.equals(that.factor)
        || !target.equals(that.target))
    {
        return false;
    }
    if (mod_id != null && that.mod_id != null && !mod_id.equals(that.mod_id)) return false;
    if (fac_id != null && that.fac_id != null && !fac_id.equals(that.fac_id)) return false;
    if (tar_id != null && that.tar_id != null && !tar_id.equals(that.tar_id)) return false;
    return true;
}
/**
 * Hash code derived from the three gene IDs only.
 * <p>
 * The GenBank IDs are deliberately left out: {@link #equals(Object)} treats a null GenBank ID
 * as matching anything, so two equal triplets may differ in those fields and must still hash
 * alike. The value is recomputed on every call rather than cached because the ID fields are
 * public and are rewritten in place (e.g. by {@link #backFromURLToIDs()}); a cached value
 * would go stale.
 *
 * @return sum of the hash codes of the non-null gene IDs
 */
@Override
public int hashCode()
{
    int hash = 0;
    if (target != null) hash += target.hashCode();
    if (factor != null) hash += factor.hashCode();
    if (modulator != null) hash += modulator.hashCode();
    return hash;
}
/**
 * Computes the correlation between the factor's and the target's expression values using
 * {@code Pearson.calcCorrelation}. The two value arrays are passed as the rows of the input
 * matrix directly; no intermediate copy is allocated.
 *
 * @return the correlation reported by {@code Pearson.calcCorrelation}
 */
public double calcFTCorr()
{
    double[][] x = {F.value, T.value};
    return Pearson.calcCorrelation(x);
}
/**
 * Computes the factor-target correlation restricted to the experiments in which the
 * modulator's status equals the given value.
 *
 * @param mCond modulator status to condition on
 * @return the correlation reported by {@code Pearson.calcCorrelation} for the selected samples
 */
public double calcFTCorrCondM(int mCond)
{
    final int expected = Pearson.frequency(M.status, mCond);
    double[][] selected = new double[2][expected];
    int filled = 0;
    for (int idx = 0; idx < M.status.length; idx++)
    {
        if (M.status[idx] != mCond)
        {
            continue;
        }
        selected[0][filled] = F.value[idx];
        selected[1][filled] = T.value[idx];
        filled++;
    }
    assert filled == expected;
    return Pearson.calcCorrelation(selected);
}
/**
 * Sorts the given list IN PLACE by the criterion that the factory's holders expose, and
 * returns that same list. Each triplet is wrapped in a holder, the holders are sorted, and
 * the list is refilled in sorted order.
 *
 * @param trips list to sort; it is cleared and refilled
 * @param fact factory that wraps a triplet in a comparable holder
 * @return the same list instance, now sorted
 */
private static List<Triplet> order(List<Triplet> trips, HoldFact fact)
{
    THolder[] wrapped = new THolder[trips.size()];
    Iterator<Triplet> source = trips.iterator();
    for (int k = 0; source.hasNext(); k++)
    {
        wrapped[k] = fact.hold(source.next());
    }

    Arrays.sort(wrapped);

    trips.clear();
    for (int k = 0; k < wrapped.length; k++)
    {
        trips.add(wrapped[k].t);
    }
    return trips;
}
/**
 * Sorts the list in place by the string "modulator|target".
 *
 * @param trips list to sort
 * @return the same list, sorted
 */
public static List<Triplet> orderRegTarg(List<Triplet> trips)
{
    class ByModulatorThenTarget extends THolder
    {
        ByModulatorThenTarget(Triplet t)
        {
            super(t);
        }
        Comparable getCriter()
        {
            // t is the holder's own triplet field, already set when this is called.
            return t.modulator + "|" + t.target;
        }
    }
    HoldFact factory = new HoldFact()
    {
        public THolder hold(Triplet t)
        {
            return new ByModulatorThenTarget(t);
        }
    };
    return order(trips, factory);
}
/**
 * Sorts the list in place by the string "factor|target".
 *
 * @param trips list to sort
 * @return the same list, sorted
 */
public static List<Triplet> orderFactTarg(List<Triplet> trips)
{
    class ByFactorThenTarget extends THolder
    {
        ByFactorThenTarget(Triplet t)
        {
            super(t);
        }
        Comparable getCriter()
        {
            // t is the holder's own triplet field, already set when this is called.
            return t.factor + "|" + t.target;
        }
    }
    HoldFact factory = new HoldFact()
    {
        public THolder hold(Triplet t)
        {
            return new ByFactorThenTarget(t);
        }
    };
    return order(trips, factory);
}
/**
 * Sorts the list in place by a key made of factor symbol, modulator symbol, target symbol and
 * then the GenBank IDs of factor, modulator and target. A gene without an entry in the
 * ID-to-symbol map contributes its ID instead of a symbol.
 *
 * @param trips list to sort
 * @return the same list, sorted
 */
public static List<Triplet> orderToGenes(List<Triplet> trips)
{
    final Map<String, String> idToSymbol = getGeneToSymbolMap();
    HoldFact factory = new HoldFact()
    {
        public THolder hold(Triplet t)
        {
            return new THolder(t)
            {
                Comparable getCriter()
                {
                    String mod = t.modulator;
                    if (idToSymbol.containsKey(mod)) mod = idToSymbol.get(mod);
                    String fac = t.factor;
                    if (idToSymbol.containsKey(fac)) fac = idToSymbol.get(fac);
                    String tar = t.target;
                    if (idToSymbol.containsKey(tar)) tar = idToSymbol.get(tar);
                    return fac + mod + tar + t.fac_id + t.mod_id + t.tar_id;
                }
            };
        }
    };
    return order(trips, factory);
}
/**
 * Sorts the list in place by the string "target|factor".
 *
 * @param trips list to sort
 * @return the same list, sorted
 */
public static List<Triplet> orderTargFact(List<Triplet> trips)
{
    class ByTargetThenFactor extends THolder
    {
        ByTargetThenFactor(Triplet t)
        {
            super(t);
        }
        Comparable getCriter()
        {
            // t is the holder's own triplet field, already set when this is called.
            return t.target + "|" + t.factor;
        }
    }
    HoldFact factory = new HoldFact()
    {
        public THolder hold(Triplet t)
        {
            return new ByTargetThenFactor(t);
        }
    };
    return order(trips, factory);
}
/**
 * Sorts the list in place by ascending {@code pvalGamma}.
 *
 * @param trips list to sort
 * @return the same list, sorted
 */
public static List<Triplet> orderPvalGamma(List<Triplet> trips)
{
    class ByPvalGamma extends THolder
    {
        ByPvalGamma(Triplet t)
        {
            super(t);
        }
        Comparable getCriter()
        {
            return Double.valueOf(t.pvalGamma);
        }
    }
    HoldFact factory = new HoldFact()
    {
        public THolder hold(Triplet t)
        {
            return new ByPvalGamma(t);
        }
    };
    return order(trips, factory);
}
/**
 * Sorts the list in place by ascending modulation score. The score of a triplet is derived
 * from its bin counts:
 * <ul>
 * <li>betaM &gt; 0: min(gamma, betaM) if alphaM &lt; betaM, otherwise 0;</li>
 * <li>betaM &lt;= 0: -max(gamma, betaM) if alphaM &gt; betaM, otherwise 0.</li>
 * </ul>
 * The score is computed inside {@code getCriter()} because the {@code THolder} constructor
 * calls that method to capture the sort key. Computing it in the subclass constructor after
 * {@code super(t)} -- as was done before -- is too late: the key had already been captured as
 * 0 for every triplet, so the list was never actually reordered.
 *
 * @param trips list to sort
 * @return the same list, sorted
 */
public static List<Triplet> orderModulation(List<Triplet> trips)
{
    class Holder extends THolder
    {
        protected Holder(Triplet t)
        {
            super(t);
        }
        Comparable getCriter()
        {
            // t is the holder's own triplet field, already set when this is called.
            int[] n = Difference.calcTotals(t.cnt);
            double[] p = Difference.calcProportions(t.cnt, n);
            double betaM = Difference.calcBetaM(p);
            double alfaM = Difference.calcAlphaM(p);
            double gamma = Difference.calcGamma(p);
            double val;
            if (betaM > 0)
            {
                if (alfaM < betaM) val = Math.min(gamma, betaM);
                else val = 0;
            }
            else
            {
                if (alfaM > betaM) val = -Math.max(gamma, betaM);
                else val = 0;
            }
            return Double.valueOf(val);
        }
    }
    return order(trips, new HoldFact()
    {
        public THolder hold(Triplet t)
        {
            return new Holder(t);
        }
    });
}
/**
 * Sorts the list in place by ascending {@code pvalDcmi}.
 *
 * @param trips list to sort
 * @return the same list, sorted
 */
public static List<Triplet> orderPvalDcmi(List<Triplet> trips)
{
    class ByPvalDcmi extends THolder
    {
        ByPvalDcmi(Triplet t)
        {
            super(t);
        }
        Comparable getCriter()
        {
            return Double.valueOf(t.pvalDcmi);
        }
    }
    HoldFact factory = new HoldFact()
    {
        public THolder hold(Triplet t)
        {
            return new ByPvalDcmi(t);
        }
    };
    return order(trips, factory);
}
/**
 * Sorts the list in place by ascending {@code pvalBetaM}.
 *
 * @param trips list to sort
 * @return the same list, sorted
 */
public static List<Triplet> orderPvalBetaM(List<Triplet> trips)
{
    class ByPvalBetaM extends THolder
    {
        ByPvalBetaM(Triplet t)
        {
            super(t);
        }
        Comparable getCriter()
        {
            return Double.valueOf(t.pvalBetaM);
        }
    }
    HoldFact factory = new HoldFact()
    {
        public THolder hold(Triplet t)
        {
            return new ByPvalBetaM(t);
        }
    };
    return order(trips, factory);
}
/**
 * Sorts the list in place by ascending value of one bin count.
 *
 * @param trips list to sort
 * @param cntIndex index into {@code cnt} of the bin to sort by
 * @return the same list, sorted
 */
public static List<Triplet> orderCnt(List<Triplet> trips, final int cntIndex)
{
    HoldFact factory = new HoldFact()
    {
        public THolder hold(Triplet t)
        {
            return new THolder(t)
            {
                Comparable getCriter()
                {
                    int binCount = t.cnt[cntIndex];
                    return Integer.valueOf(binCount);
                }
            };
        }
    };
    return order(trips, factory);
}
/**
 * Loads the ID-to-symbol map from {@code HGNCParser} and builds the symbol-to-ID map as its
 * inverse. When several IDs share a symbol, the one iterated last wins.
 */
private static void prepareGeneSymbolMaps()
{
    geneToSymbolMap = HGNCParser.getGeneToSymbolMap();
    symbolToGeneMap = new HashMap<String, String>();
    for (Map.Entry<String, String> entry : geneToSymbolMap.entrySet())
    {
        String geneId = entry.getKey();
        String symbol = entry.getValue();
        symbolToGeneMap.put(symbol, geneId);
    }
}
/**
 * Returns the Entrez Gene ID to gene symbol map, preparing both symbol maps on first use.
 *
 * @return the shared ID-to-symbol map
 */
public static Map<String, String> getGeneToSymbolMap()
{
    if (geneToSymbolMap != null)
    {
        return geneToSymbolMap;
    }
    prepareGeneSymbolMaps();
    return geneToSymbolMap;
}
/**
 * Returns the gene symbol to Entrez Gene ID map, preparing both symbol maps on first use.
 *
 * @return the shared symbol-to-ID map
 */
public static Map<String, String> getSymbolToGeneMap()
{
    if (symbolToGeneMap != null)
    {
        return symbolToGeneMap;
    }
    prepareGeneSymbolMaps();
    return symbolToGeneMap;
}
/**
 * Creates a new triplet from this triplet's gene objects. Only what the gene-based
 * constructor derives (genes and their IDs) is carried over; counts, category and p-values
 * are not copied.
 *
 * @return a fresh triplet over the same M, F and T
 */
public Triplet createCopy()
{
    Triplet copy = new Triplet(M, F, T);
    return copy;
}
/**
 * Wrapper used for sorting triplets: pairs a triplet with a sort key that is captured once,
 * at construction time, from {@link #getCriter()}.
 * <p>
 * Because the constructor itself calls {@code getCriter()}, implementations may rely only on
 * the triplet field {@code t} (and on captured locals); fields that a subclass assigns in its
 * own constructor are not yet set at that point.
 */
private static abstract class THolder implements Comparable
{
    /** The wrapped triplet. */
    Triplet t;
    /** Sort key captured at construction. */
    Comparable crt;

    protected THolder(Triplet t)
    {
        this.t = t;
        this.crt = getCriter();
    }

    /**
     * @return the key this holder is ordered by
     */
    abstract Comparable getCriter();

    /**
     * Orders holders by their captured keys; anything that is not a holder compares as equal.
     */
    public int compareTo(Object o)
    {
        if (!(o instanceof THolder))
        {
            return 0;
        }
        THolder other = (THolder) o;
        return crt.compareTo(other.crt);
    }
}
/**
* Factory that wraps a triplet in the sortable holder carrying the desired sort key.
* Passed to order(List, HoldFact) by the public order* methods.
*/
private interface HoldFact
{
/**
* @param t triplet to wrap
* @return a holder exposing the sort key for t
*/
THolder hold(Triplet t);
}
//----------------------------------------------------------------------------------------------
// Section: Filtering, selection and threshold utilities
//----------------------------------------------------------------------------------------------
/**
 * Removes, in place, every triplet for which {@link #isModulation()} is false.
 *
 * @param trips list to prune
 */
public static void removeNonModulation(List<Triplet> trips)
{
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        if (it.next().isModulation())
        {
            continue;
        }
        it.remove();
    }
}
/**
 * Removes, in place, every triplet whose category equals the given one.
 *
 * @param trips list to prune
 * @param categ category to remove
 */
public static void removeCateg(List<Triplet> trips, String categ)
{
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        Triplet candidate = it.next();
        boolean matches = candidate.categ.equals(categ);
        if (matches)
        {
            it.remove();
        }
    }
}
/**
 * Lists the distinct factor IDs of the given triplets in order of first appearance.
 * Duplicates are detected with a hash-based set, so the scan is linear in the number of
 * triplets rather than quadratic.
 *
 * @param trips triplets to scan
 * @return a new list of distinct factor IDs, first occurrence first
 */
public static List<String> getAllFactors(Collection<Triplet> trips)
{
    Set<String> unique = new LinkedHashSet<String>();
    for (Triplet t : trips)
    {
        unique.add(t.factor);
    }
    return new ArrayList<String>(unique);
}
/**
 * Selects the triplets with the given target. If the argument is a known gene symbol it is
 * first translated to its gene ID.
 *
 * @param trips triplets to search
 * @param target gene symbol or ID to look for
 * @param entrez true to match against the target gene ID field, false to match against the
 * target GenBank ID field
 * @return a new list with the matching triplets, in encounter order
 */
public static List<Triplet> selectTarget(Collection<Triplet> trips, String target, boolean entrez)
{
    Map<String, String> symbolToId = getSymbolToGeneMap();
    if (symbolToId.containsKey(target))
    {
        target = symbolToId.get(target);
    }
    List<Triplet> hits = new ArrayList<Triplet>();
    for (Triplet t : trips)
    {
        String id = t.tar_id;
        if (entrez)
        {
            id = t.target;
        }
        if (!id.equals(target))
        {
            continue;
        }
        hits.add(t);
    }
    return hits;
}
/**
 * Selects the triplets with the given factor. If the argument is a known gene symbol (per
 * {@code HGNCParser}) it is first translated to its gene ID.
 *
 * @param trips triplets to search
 * @param factor gene symbol or ID to look for
 * @param entrez true to match against the factor gene ID field, false to match against the
 * factor GenBank ID field
 * @return a new list with the matching triplets, in encounter order
 */
public static List<Triplet> selectFactor(Collection<Triplet> trips, String factor, boolean entrez)
{
    Map<String, String> symbolToId = HGNCParser.getSymbolToGeneMap();
    if (symbolToId.containsKey(factor))
    {
        factor = symbolToId.get(factor);
    }
    List<Triplet> hits = new ArrayList<Triplet>();
    for (Triplet t : trips)
    {
        String id = t.fac_id;
        if (entrez)
        {
            id = t.factor;
        }
        if (!id.equals(factor))
        {
            continue;
        }
        hits.add(t);
    }
    return hits;
}
/**
 * Selects the triplets whose modulator gene ID field matches. If the argument is a known gene
 * symbol (per {@code HGNCParser}) it is first translated to its gene ID.
 *
 * @param trips triplets to search
 * @param modulator gene symbol or ID to look for
 * @return a new list with the matching triplets, in encounter order
 */
public static List<Triplet> selectModulator(Collection<Triplet> trips, String modulator)
{
    Map<String, String> symbolToId = HGNCParser.getSymbolToGeneMap();
    if (symbolToId.containsKey(modulator))
    {
        modulator = symbolToId.get(modulator);
    }
    List<Triplet> hits = new ArrayList<Triplet>();
    for (Triplet t : trips)
    {
        if (!t.modulator.equals(modulator))
        {
            continue;
        }
        hits.add(t);
    }
    return hits;
}
/**
 * Concatenates the results of {@link #selectTarget(Collection, String, boolean)} (matching on
 * the gene ID field) for every given target, in the order the targets are supplied.
 *
 * @param trips triplets to search
 * @param targets gene symbols or IDs to look for
 * @return a new list with all matches
 */
public static List<Triplet> selectTargets(Collection<Triplet> trips, Collection<String> targets)
{
    List<Triplet> hits = new ArrayList<Triplet>();
    for (Iterator<String> it = targets.iterator(); it.hasNext();)
    {
        List<Triplet> forOne = selectTarget(trips, it.next(), true);
        hits.addAll(forOne);
    }
    return hits;
}
/**
 * Concatenates the results of {@link #selectModulator(Collection, String)} for every given
 * modulator, in the order the modulators are supplied.
 *
 * @param trips triplets to search
 * @param modulators gene symbols or IDs to look for
 * @return a new list with all matches
 */
public static List<Triplet> selectModulators(Collection<Triplet> trips,
    Collection<String> modulators)
{
    List<Triplet> hits = new ArrayList<Triplet>();
    for (Iterator<String> it = modulators.iterator(); it.hasNext();)
    {
        List<Triplet> forOne = selectModulator(trips, it.next());
        hits.addAll(forOne);
    }
    return hits;
}
/**
 * Concatenates the results of {@link #selectFactor(Collection, String, boolean)} (matching on
 * the gene ID field) for every given factor, in the order the factors are supplied.
 *
 * @param trips triplets to search
 * @param factors gene symbols or IDs to look for
 * @return a new list with all matches
 */
public static List<Triplet> selectFactors(Collection<Triplet> trips, Collection<String> factors)
{
    List<Triplet> hits = new ArrayList<Triplet>();
    for (Iterator<String> it = factors.iterator(); it.hasNext();)
    {
        List<Triplet> forOne = selectFactor(trips, it.next(), true);
        hits.addAll(forOne);
    }
    return hits;
}
/**
 * Counts how often each target occurs in the given triplets and prints the tally. Targets are
 * labelled with their gene symbol when {@code HGNCParser} knows one, otherwise with the ID.
 *
 * @param trips triplets to tally
 */
public static void printTargets(Collection<Triplet> trips)
{
    Map<String, String> idToSymbol = HGNCParser.getGeneToSymbolMap();
    TermCounter tally = new TermCounter();
    for (Triplet t : trips)
    {
        String label = idToSymbol.get(t.target);
        if (label == null)
        {
            label = t.target;
        }
        tally.addTerm(label);
    }
    tally.print();
}
/**
 * Keeps the triplets that are logical-AND dependencies, plus any whose category starts with
 * "OR" (so that OR relations that were not discarded earlier survive this filter).
 *
 * @param trips triplets to filter
 * @return a new list with the retained triplets, in encounter order
 */
public static List<Triplet> filterToLogicalAND(Collection<Triplet> trips)
{
    List<Triplet> kept = new ArrayList<Triplet>();
    for (Triplet t : trips)
    {
        boolean isOr = t.categ != null && t.categ.startsWith("OR");
        if (isOr || t.isLogicalAND())
        {
            kept.add(t);
        }
    }
    return kept;
}
/**
 * Keeps the triplets for which {@code Difference.complexBetaM(t, 0.05)} is false.
 *
 * @param trips triplets to filter
 * @return a new list with the retained triplets, in encounter order
 */
public static List<Triplet> filterToMonotonic(Collection<Triplet> trips)
{
    List<Triplet> kept = new ArrayList<Triplet>();
    for (Triplet t : trips)
    {
        if (Difference.complexBetaM(t, 0.05))
        {
            continue;
        }
        kept.add(t);
    }
    return kept;
}
/**
 * Keeps the triplets whose {@code pvalGamma} is at most the threshold.
 *
 * @param trips triplets to filter
 * @param thr inclusive upper bound for the p-value
 * @return a new list with the retained triplets, in encounter order
 */
public static List<Triplet> filterToPvalGamma(Collection<Triplet> trips, double thr)
{
    List<Triplet> kept = new ArrayList<Triplet>();
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        Triplet candidate = it.next();
        if (candidate.pvalGamma <= thr)
        {
            kept.add(candidate);
        }
    }
    return kept;
}
/**
 * Keeps the triplets whose {@code pvalDcmi} is at most the threshold.
 *
 * @param trips triplets to filter
 * @param thr inclusive upper bound for the p-value
 * @return a new list with the retained triplets, in encounter order
 */
public static List<Triplet> filterToPvalDcmi(Collection<Triplet> trips, double thr)
{
    List<Triplet> kept = new ArrayList<Triplet>();
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        Triplet candidate = it.next();
        if (candidate.pvalDcmi <= thr)
        {
            kept.add(candidate);
        }
    }
    return kept;
}
/**
 * Keeps the triplets whose {@code pvalBetaM} is at most the threshold, without the special
 * handling of OR relations.
 *
 * @param trips triplets to filter
 * @param thr inclusive upper bound for the p-value
 * @return a new list with the retained triplets
 */
public static List<Triplet> filterToPvalBetaM(Collection<Triplet> trips, double thr)
{
    final boolean keepOrRelations = false;
    return filterToPvalBetaM(trips, thr, keepOrRelations);
}
/**
 * Keeps the triplets whose {@code pvalBetaM} is at most the threshold. With {@code keepor}
 * set, a triplet that fails that test is still kept -- and its category is overwritten with
 * "OR_ACTIVATION" or "OR_INHIBITION" -- when all of the following hold: the p-values of
 * alphaM and alphaF are both below the threshold, alphaM and alphaF have the same sign, and
 * each alpha is larger in magnitude than the corresponding beta.
 *
 * @param trips triplets to filter
 * @param thr p-value threshold
 * @param keepor whether to rescue (and relabel) OR relations
 * @return a new list with the retained triplets, in encounter order
 */
public static List<Triplet> filterToPvalBetaM(Collection<Triplet> trips, double thr, boolean keepor)
{
    List<Triplet> kept = new ArrayList<Triplet>();
    for (Triplet t : trips)
    {
        if (t.pvalBetaM <= thr)
        {
            kept.add(t);
            continue;
        }
        if (!keepor)
        {
            continue;
        }

        int[] totals = Difference.calcTotals(t.cnt);
        double alphaMPval = Difference.calcAlphaMpval(t.cnt, totals);
        double alphaFPval = Difference.calcAlphaFpval(t.cnt, totals);
        if (!(alphaMPval < thr && alphaFPval < thr))
        {
            continue;
        }

        double[] props = Difference.calcProportions(t.cnt, totals);
        double am = Difference.calcAlphaM(props);
        double af = Difference.calcAlphaF(props);
        double bm = Difference.calcBetaM(props);
        double bf = Difference.calcBetaF(props);

        boolean sameSign = am * af > 0;
        if (sameSign && Math.abs(am) > Math.abs(bm) && Math.abs(af) > Math.abs(bf))
        {
            t.categ = "OR_" + (am > 0 ? "ACTIVATION" : "INHIBITION");
            kept.add(t);
        }
    }
    return kept;
}
/**
 * Keeps the triplets in which every one of the four count pairs (cnt[i], cnt[i + 4]) has at
 * least one observation, i.e. all four M-F cases are present.
 *
 * @param trips triplets to filter
 * @return a new list with the retained triplets, in encounter order
 */
public static List<Triplet> filterToStateExistence(Collection<Triplet> trips)
{
    List<Triplet> kept = new ArrayList<Triplet>();
    scan:
    for (Triplet t : trips)
    {
        for (int k = 0; k < 4; k++)
        {
            if (t.cnt[k] + t.cnt[k + 4] == 0)
            {
                // An empty pair disqualifies the triplet.
                continue scan;
            }
        }
        kept.add(t);
    }
    return kept;
}
/**
 * Temporary filtering method for debugging: keeps the triplets whose {@code pvalGamma} is
 * strictly below 1.
 *
 * @param trips triplets to filter
 * @return a new list with the retained triplets, in encounter order
 */
public static List<Triplet> filterTemp(Collection<Triplet> trips)
{
    List<Triplet> kept = new ArrayList<Triplet>();
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        Triplet candidate = it.next();
        if (candidate.pvalGamma < 1)
        {
            kept.add(candidate);
        }
    }
    return kept;
}
/**
 * Selects a fixed number of most significant triplets (smallest {@code pvalGamma}).
 * If the list has no more than {@code size} elements it is returned as is, unsorted.
 * Otherwise the given list is sorted in place by {@code pvalGamma} and its first
 * {@code size} elements are returned in a new list; a summary line is printed.
 *
 * @param trips triplets to select from; may be re-ordered
 * @param size number of triplets to keep
 * @return the input list itself, or a new list with the {@code size} best triplets
 */
public static List<Triplet> filterHighestRanked(List<Triplet> trips, int size)
{
    if (trips.size() <= size)
    {
        return trips;
    }
    orderPvalGamma(trips);
    List<Triplet> best = new ArrayList<Triplet>(trips.subList(0, size));
    Triplet lastKept = trips.get(size - 1);
    System.out.println("Rank filtered first " + size + ", " +
        "lowest signif = " + lastKept.pvalGamma);
    return best;
}
/**
 * Prunes the list in place so that, per gene-symbol combination
 * ({@link #getGeneSymbols()}), only the isoforms with the smallest {@code pvalGamma} remain
 * and, among those, only the ones with the largest absolute gamma. Exact ties on both values
 * are all kept.
 *
 * @param trips list to prune
 */
public static void keepMostSignif(List<Triplet> trips)
{
    // Pass 1: smallest p-value per gene combination.
    Map<String, Double> bestPval = new HashMap<String, Double>();
    for (Triplet t : trips)
    {
        String key = t.getGeneSymbols();
        Double seen = bestPval.get(key);
        if (seen == null || seen > t.pvalGamma)
        {
            bestPval.put(key, t.pvalGamma);
        }
    }
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        Triplet t = it.next();
        if (bestPval.get(t.getGeneSymbols()) < t.pvalGamma)
        {
            it.remove();
        }
    }

    // Pass 2: among the survivors, largest |gamma| per gene combination.
    Map<String, Double> bestGamma = new HashMap<String, Double>();
    for (Triplet t : trips)
    {
        String key = t.getGeneSymbols();
        double magnitude = Math.abs(Difference.calcGamma(t));
        Double seen = bestGamma.get(key);
        if (seen == null || seen < magnitude)
        {
            bestGamma.put(key, magnitude);
        }
    }
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        Triplet t = it.next();
        double magnitude = Math.abs(Difference.calcGamma(t));
        if (bestGamma.get(t.getGeneSymbols()) > magnitude)
        {
            it.remove();
        }
    }
}
/**
 * Removes repeating M-F-T isoforms in place, keeping only the first triplet seen for each
 * gene-symbol combination ({@link #getGeneSymbols()}). The surviving triplets keep their
 * original relative order (an insertion-ordered map is used; a plain hash map would hand them
 * back in arbitrary order).
 *
 * @param trips list to prune
 */
public static void keepFirstUnique(List<Triplet> trips)
{
    Map<String, Triplet> firstByGenes = new LinkedHashMap<String, Triplet>();
    for (Triplet t : trips)
    {
        String key = t.getGeneSymbols();
        if (!firstByGenes.containsKey(key)) firstByGenes.put(key, t);
    }
    trips.clear();
    trips.addAll(firstByGenes.values());
}
/**
 * Removes, in place, every triplet whose category starts with "MoA" (e.g.
 * "MoA Insignificant") or with "OR".
 *
 * @param trips list to prune
 */
public static void keepClassified(List<Triplet> trips)
{
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        String category = it.next().categ;
        boolean unclassified = category.startsWith("MoA") || category.startsWith("OR");
        if (unclassified)
        {
            it.remove();
        }
    }
}
/**
 * Computes three coefficients of this triplet from its bin counts.
 *
 * @return a new array {alphaM, alphaF, gamma}
 */
public double[] getConstants()
{
    int[] totals = Difference.calcTotals(cnt);
    double[] props = Difference.calcProportions(cnt, totals);
    return new double[]{
        Difference.calcAlphaM(props),
        Difference.calcAlphaF(props),
        Difference.calcGamma(this)
    };
}
/**
 * Finds the gamma p-value threshold for the targeted FDR. The list is sorted in place by
 * {@code pvalGamma}; walking it in ascending order, the estimated FDR at rank i is
 * pval * listSize / i, and the p-value of the first triplet whose estimate reaches the target
 * is returned. If no estimate reaches the target, the largest p-value is returned (0 for an
 * empty list).
 * <p>
 * NOTE(review): unlike {@link #getPvalBetaMThreshold(List, double)}, the returned value is
 * the p-value of the first FAILING triplet itself, so an inclusive filter on it keeps that
 * triplet -- confirm whether that is intended.
 *
 * @param trips triplets to evaluate; re-ordered in place
 * @param target_fdr targeted false discovery rate
 * @return pval threshold
 */
public static double getPvalGammaThreshold(List<Triplet> trips, double target_fdr)
{
    assert target_fdr > 0 && target_fdr < 1;
    trips = orderPvalGamma(trips);
    final int total = trips.size();
    double last = 0;
    int rank = 0;
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        Triplet t = it.next();
        rank++;
        last = t.pvalGamma;
        double expectedFalse = t.pvalGamma * total;
        if (expectedFalse / rank >= target_fdr)
        {
            return last;
        }
    }
    return last;
}
/**
 * Finds the betaM p-value threshold for the targeted FDR. The list is sorted in place by
 * {@code pvalBetaM}; walking it in ascending order, the estimated FDR at rank i is
 * pval * listSize / i. At the first triplet whose estimate reaches the target, that triplet's
 * p-value minus 1e-8 is returned, so an inclusive filter on the result excludes it. If no
 * estimate reaches the target, the largest p-value is returned (0 for an empty list).
 *
 * @param trips triplets to evaluate; re-ordered in place
 * @param target_fdr targeted false discovery rate
 * @return pval threshold
 */
public static double getPvalBetaMThreshold(List<Triplet> trips, double target_fdr)
{
    assert target_fdr > 0 && target_fdr < 1;
    trips = orderPvalBetaM(trips);
    final int total = trips.size();
    double last = 0;
    int rank = 0;
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        Triplet t = it.next();
        rank++;
        last = t.pvalBetaM;
        double expectedFalse = t.pvalBetaM * total;
        if (expectedFalse / rank >= target_fdr)
        {
            return last - 0.00000001;
        }
    }
    return last;
}
/**
 * Returns the triplets for which {@link #isDebug()} is true.
 *
 * @param trips triplets to scan
 * @return a new list with the debug triplets, in encounter order
 */
public static List<Triplet> getDebug(List<Triplet> trips)
{
    List<Triplet> picked = new ArrayList<Triplet>();
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        Triplet candidate = it.next();
        if (candidate.isDebug())
        {
            picked.add(candidate);
        }
    }
    return picked;
}
/**
 * Overwrites the three GenBank ID fields with the gene symbols of modulator, factor and
 * target (null where a gene has no symbol). Despite the method name, no hyperlink is
 * generated: the hyperlink variant is disabled and the gene ID fields are left untouched.
 * <p>
 * The symbol map is obtained through {@link #getGeneToSymbolMap()} so that it is loaded on
 * first use; reading the static field directly failed with a NullPointerException whenever
 * this method was called before anything else had initialized the map.
 */
public void replaceIDsWithURL()
{
    Map<String, String> idToSymbol = getGeneToSymbolMap();
    mod_id = idToSymbol.get(modulator);
    fac_id = idToSymbol.get(factor);
    tar_id = idToSymbol.get(target);
}
/**
 * Makes sure the ID-to-symbol map is loaded and then applies the instance method
 * {@code replaceIDsWithURL()} to every given triplet.
 *
 * @param trips triplets to update in place
 */
public static void replaceIDsWithURL(Collection<Triplet> trips)
{
    // Loads the shared map on first use; the per-triplet call below relies on it.
    getGeneToSymbolMap();
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        it.next().replaceIDsWithURL();
    }
}
/**
 * Removes hyperlink wrapping from the ID fields of every given triplet by calling
 * {@link #backFromURLToIDs()} on each.
 *
 * @param trips triplets to update in place
 */
public static void restoreIDs(Collection<Triplet> trips)
{
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        it.next().backFromURLToIDs();
    }
}
/**
 * Prepares an Excel HYPERLINK formula pointing at the NCBI page of the given Entrez Gene ID.
 * The link is labelled with the gene symbol when one is mapped, otherwise with the ID.
 *
 * @param geneID Entrez Gene ID
 * @return the HYPERLINK formula text
 */
private static String getGeneHyperlink(String geneID)
{
    getGeneToSymbolMap();
    String label = geneToSymbolMap.containsKey(geneID) ? geneToSymbolMap.get(geneID) : geneID;
    String address = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?" +
        "db=gene&cmd=Retrieve&dopt=Graphics&list_uids=" + geneID;
    return "=HYPERLINK(\"" + address + "\",\"" + label + "\")";
}
/**
 * Prepares an Excel HYPERLINK formula for the given GenBank ID. The part of the ID before the
 * first '|' is used in the address and the whole ID as the label.
 * NOTE(review): an ID without '|' makes the substring call throw -- confirm inputs always
 * contain one.
 *
 * @param gbID GenBank ID containing a '|'
 * @return the HYPERLINK formula text
 */
private static String getGBHyperlink(String gbID)
{
    int bar = gbID.indexOf("|");
    String accession = gbID.substring(0, bar);
    String address = "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=" + accession;
    return "=HYPERLINK(\"" + address + "\",\"" + gbID + "\")";
}
//----------------------------------------------------------------------------------------------
// Debug code
//----------------------------------------------------------------------------------------------
/**
 * Debug helper: reads a fixed result file and prints, per target, the number of distinct
 * factors it occurs with.
 */
private static void printTarsWithMostFactors()
{
    Map<String, Set<String>> factorsByTarget = new HashMap<String, Set<String>>();
    List<Triplet> trips = readTrips("result/All_fdr0.05_var1.0.xls");
    for (Triplet t : trips)
    {
        t.backFromURLToSymbol();
        Set<String> factors = factorsByTarget.get(t.target);
        if (factors == null)
        {
            factors = new HashSet<String>();
            factorsByTarget.put(t.target, factors);
        }
        factors.add(t.factor);
    }
    TermCounter tally = new TermCounter();
    for (Map.Entry<String, Set<String>> entry : factorsByTarget.entrySet())
    {
        // One count per distinct factor of this target.
        for (int remaining = entry.getValue().size(); remaining > 0; remaining--)
        {
            tally.addTerm(entry.getKey());
        }
    }
    tally.print();
}
/**
 * Debug helper: for every triplet of a fixed result file (ordered by {@code pvalGamma}),
 * plots two 2D histograms of factor versus target expression -- one over the experiments
 * where the modulator status is ABSENT, one where it is PRESENT.
 */
private static void printFT()
{
    List<Triplet> trips = readTripsAndAssociate("result/All_fdr0.05_var1.0.xls",
        "resource/experiments_expO_1.txt", "resource/experiments_expO_2.txt");
    Triplet.orderPvalGamma(trips);
    for (Triplet t : trips)
    {
        Histogram2D whenAbsent = new Histogram2D(0.2);
        String title = Triplet.getGeneToSymbolMap().get(t.F.geneid) + " -> " +
            Triplet.getGeneToSymbolMap().get(t.T.geneid) + " in " +
            Triplet.getGeneToSymbolMap().get(t.M.geneid);
        whenAbsent.setName(title + "-");
        Histogram2D whenPresent = new Histogram2D(0.2);
        whenPresent.setName(title + "+");

        final int samples = t.F.value.length;
        for (int k = 0; k < samples; k++)
        {
            int status = t.M.status[k];
            if (status == ABSENT)
            {
                whenAbsent.count(t.F.value[k], t.T.value[k]);
            }
            else if (status == PRESENT)
            {
                whenPresent.count(t.F.value[k], t.T.value[k]);
            }
        }
        whenAbsent.plot(false);
        whenPresent.plot(true);
    }
}
/**
 * Debug helper: pairs each triplet's modulator with the modulator of a randomly picked
 * triplet from a fixed result file, computes the pairwise effect for every not-yet-seen
 * ordered pair of distinct genes, and prints the density histogram of those effects.
 */
private static void printFTEffect()
{
    List<Triplet> trips = readTripsAndAssociate("result/All_fdr0.05_var1.0.xls",
        "resource/experiments_expO_1.txt", "resource/experiments_expO_2.txt");
    Random rnd = new Random();
    Histogram effects = new Histogram(0.05);
    Set<String> seenPairs = new HashSet<String>();
    for (Triplet t : trips)
    {
        Gene first = t.M;
        Gene second = trips.get(rnd.nextInt(trips.size())).M;
        if (first == second)
        {
            continue;
        }
        String pairKey = first.id + second.id;
        if (seenPairs.contains(pairKey))
        {
            continue;
        }
        effects.count(TripletGraphMLWriter.calcPairwiseEffect(first, second));
        seenPairs.add(pairKey);
    }
    effects.printDensity();
}
/**
 * Debug helper: prints, for every distinct factor gene of a fixed result file, the value of
 * {@code calcPresenceOnTissue} over the leukemia hit array.
 *
 * @throws IOException declared by the original signature
 */
private static void printFactorPresenceInLeukemia() throws IOException
{
    List<Triplet> trips = readTripsAndAssociate("result/Big_all_fdr0.05_var1.0.xls",
        "resource/exp_big_1.txt", "resource/exp_big_2.txt", "resource/exp_big_3.txt", "resource/exp_big_4.txt");
    Set<Gene> factors = new HashSet<Gene>();
    for (Iterator<Triplet> it = trips.iterator(); it.hasNext();)
    {
        factors.add(it.next().F);
    }
    boolean[] leukemiaHits = CellTypeMatcher.getLeukemiaHitArrayForBigdata();
    for (Gene factorGene : factors)
    {
        String presence = fmt.format(factorGene.calcPresenceOnTissue(leukemiaHits));
        System.out.println(factorGene.getPrintable() + "\t" + presence);
    }
}
/**
 * Debug helper: plots a log-scaled 2D histogram of gamma against (2 * support - 1) for the
 * triplets of a fixed result file.
 */
public static void printGammaAndSupportPlot()
{
    Histogram2D plot = new Histogram2D(0.05);
    for (Triplet t : readTrips("result/All_big_fdr0.05_var1.0.xls"))
    {
        double gamma = Difference.calcGamma(t);
        double support = Difference.getGammaSupport(t.cnt, true);
        double scaledSupport = (2 * support) - 1;
        plot.count(gamma, scaledSupport);
    }
    plot.takeLog();
    plot.plot();
}
/**
 * Debug helper: prints the gene symbol of every ID found in a fixed triplet file; IDs without
 * a mapped symbol are skipped.
 */
public static void printSymInFile()
{
    Set<String> ids = Triplet.readGeneIDs("resource/factor-trips/AR.txt");
    for (Iterator<String> it = ids.iterator(); it.hasNext();)
    {
        String id = it.next();
        if (!getGeneToSymbolMap().containsKey(id))
        {
            continue;
        }
        System.out.println(getGeneToSymbolMap().get(id));
    }
}
/**
 * Reads gene symbols, one per line, from "test.txt" and prints each symbol followed by a tab
 * and its Entrez Gene ID (empty when the symbol is unknown).
 *
 * @param args unused
 * @throws Throwable on any failure, e.g. when the file cannot be read
 */
public static void main(String[] args) throws Throwable
{
    BufferedReader in = new BufferedReader(new FileReader("test.txt"));
    String symbol;
    while ((symbol = in.readLine()) != null)
    {
        Map<String, String> symbolToId = Triplet.getSymbolToGeneMap();
        String id = "";
        if (symbolToId.containsKey(symbol))
        {
            id = symbolToId.get(symbol);
        }
        System.out.println(symbol + "\t" + id);
    }
    in.close();
}
}