package gem;
import gem.parser.HPRDParser;
import gem.parser.TabDelimitedFileParser;
import gem.util.Kronometre;
import gem.util.Progress;
import gem.util.SetUtils;
import java.util.*;
/**
* Main class to run the GEM implementation. Make sure that the constant <code>TRIPLET_FILE</code>
* points to the hypothesis file, where each row contains Entrez Gene IDs of triplets. The false
* discovery rate is controlled by the <code>FDR</code> constant. The constant <code<MINVAR</code>
* is for filtering genes with low variance in the expression dataset. Any triplet hypothesis
* containing a gene with variance under this threshold is filtered out.
*/
public class GEMAllPairs implements Constants
{
public static final String FACNAME = "AR";
// These sets of M, F, and T are used for debugging, for filtering triplets to the desired
// M, F, or T.
private static String[] MODULATOR = new String[]{};//"NR3C1"};
private static String[] FACTOR = new String[]{};//"AR"};
private static String[] TARGET = new String[]{};//"NOL3"};
/**
* Desired false discovery rate.
*/
private static final double FDR = 0.01;
/**
* Variance threshold for genes. Any triplet that contains a gene with a lower variance will be
* filtered out.
*/
public static final double MINVAR = 10;
/**
* Name of the result file.
*/
private static String RESULT_FILE = "result/Result" +
"_fdr" + FDR + "_var" + MINVAR + "_" + FACNAME +
(MODULATOR.length > 0 ? "_M_" + MODULATOR[0] : "") +
(FACTOR.length > 0 ? "_F_" + FACTOR[0] : "") +
(TARGET.length > 0 ? "_T_" + TARGET[0] : "") +
"_allpairs_expo.xls";
/**
* Method that runs the analysis.
*/
public void run() throws Throwable
{
Map<String,Set<String>> map =
HPRDParser.readFor(new HashSet<String>(Arrays.asList(FACNAME)));
Set<String> interactors = map.get(FACNAME);
TabDelimitedFileParser parser =
new TabDelimitedFileParser("resource/factors/" + FACNAME + ".txt");
Set<String> targets = parser.getColumnSet(0);
SetUtils.removeCommon(interactors, targets);
// Remove ETV1-AR interaction from triplets. This is a false positive entry in HPRD. We
// noticed it while investigating our results. GEM reported ETV1 as a strong modulator of
// AR, but we could not find any evidence in the literature. Then we investigated the
// evidence of ETV1-AR interaction in the HPRD database, and realized that the related paper
// in HPRD databse is actually not related to ETV1. Dr. Renu Goel from HPRD approved that
// ETV1-AR interaction is an incorrect entry in HPRD database.
interactors.remove("ETV1");
interactors.add(FACNAME);
Map<String, String> s2g = Triplet.getSymbolToGeneMap();
Set<String> mods = new HashSet<String>();
for (String inter : interactors) if (s2g.containsKey(inter)) mods.add(s2g.get(inter));
Set<String> tars = new HashSet<String>();
for (String tar : targets) if (s2g.containsKey(tar)) tars.add(s2g.get(tar));
// If the above processing didn't leave any triplet, return
if (mods.isEmpty() || tars.isEmpty()) return;
Set<String> ids = new HashSet<String>(mods);
ids.addAll(tars);
Map<String, Gene> id2gene = ExpDataReader.readGenes(ids, "resource/expdata/expo", MINVAR, 0.25);
Map<String, Double> t2pv = new HashMap<String, Double>();
double pv_thr = 5.418500919305291E-4;
List <Triplet> trips = new ArrayList<Triplet>();
Progress p = new Progress(mods.size());
for (String u1 : mods)
{
p.tick();
Gene U1 = id2gene.get(u1);
if (U1 == null) continue;
for (String u2 : mods)
{
if (u1.compareTo(u2) < 0)
{
Gene U2 = id2gene.get(u2);
if (U2 == null) continue;
for (String tar : tars)
{
Gene T = id2gene.get(tar);
if (T == null) continue;
Triplet t = new Triplet(U1, U2, T);
CaseCounter.count(t);
double pv = Difference.calcGammaPval(t);
if (pv_thr > 0)
{
if (pv < pv_thr) trips.add(t);
}
else t2pv.put(u1 + " " + u2 + " " + tar, pv);
}
}
}
}
if (pv_thr < 0)
{
System.out.println("t2pv.size() = " + t2pv.size());
List<Double> pvals = new ArrayList<Double>(t2pv.values());
System.out.println("pvals.size() = " + pvals.size());
// Find the gamma significance thresold for the desired FDR
pv_thr = findThreshold(pvals, FDR);
System.out.println("Threshold gamma significance for fdr " + FDR + " = " + pv_thr);
// Re-create significant triplets
for (String key : t2pv.keySet())
{
double pv = t2pv.get(key);
if (pv < pv_thr)
{
String[] id = key.split(" ");
Triplet t = new Triplet(id2gene.get(id[0]), id2gene.get(id[1]), id2gene.get(id[2]));
CaseCounter.count(t);
trips.add(t);
}
}
System.out.println("Re-created significant trips. Size = " + trips.size());
}
System.out.println("trips.size = " + trips.size());
// Assign modulation categories
TripletClassifier.assignClass(trips, 0.05);
if (!trips.isEmpty())
{
//Record significant modulations to the result file
Triplet.record(trips, RESULT_FILE);
}
}
private static double findThreshold(List<Double> list, double fdr)
{
Collections.sort(list);
int i = 0;
for (Double pv : list)
{
i++;
double fd = pv * list.size();
double rate = fd / i;
if (rate > fdr) return pv;
}
return 1;
}
public static void main(String[] args) throws Throwable
{
Kronometre kron = new Kronometre();
GEMAllPairs m = new GEMAllPairs();
m.run();
kron.stop();
kron.print();
}
}