package gem;
import gem.parser.HPRDParser;
import gem.parser.TabDelimitedFileParser;
import gem.util.TermCounter;
import java.io.File;
import java.io.IOException;
import java.util.*;
/**
* @author Ozgun Babur
*/
public class CellLineCorrecter
{
public static final double THR = 0.05;
public static void main(String[] args) throws Throwable
{
printAndrogenApprovedTargets();
// getAndrogenApprovedTrips();
// filterSignsOfAndrRespTrips();
// List<Triplet> trips = getCellCorrectedTrips();
// Triplet.record(trips, "result/Result_fdr0.05_var10.0_AR_expo_select_LNCaP.xls");
}
public static List<Triplet> getCellCorrectedTrips() throws IOException
{
List<Triplet> trips = loadTrips();
System.out.println("Initial trips size = " + trips.size());
Set<Gene> upMods = new HashSet<Gene>();
Set<Gene> dwMods = new HashSet<Gene>();
for (Triplet t : trips)
{
if (upNames_LNCaP.contains(t.getMSym()))
{
upMods.add(t.M);
}
else if (dwNames_LNCaP.contains(t.getMSym()))
{
dwMods.add(t.M);
}
}
for (Gene mod : new HashSet<Gene>(dwMods))
{
mod.rankAdjustStatus(1D/3);
dwMods.remove(mod);
dwMods.add(mod.getNegative());
}
Set<Gene> mods = new HashSet<Gene>();
mods.addAll(upMods);
mods.addAll(dwMods);
List<Triplet> filtered = new ArrayList<Triplet>();
for (Triplet t : trips)
{
double modVal = Difference.calcModulation(t);
boolean contradicts = false;
for (Gene mod : mods)
{
if (t.M.geneid.equals(mod.geneid)) continue;
Triplet trip = new Triplet(t.M, mod, t.T);
CaseCounter.count(trip);
double mVal = Difference.calcModulation(trip);
double pv = Difference.calcModulationPval(trip);
if (pv < THR && modVal * mVal < 0)
{
contradicts = true;
break;
}
}
if (!contradicts) filtered.add(t);
}
System.out.println("Filtered size = " + filtered.size());
return filtered;
}
public static List<Triplet> getAndrogenApprovedTrips() throws Throwable
{
TabDelimitedFileParser parser = new TabDelimitedFileParser("resource/factors/AR_andr.txt");
Map<String, String> score = parser.getOneToOneMap("Target", "Score");
String tripFile = "result/Result_fdr0.05_var10.0_AR_expo_select.xls";
List<Triplet> trips = Triplet.readTrips(tripFile);
for (Triplet t : trips) t.backFromURLToIDs();
System.out.println("initial size = " + trips.size());
List<Triplet> verified = new ArrayList<Triplet>();
for (Triplet t : trips)
{
int sign = score.get(t.getTSym()).startsWith("-") ? -1 : 1;
double pv1 = Difference.calcAlphaFpval(t);
double pv2 = Difference.calcBetaFpval(t);
int s1 = pv1 < THR ? Difference.calcAlphaF(t) > 0 ? 1 : -1 : 0;
int s2 = pv2 < THR ? Difference.calcBetaF(t) > 0 ? 1 : -1 : 0;
if (s1 == sign || s2 == sign) verified.add(t);
}
Triplet.record(verified, "result/Result_fdr0.05_var10.0_AR_expo_select_filtered.xls");
System.out.println("filtered size = " + verified.size());
return verified;
}
public static void printAndrogenApprovedTargets() throws Throwable
{
// String[] dirs = new String[]{"Ling", "vivek", "MSKCC", "GSE7868", "GSE7708", "GSE846"};
// int[][] pairs = new int[][]{ {0, 1}, {2, 0}, {3, 2}, {0, 2}, {0, 1}, {0, 1}};
String[] dirs = new String[]{"Ling", "GSE7868", "GSE7708", "GSE846"};
int[][] pairs = new int[][]{ {0, 1}, {0, 2}, {0, 1}, {0, 1}};
Map<String, Integer> count = new HashMap<String, Integer>();
for (int i = 0; i < dirs.length; i++)
{
String dir = "resource/expdata/" + dirs[i] + "/";
boolean[][] pos = StageAnalyzer.getPos(dir);
// for (Gene gene : getARInteractors(dir))
for (Gene gene : getARTargets(dir))
{
double pv = CellTypeMatcher.getChangePvalBetweenTissues(
gene, pos[pairs[i][0]], pos[pairs[i][1]]);
double x0 = CellTypeMatcher.getMeanValue(gene, pos[pairs[i][0]]);
double x1 = CellTypeMatcher.getMeanValue(gene, pos[pairs[i][1]]);
if (x0 < 20 && x1 < 20)
{
x0 = Math.exp(x0);
x1 = Math.exp(x1);
}
double rat = x0/x1;
if (pv < THR)// && (rat < 0.5 || rat > 2))
{
int ch = (int) Math.signum(CellTypeMatcher.getMeanChange(
gene, pos[pairs[i][0]], pos[pairs[i][1]]));
if (!count.containsKey(gene.id)) count.put(gene.id, ch);
else count.put(gene.id, count.get(gene.id) + ch);
}
}
}
int i = 0;
TermCounter tc = new TermCounter();
for (String sym : count.keySet())
{
int score = count.get(sym);
tc.addTerm("" + score);
if (Math.abs(score) > 1)
{
System.out.println(sym + "\t" + score);
i++;
}
}
System.out.println("i = " + i);
tc.print();
}
private static void filterSignsOfAndrRespTrips()
{
List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.05_var10.0_AR_expo_andr.xls");
TabDelimitedFileParser parser = new TabDelimitedFileParser("resource/factors/AR_andr.txt");
Map<String, String> score = parser.getOneToOneMap("Target", "Score");
List<Triplet> list = new ArrayList<Triplet>();
for (Triplet t : trips)
{
t.backFromURLToIDs();
int sign = score.get(Triplet.getGeneToSymbolMap().get(t.target)).startsWith("-") ?
-1 : 1;
double pv1 = Difference.calcAlphaFpval(t);
double pv2 = Difference.calcBetaFpval(t);
int s1 = pv1 < THR ? Difference.calcAlphaF(t) > 0 ? 1 : -1 : 0;
int s2 = pv2 < THR ? Difference.calcBetaF(t) > 0 ? 1 : -1 : 0;
if (s1 * sign == 1 || s2 * sign == 1)
{
list.add(t);
}
}
Triplet.record(list, "result/Result_fdr0.05_var10.0_AR_expo_andr_clean.xls");
}
private static Collection<Gene> getARTargets(String dir) throws Throwable
{
// TabDelimitedFileParser parser = new TabDelimitedFileParser("resource/factors/AR.txt");
TabDelimitedFileParser parser = new TabDelimitedFileParser("resource/NuclearReceptors.txt");
Set<String> targets = parser.getColumnSet(0);
//
Set<String> ids = new HashSet<String>();
for (String sym : targets)
{
if (Triplet.getSymbolToGeneMap().containsKey(sym))
ids.add(Triplet.getSymbolToGeneMap().get(sym));
}
// ids.addAll(Triplet.getGeneToSymbolMap().keySet());
Map<String, Gene> geneMap;
File plat = new File(dir + "platform.txt");
if (!plat.exists()) geneMap = CrossPlatformMapper.fetchGenes(ids, dir + "data.txt");
else geneMap = CrossPlatformMapper.fetchGenes(ids, dir + "data.txt", plat.getPath());
return geneMap.values();
}
private static Collection<Gene> getARInteractors(String dir) throws Throwable
{
Map<String, Set<String>> map = HPRDParser.readFor(new HashSet<String>(Arrays.asList("AR")));
Set<String> ids = new HashSet<String>();
for (String modSym : map.get("AR"))
{
String id = Triplet.getSymbolToGeneMap().get(modSym);
if (id != null) ids.add(id);
}
File plat = new File(dir + "platform.txt");
Map<String, Gene> geneMap;
if (!plat.exists()) geneMap = CrossPlatformMapper.fetchGenes(ids, dir + "data.txt");
else geneMap = CrossPlatformMapper.fetchGenes(ids, dir + "data.txt", plat.getPath());
return geneMap.values();
}
static final List<String> upNames_LNCaP = Arrays.asList("AES", "AKT1", "CCNH", "CDC37", "CDK1", "CDK7", "CDK9", "DAP3", "DAXX", "DCAF6", "DDC", "FOXA1", "GAPDH", "GNB2L1", "GTF2F1", "GTF2F2", "HDAC1", "HIPK3", "IDE", "KAT5", "MYST2", "NCOR1", "NONO", "NR0B2", "NSD1", "PA2G4", "PELP1", "PIAS2", "PRMT1", "PRPF6", "RAD54L2", "RAF1", "RCHY1", "SART3", "SMAD4", "SPDEF", "STUB1", "TSG101", "UBE2I", "UBE3A", "XRCC5");
static final List<String> dwNames_LNCaP = Arrays.asList("AHR", "APOL2", "CASP1", "CASP3", "CASP8", "CDC25B", "CDK6", "CTDSP2", "FHL2", "FLNA", "FOXO1", "GAK", "GSN", "GTF2H1", "HIF1A", "HMGB1", "IFI16", "IL6ST", "JMJD1C", "JUN", "MED14", "NCOA4", "NISCH", "NR2C1", "NR2C2", "NR3C1", "PAK6", "PIAS1", "PTEN", "RBAK", "RNASEL", "RUNX1", "SMAD1", "SP1", "TCF4", "TGFB1I1", "TGIF1", "TRIP4", "UXT", "WIPI1");
static final List<String> upNames_prost = Arrays.asList("AES", "AKT1", "CCNH", "DCAF6", "FLNA", "FOXA1", "GTF2F1", "HDAC1", "IDE", "IL6ST", "JMJD1C", "JUN", "KAT2B", "NCOA4", "NISCH", "PIAS1", "RCHY1", "SMAD4", "SPDEF", "STAT3", "TGFB1I1");
static final List<String> dwNames_prost = Arrays.asList("AHR", "BRCA1", "CASP1", "CASP3", "CASP8", "CDC25B", "CDK1", "CTDSP2", "DAXX", "DDC", "FOXO1", "GAPDH", "IFI16", "NR2C1", "PA2G4", "PRMT1", "PSMC3IP", "RB1", "RUNX1", "TCF4", "UBE2I", "XRCC5");
public static List<Triplet> loadTrips() throws IOException
{
List<Triplet> trips = Triplet.readTrips("result/Result_fdr0.05_var10.0_AR_expo_select.xls");
Triplet.removeNonModulation(trips);
for (Triplet t : trips)
{
t.backFromURLToIDs();
}
trips = ExpDataReader.associate(trips, "resource/expdata/expo", 10, 0.25);
return trips;
}
}