package gem;
import gem.parser.TabDelimitedFileParser;
import gem.util.Histogram;
import gem.util.Summary;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.*;
/**
 * Collection of ad-hoc, runnable experiments: one writes the indexes of data
 * columns whose accession is not listed in the expO cell-type table, the other
 * empirically inspects the distribution of several {@link Difference}
 * statistics under random shuffling of a gene's status array.
 *
 * @author Ozgun Babur
 */
public class RelationTester implements Constants
{
    /** Number of status entries given to every randomly generated gene. */
    private static final int expsize = 1000;

    /** Shared random source for gene generation, shuffling and sampling. */
    private static final Random rand = new Random();

    /**
     * Entry point; currently runs {@link #writeLeukemiaIndexes()}.
     *
     * @param args ignored
     * @throws Throwable whatever the invoked experiment throws
     */
    public static void main(String[] args) throws Throwable
    {
        writeLeukemiaIndexes();
    }

    /**
     * Concatenates the tab-separated header columns of the four
     * <code>mas5calls.txt</code> files (in order 1..4) and writes to
     * <code>resource/Leukemia-in-big.txt</code>, one per line, the position of
     * every column whose accession (the column name up to its first '.') is
     * NOT contained in the "Accession" column of
     * <code>resource/celltypes_expO.txt</code>. The number of written indexes
     * is printed to standard output.
     *
     * @throws Throwable if any of the files cannot be read or written, or if
     *                   a data file has no header line
     */
    public static void writeLeukemiaIndexes() throws Throwable
    {
        TabDelimitedFileParser parser = new TabDelimitedFileParser("resource/celltypes_expO.txt");
        Set<String> expoSet = parser.getColumnSet("Accession");

        List<String> bigList = new ArrayList<String>();
        for (int part = 1; part <= 4; part++)
        {
            bigList.addAll(readHeaderColumns("resource/expdata/bigdata/" + part + "/mas5calls.txt"));
        }

        int j = 0;
        BufferedWriter writer = new BufferedWriter(new FileWriter("resource/Leukemia-in-big.txt"));
        try
        {
            for (int i = 0; i < bigList.size(); i++)
            {
                if (!expoSet.contains(accessionOf(bigList.get(i))))
                {
                    writer.write(i + "\n");
                    j++;
                }
            }
        }
        finally
        {
            // Close (and flush) even when a write fails part-way through.
            writer.close();
        }
        System.out.println("j = " + j);
    }

    /**
     * Reads the first line of the given file and splits it on tab characters.
     * The reader is always closed before returning.
     *
     * @param path file to read the header from
     * @return the header columns, in file order
     * @throws java.io.IOException if the file cannot be read or is empty
     */
    private static List<String> readHeaderColumns(String path) throws java.io.IOException
    {
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try
        {
            String header = reader.readLine();
            if (header == null)
            {
                throw new java.io.IOException("File has no header line: " + path);
            }
            return Arrays.asList(header.split("\t"));
        }
        finally
        {
            reader.close();
        }
    }

    /**
     * Returns the part of a column name that precedes its first '.'; a name
     * without any '.' is returned unchanged instead of raising an exception.
     *
     * @param columnName header column name
     * @return the accession portion of the name
     */
    private static String accessionOf(String columnName)
    {
        int dot = columnName.indexOf('.');
        return dot < 0 ? columnName : columnName.substring(0, dot);
    }

    /**
     * Simulation that keeps two random genes fixed, reshuffles a third one
     * 100000 times, and for every shuffle evaluates the alpha-M, beta-M and
     * modulation values computed by {@link Difference} on the resulting
     * triplet. It then prints:
     * <ul>
     * <li>the fraction of iterations in which |modulation| and
     * ||betaM| - |alphaM|| reached the threshold 0.12,</li>
     * <li>the values of {@link Difference#calcPval} for that threshold using
     * the observed standard deviations,</li>
     * <li>the standard deviation of each recorded series, and</li>
     * <li>a histogram of |betaM| - |alphaM| next to a histogram of a Gaussian
     * sample with the same standard deviation.</li>
     * </ul>
     *
     * @throws Throwable whatever the underlying calculations throw
     */
    public static void testDistribution() throws Throwable
    {
        Gene M = createRandomGene("M");
        Gene F = createRandomGene("F");
        Gene T = createRandomGene("T");

        double val = 0.12;
        int count = 0;
        int count2 = 0;
        int iter = 100000;

        // d[0]: |betaM| - |alphaM|, d[1]: betaM, d[2]: modulation
        double[][] d = new double[3][iter];

        for (int i = 0; i < iter; i++)
        {
            randomize(T);
            Triplet t = new Triplet(M, F, T);
            CaseCounter.count(t);

            int[] n = Difference.calcTotals(t.cnt);
            double[] p = Difference.calcProportions(t.cnt, n);
            double am = Difference.calcAlphaM(p);
            double bm = Difference.calcBetaM(p);
            double mod = Difference.calcModulation(t.cnt);

            if (Math.abs(mod) >= val) count++;
            if (Math.abs(Math.abs(bm) - Math.abs(am)) >= val) count2++;

            d[0][i] = Math.abs(bm) - Math.abs(am);
            d[1][i] = bm;
            d[2][i] = mod;
        }

        System.out.println("pval mod = " + (count / (double) iter));
        double pvabs = count2 / (double) iter;
        System.out.println("pval abs = " + pvabs);

        double pv1 = Difference.calcPval(val, Summary.stdev(d[0]));
        double pv2 = Difference.calcPval(val, Summary.stdev(d[1]));
        System.out.println("pv1 = " + pv1);
        System.out.println("pv2 = " + pv2);

        // NOTE(review): the rationale for this particular combination is not
        // visible in this file -- confirm with the author before relying on it.
        double pv3 = (pv2 / 2) + (pvabs / 4);
        System.out.println("pv3 = " + pv3);

        for (int i = 0; i < d.length; i++)
        {
            System.out.println("dev " + i + " = " + Summary.stdev(d[i]));
        }

        double[] g = getSample(Summary.stdev(d[0]), d[0].length);
        Histogram h1 = new Histogram(0.01, d[0]);
        Histogram h2 = new Histogram(0.01, g);
        h1.printTogether(h2);
    }

    /**
     * Creates a gene whose two identifying constructor arguments are both
     * <code>id</code> and whose status array holds {@link #expsize} entries,
     * each independently PRESENT or ABSENT with equal probability.
     *
     * @param id value passed as the first two constructor arguments of Gene
     * @return the newly created gene
     */
    static Gene createRandomGene(String id)
    {
        Gene g = new Gene(id, id, expsize);
        g.status = new int[expsize];
        for (int i = 0; i < expsize; i++)
        {
            g.status[i] = rand.nextBoolean() ? PRESENT : ABSENT;
        }
        return g;
    }

    /**
     * Draws values from a zero-mean Gaussian distribution with the given
     * standard deviation.
     *
     * @param stdev standard deviation used to scale each draw
     * @param size  number of values to draw
     * @return array of <code>size</code> sampled values
     */
    static double[] getSample(double stdev, int size)
    {
        double[] d = new double[size];
        for (int i = 0; i < size; i++)
        {
            d[i] = rand.nextGaussian() * stdev;
        }
        return d;
    }

    /**
     * Shuffles the status array of the given gene in place using the
     * Fisher-Yates algorithm, so that every permutation is equally likely.
     * (Swapping each position with an index drawn from the whole range, as was
     * done previously, yields a biased distribution of permutations.)
     *
     * @param gene gene whose <code>status</code> array is shuffled
     */
    static void randomize(Gene gene)
    {
        int[] status = gene.status;
        for (int i = status.length - 1; i > 0; i--)
        {
            // Pick from the not-yet-fixed prefix [0, i], inclusive.
            int j = rand.nextInt(i + 1);
            int temp = status[i];
            status[i] = status[j];
            status[j] = temp;
        }
    }
}