package gem.parser;
import gem.CellTypeMatcher;
import gem.Gene;
import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Parses mas5values.txt and mas5calls.txt files.
*
* @author Ozgun Babur
*/
public class ExperimentParser
{
    /** Path of the platform annotation file used by the current {@link #parse} call. */
    private static String platform;

    /** Path of the tab-separated call file used by the current {@link #parse} call. */
    private static String callfile;

    /** Path of the tab-separated value file used by the current {@link #parse} call. */
    private static String valuefile;

    /** Number of experiment columns, as computed by {@link #countExperiments()}. */
    private static int expsize;

    /**
     * Parses the hard-coded data set under <code>resource/expdata/expop/1/</code>, writes it to
     * <code>resource/expop/expop_1.txt</code> and then runs {@link #takeSubset()}.
     *
     * @param args ignored
     * @throws Throwable whatever {@link #takeSubset()} or the parsing code lets escape
     */
    public static void main(String[] args) throws Throwable
    {
        String dir = "resource/expdata/expop/1/";
        parse(dir + "platform.txt", dir + "mas5calls.txt", dir + "mas5values.txt",
            "resource/expop/expop_1.txt");
        takeSubset();
    }

    /**
     * Reads the platform, call and value files, combines them into a list of {@link Gene}
     * objects and hands that list to {@link Gene#writeGenes} together with the output path.
     *
     * The three input paths are stored in static fields, so this method is not safe to run
     * concurrently from several threads.
     *
     * @param plat path of the platform annotation file
     * @param callfi path of the call file
     * @param valuefi path of the value file
     * @param filetowrite path passed on to {@link Gene#writeGenes}
     */
    public static void parse(String plat, String callfi, String valuefi, String filetowrite)
    {
        platform = plat;
        callfile = callfi;
        valuefile = valuefi;

        Map<String, String> affyToGene = parsePlatform();
        expsize = countExperiments();
        List<Gene> exps = parseExperiments(affyToGene);
        Gene.writeGenes(exps, filetowrite);
    }

    /**
     * Builds a map from the first column of the platform file (the probe id) to the string
     * <code>gene + "\t" + gb</code>, where <code>gene</code> is column 11 and <code>gb</code>
     * is column 1 of the same row.
     *
     * Rows are kept only when they have more than 11 columns and the gene column is non-empty,
     * is not <code>"0"</code> and contains no <code>"/"</code>. An {@link IOException} is
     * reported on stderr and whatever was read up to that point is returned.
     *
     * @return the probe-to-gene map, never null
     */
    private static Map<String, String> parsePlatform()
    {
        Map<String, String> affyToGene = new HashMap<String, String>();
        BufferedReader reader = null;
        try
        {
            reader = new BufferedReader(new FileReader(platform));

            // Skip header lines. The first line that does not start with '!', '#' or '^' is
            // consumed as well (presumably the column-title row -- confirm against the data).
            String line;
            while ((line = reader.readLine()) != null)
            {
                if (!(line.startsWith("!") || line.startsWith("#") || line.startsWith("^")))
                {
                    break;
                }
            }

            while ((line = reader.readLine()) != null)
            {
                if (line.startsWith("!")) continue;

                String[] terms = line.split("\t");
                if (terms.length > 11)
                {
                    String affy = terms[0];
                    String gb = terms[1];
                    String gene = terms[11];

                    if (gene.length() > 0 && !gene.equals("0") && gene.indexOf("/") < 0)
                    {
                        affyToGene.put(affy, gene + "\t" + gb);
                    }
                }
            }
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        finally
        {
            closeQuietly(reader);
        }
        return affyToGene;
    }

    /**
     * Counts the experiment columns of the call file: the first line is skipped, the second
     * line is split on tabs and the number of fields minus one is returned. The count is also
     * printed to stdout.
     *
     * @return the number of experiment columns; 0 if the file has no second line or cannot be
     * read
     */
    private static int countExperiments()
    {
        int n = 0;
        BufferedReader reader = null;
        try
        {
            reader = new BufferedReader(new FileReader(callfile));
            reader.readLine();
            String line = reader.readLine();

            // A file with only a header (or no content at all) has no data row to measure.
            if (line != null)
            {
                String[] terms = line.split("\t");
                n = terms.length - 1;
            }
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        finally
        {
            closeQuietly(reader);
        }
        System.out.println("Number of experiments = " + n);
        return n;
    }

    /**
     * Reads the call file and the value file in lock step (after skipping the first line of
     * each) and creates one {@link Gene} for every row whose first column is a key of the
     * given map.
     *
     * The array passed to the {@link Gene} constructor holds the gene part of the map value at
     * index 0, <code>gb + "|" + probeId</code> at index 1, and from index 2 on one entry per
     * experiment of the form <code>Gene.getCall(call) + " " + value</code>.
     *
     * An {@link IOException} is reported on stderr and the genes read so far are returned.
     *
     * @param affyToGene map produced by {@link #parsePlatform()}
     * @return the parsed genes, never null
     * @throws IllegalStateException if the value file has fewer rows than the call file
     */
    private static List<Gene> parseExperiments(Map<String, String> affyToGene)
    {
        List<Gene> list = new ArrayList<Gene>();
        BufferedReader callReader = null;
        BufferedReader valuReader = null;
        try
        {
            callReader = new BufferedReader(new FileReader(callfile));
            valuReader = new BufferedReader(new FileReader(valuefile));

            callReader.readLine();
            valuReader.readLine();

            String cline;
            String vline;
            while ((cline = callReader.readLine()) != null)
            {
                vline = valuReader.readLine();
                if (vline == null)
                {
                    // Checked explicitly so that the failure is meaningful even when
                    // assertions are disabled.
                    throw new IllegalStateException("Value file " + valuefile +
                        " has fewer rows than call file " + callfile);
                }

                String[] cterms = cline.split("\t");
                String[] vterms = vline.split("\t");
                String[] cnstrLine = new String[expsize + 2];

                assert cterms[0].equals(vterms[0]) : "call:" + cterms[0] + " val:" + vterms[0];
                assert cterms.length == expsize + 1 : "cterms.length = " + cterms.length;
                assert vterms.length == expsize + 1 : "vterms.length = " + vterms.length;

                if (affyToGene.containsKey(cterms[0]))
                {
                    // The map value is "gene\tgb"; split() drops a trailing empty field, so
                    // entries with an empty gb yield a single element and are skipped here.
                    String[] ids = affyToGene.get(cterms[0]).split("\t");
                    if (ids.length != 2)
                    {
                        continue;
                    }

                    cnstrLine[0] = ids[0];
                    cnstrLine[1] = ids[1] + "|" + cterms[0];

                    // Output slot i corresponds to input column i - 1, because the output has
                    // two leading id slots while the input has one.
                    for (int i = 2; i <= cterms.length; i++)
                    {
                        cnstrLine[i] = Gene.getCall(cterms[i-1]) + " " + vterms[i-1];
                    }
                    list.add(new Gene(cnstrLine));
                }
            }
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        finally
        {
            closeQuietly(callReader);
            closeQuietly(valuReader);
        }
        System.out.println("Number of genes with Gene ID = " + list.size());
        return list;
    }

    /**
     * Copies <code>resource/expop/expop_1.txt</code> .. <code>expop_3.txt</code> to
     * <code>resource/expop/pc_1.txt</code> .. <code>pc_3.txt</code>, keeping the first two
     * columns of every row and only those remaining columns whose flag is set in the array
     * returned by <code>CellTypeMatcher.getCancerProstateInExpop("all")</code>.
     *
     * The flag array is indexed continuously across the three files: the offset for a file is
     * the sum of the experiment-column counts of the files before it, where a file's count is
     * taken from its last row.
     *
     * @throws Throwable if a file cannot be read or written, or if the flag lookup fails
     */
    public static void takeSubset() throws Throwable
    {
        String originalFile = "resource/expop/expop_";
        String newFile = "resource/expop/pc_";

        boolean[] pos = CellTypeMatcher.getCancerProstateInExpop("all");

        int index = 0;

        for (int i = 1; i <= 3; i++)
        {
            BufferedReader reader = null;
            BufferedWriter writer = null;
            try
            {
                reader = new BufferedReader(new FileReader(originalFile + i + ".txt"));
                writer = new BufferedWriter(new FileWriter(newFile + i + ".txt"));

                int expCnt = 0;

                for (String line = reader.readLine(); line != null; line = reader.readLine())
                {
                    String[] tokens = line.split("\t");
                    expCnt = tokens.length - 2;

                    writer.write(tokens[0] + "\t" + tokens[1]);

                    for (int j = 2; j < tokens.length; j++)
                    {
                        if (pos[index + j - 2]) writer.write("\t" + tokens[j]);
                    }
                    writer.write("\n");
                }

                index += expCnt;

                // Close on the success path so that a failing final flush is reported to the
                // caller; the finally block only covers the failure path.
                writer.close();
            }
            finally
            {
                closeQuietly(writer);
                closeQuietly(reader);
            }
        }
    }

    /**
     * Closes the given resource if it is non-null, ignoring any {@link IOException}. Intended
     * for cleanup in <code>finally</code> blocks, where a secondary failure must not mask the
     * original one.
     *
     * @param resource the resource to close, may be null
     */
    private static void closeQuietly(Closeable resource)
    {
        if (resource == null) return;

        try
        {
            resource.close();
        }
        catch (IOException ignored)
        {
            // Nothing useful can be done about a failure while releasing the resource.
        }
    }
}