Package gem.parser

Source Code of gem.parser.ExperimentParser

package gem.parser;

import gem.CellTypeMatcher;
import gem.Gene;

import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
* Parses mas5values.txt and mas5calls.txt files.
*
* @author Ozgun Babur
*/
public class ExperimentParser
{
  private static String platform;
  private static String callfile;
  private static String valuefile;
  private static int expsize;

  public static void main(String[] args) throws Throwable
  {
    String dir = "resource/expdata/expop/1/";
    parse(dir + "platform.txt", dir + "mas5calls.txt", dir + "mas5values.txt",
      "resource/expop/expop_1.txt");
    takeSubset();
  }

  public static void parse(String plat, String callfi, String valuefi, String filetowrite)
  {
    platform = plat;
    callfile = callfi;
    valuefile = valuefi;

    Map<String, String> affyToGene = parsePlatform();
    expsize = countExperiments();
    List<Gene> exps = parseExperiments(affyToGene);
    Gene.writeGenes(exps, filetowrite);
  }

  private static Map<String, String> parsePlatform()
  {
    Map<String, String> affyToGene = null;

    try
    {
      affyToGene = new HashMap<String, String>();
      BufferedReader reader = new BufferedReader(new FileReader(platform));

      // skip header lines
      String line;

      while ((line = reader.readLine()) != null)
      {
        if (!(line.startsWith("!") || line.startsWith("#") || line.startsWith("^")))
        {
          break;
        }
      }

      while ((line = reader.readLine()) != null)
      {
        if (line.startsWith("!")) continue;

        String[] terms = line.split("\t");

        if (terms.length > 11)
        {
          String affy = terms[0];
          String gb = terms[1];
          String gene = terms[11];

          if (gene.length() > 0 && !gene.equals("0") && gene.indexOf("/") < 0)
          {
            affyToGene.put(affy, gene + "\t" + gb);
          }
        }
      }
      reader.close();
    }
    catch (IOException e)
    {
      e.printStackTrace();
    }
    return affyToGene;
  }

  private static int countExperiments()
  {
    int n = 0;
    try
    {
      BufferedReader reader = new BufferedReader(new FileReader(callfile));

      reader.readLine();
      String line = reader.readLine();

      String[] terms = line.split("\t");

      n = terms.length - 1;

      reader.close();
    }
    catch (IOException e)
    {
      e.printStackTrace();
    }
    System.out.println("Number of experiments = " + n);
    return n;
  }

  private static List<Gene> parseExperiments(Map<String, String> affyToGene)
  {
    List<Gene> list = null;

    try
    {
      list = new ArrayList<Gene>();
      BufferedReader callReader = new BufferedReader(new FileReader(callfile));
      BufferedReader valuReader = new BufferedReader(new FileReader(valuefile));

      callReader.readLine();
      valuReader.readLine();

      String cline;
      String vline;

      while((cline = callReader.readLine())!= null)
      {
        vline = valuReader.readLine();
        assert vline != null;

        String[] cterms = cline.split("\t");
        String[] vterms = vline.split("\t");

        String[] cnstrLine = new String[expsize + 2];

        assert cterms[0].equals(vterms[0]) : "call:" + cterms[0] + " val:" + vterms[0];

        assert cterms.length == expsize + 1 : "cterms.length = " + cterms.length;
        assert vterms.length == expsize + 1 : "vterms.length = " + vterms.length;

        if (affyToGene.containsKey(cterms[0]))
        {
          String[] ids = affyToGene.get(cterms[0]).split("\t");

          if (ids.length != 2)
          {
            continue;
          }

          cnstrLine[0] = ids[0];
          cnstrLine[1] = ids[1] + "|" + cterms[0];

          for (int i = 2; i <= cterms.length; i++)
          {
            cnstrLine[i] = Gene.getCall(cterms[i-1]) + " " + vterms[i-1];
          }

          list.add(new Gene(cnstrLine));
        }
      }

      callReader.close();
    }
    catch (IOException e)
    {
      e.printStackTrace();
    }
    System.out.println("Number of genes with Gene ID = " + list.size());
    return list;
  }

  public static void takeSubset() throws Throwable
  {
    String originalFile = "resource/expop/expop_";
    String newFile = "resource/expop/pc_";
    boolean[] pos = CellTypeMatcher.getCancerProstateInExpop("all");

    int index = 0;
    for (int i = 1; i <= 3; i++)
    {
      BufferedReader reader = new BufferedReader(new FileReader(originalFile + i + ".txt"));
      BufferedWriter writer = new BufferedWriter(new FileWriter(newFile + i + ".txt"));

      int expCnt = 0;

      for (String line = reader.readLine(); line != null; line = reader.readLine())
      {
        String[] tokens = line.split("\t");
        expCnt = tokens.length - 2;

        writer.write(tokens[0] + "\t" + tokens[1]);
        for (int j = 2; j < tokens.length; j++)
        {
          if (pos[index + j - 2]) writer.write("\t" + tokens[j]);
        }
        writer.write("\n");
      }

      index += expCnt;
      reader.close();
      writer.close();
    }
  }
}
TOP

Related Classes of gem.parser.ExperimentParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.