// Source Code of org.molgenis.util.plink.drivers.PedFileDriver

package org.molgenis.util.plink.drivers;


import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;


import org.molgenis.util.TextFileUtils;
import org.molgenis.util.plink.PlinkFileParser;
import org.molgenis.util.plink.datatypes.Biallele;
import org.molgenis.util.plink.datatypes.PedEntry;


/**
 * Driver to query PED files. A PED file contains family- and genotyping data
 * for an individual, plus a single phenotype. Basically it is a FAM file with
 * added genotyping (typically SNP) data. However, the example file is a bit
 * peculiar: it has 'null' columns because of additional spacing between some
 * data values. This makes parsing hard. Question: can all Plink files have
 * this? or just PED? See:
 * http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped
 */
public class PedFileDriver implements PlinkFileParser
{
  private BufferedReader reader;
  private File file;
  private char separator;
  private long nrElements;


  /**
   * Construct a PedFileDriver on this file
   * 
   * @param bimFile
   * @throws Exception
   */
  public PedFileDriver(File pedFile)
  {
    this(pedFile, DEFAULT_FIELD_SEPARATOR);
  }


  public PedFileDriver(File pedFile, char separator)
  {
    if (pedFile == null) throw new IllegalArgumentException("file is null");
    this.file = pedFile;
    this.separator = separator;
    this.nrElements = -1l;
  }


  /**
   * Get all PED file entries
   * 
   * @return
   * @throws Exception
   */
  public List<PedEntry> getAllEntries() throws IOException
  {
    reset();


    List<PedEntry> entryList = new ArrayList<PedEntry>();
    String line;
    while ((line = reader.readLine()) != null)
      entryList.add(parseEntry(line));


    return entryList;
  }


  /**
   * Get a specific set of PED file entries
   * 
   * @param from
   *            = inclusive
   * @param to
   *            = exclusive
   * @return
   * @throws Exception
   */
  public List<PedEntry> getEntries(final long from, final long to) throws IOException
  {
    reset();


    List<PedEntry> entryList = new ArrayList<PedEntry>();
    String line;
    for (int i = 0; (line = reader.readLine()) != null && i < to; ++i)
      if (i >= from) entryList.add(parseEntry(line));


    return entryList;
  }


  private PedEntry parseEntry(String line) throws IOException
  {
    StringTokenizer strTokenizer = new StringTokenizer(line, separator + "");
    try
    {
      String family = strTokenizer.nextToken();
      String individual = strTokenizer.nextToken();
      String father = strTokenizer.nextToken();
      String mother = strTokenizer.nextToken();
      byte sex = Byte.parseByte(strTokenizer.nextToken());
      double phenotype = Double.parseDouble(strTokenizer.nextToken());
      List<Biallele> bialleles = new ArrayList<Biallele>();
      while (strTokenizer.hasMoreTokens())
      {
        char allele1 = strTokenizer.nextToken().charAt(0);
        char allele2 = strTokenizer.nextToken().charAt(0);
        bialleles.add(Biallele.create(allele1, allele2));
      }
      return new PedEntry(family, individual, father, mother, sex, phenotype, bialleles);
    }
    catch (NoSuchElementException e)
    {
      throw new IOException("error in line: " + line, e);
    }
    catch (IndexOutOfBoundsException e)
    {
      throw new IOException("error in line: " + line, e);
    }
    catch (NumberFormatException e)
    {
      throw new IOException("error in line: " + line, e);
    }
  }


  public long getNrOfElements() throws IOException
  {
    if (nrElements == -1) nrElements = TextFileUtils.getNumberOfNonEmptyLines(file, FILE_ENCODING);
    return nrElements;
  }


  @Override
  public void close() throws IOException
  {
    if (this.reader != null) this.reader.close();
  }


  public void reset() throws IOException
  {
    if (this.reader != null) close();
    this.reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), FILE_ENCODING));
  }
}
// Source Code of org.molgenis.util.plink.drivers.PedFileDriver

// Related Classes of org.molgenis.util.plink.drivers.PedFileDriver