Package org.molgenis.util.plink.drivers

Source Code of org.molgenis.util.plink.drivers.BimFileDriver

package org.molgenis.util.plink.drivers;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;

import org.molgenis.util.TextFileUtils;
import org.molgenis.util.plink.PlinkFileParser;
import org.molgenis.util.plink.datatypes.Biallele;
import org.molgenis.util.plink.datatypes.BimEntry;

/**
* Driver to query BIM files. BIM files annotate the genotypes of BED files.
* They are basically MAP files, with added biallelic data. See:
* http://pngu.mgh.harvard.edu/~purcell/plink/binary.shtml
*
* Content of a BIM file: chromosome, SNP, cM, base-position, allele 1, allele 2
*/
public class BimFileDriver implements PlinkFileParser
{
  private BufferedReader reader;
  private File file;
  private char separator;
  private long nrElements;

  /**
   * Construct a BimFileDriver on this file
   *
   * @param bimFile
   * @throws Exception
   */
  public BimFileDriver(File bimFile)
  {
    this(bimFile, DEFAULT_FIELD_SEPARATOR);
  }

  public BimFileDriver(File bimFile, char separator)
  {
    if (bimFile == null) throw new IllegalArgumentException("file is null");
    this.file = bimFile;
    this.separator = separator;
    this.nrElements = -1l;
  }

  /**
   * Get a specific set of BIM file entries
   *
   * @param from
   *            = inclusive
   * @param to
   *            = exclusive
   * @return
   * @throws Exception
   */
  public List<BimEntry> getEntries(final long from, final long to) throws IOException
  {
    reset();

    List<BimEntry> entryList = new ArrayList<BimEntry>();
    String line;
    for (int i = 0; (line = reader.readLine()) != null && i < to; ++i)
      if (i >= from) entryList.add(parseEntry(line));

    return entryList;
  }

  /**
   * Get all BIM file entries
   *
   * @return
   * @throws Exception
   */
  public List<BimEntry> getAllEntries() throws IOException
  {
    reset();

    List<BimEntry> entryList = new ArrayList<BimEntry>();
    String line;
    while ((line = reader.readLine()) != null)
      entryList.add(parseEntry(line));

    return entryList;
  }

  private BimEntry parseEntry(String line) throws IOException
  {
    StringTokenizer strTokenizer = new StringTokenizer(line, this.separator + "");
    try
    {
      String chromosome = strTokenizer.nextToken();
      String snp = strTokenizer.nextToken();
      double cM = Double.parseDouble(strTokenizer.nextToken());
      long bpPos = Long.parseLong(strTokenizer.nextToken());
      char allelle1 = strTokenizer.nextToken().charAt(0);
      char allelle2 = strTokenizer.nextToken().charAt(0);
      return new BimEntry(chromosome, snp, cM, bpPos, Biallele.create(allelle1, allelle2));
    }
    catch (NoSuchElementException e)
    {
      throw new IOException("error in line: " + line, e);
    }
    catch (IndexOutOfBoundsException e)
    {
      throw new IOException("error in line: " + line, e);
    }
    catch (NumberFormatException e)
    {
      throw new IOException("error in line: " + line, e);
    }
  }

  public long getNrOfElements() throws IOException
  {
    if (nrElements == -1) nrElements = TextFileUtils.getNumberOfNonEmptyLines(file, FILE_ENCODING);
    return nrElements;
  }

  @Override
  public void close() throws IOException
  {
    if (this.reader != null) this.reader.close();
  }

  public void reset() throws IOException
  {
    if (this.reader != null) close();
    this.reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), FILE_ENCODING));
  }
}
TOP

Related Classes of org.molgenis.util.plink.drivers.BimFileDriver

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.