package picard.pedigree;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.RuntimeIOException;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;
/**
* Represents a .ped file of family information as documented here:
* http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml
*
* Stores the information in memory as a map of individualId -> Pedigree information for that individual
*/
public class PedFile extends TreeMap<String, PedFile.PedTrio> {
private static final Log log = Log.getInstance(PedFile.class);
static final Pattern WHITESPACE = Pattern.compile("\\s+");
static final Pattern TAB = Pattern.compile("\\t");
private final Pattern delimiterPattern;
private final String delimiterString; // A textual representation of the delimiter, for output purposes
// These two are really for PedTrio, but they can't be static in there and need to be accessed outside of PedFile
public static final Number NO_PHENO = new Integer(-9);
public static final Sex UNKNOWN_SEX = Sex.Unknown;
public PedFile(final boolean isTabMode) {
delimiterPattern = isTabMode ? TAB : WHITESPACE;
delimiterString = isTabMode ? "tabs" : "whitespace";
}
/** Adds a trio to the PedFile keyed by the individual id. */
public void add(final PedTrio trio) {
put(trio.getIndividualId(), trio);
}
/**
* Writes a set of pedigrees out to disk.
*/
public void write(final File file) {
IOUtil.assertFileIsWritable(file);
final BufferedWriter out = IOUtil.openFileForBufferedWriting(file);
try {
for (final PedTrio trio : values()) {
out.write(trio.getFamilyId());
out.write("\t");
out.write(trio.getIndividualId());
out.write("\t");
out.write(trio.getPaternalId());
out.write("\t");
out.write(trio.getMaternalId());
out.write("\t");
out.write(String.valueOf(trio.getSex().toCode()));
out.write("\t");
out.write(trio.getPhenotype().toString());
out.newLine();
}
out.close();
}
catch (final IOException ioe) {
throw new RuntimeIOException("IOException while writing to file " + file.getAbsolutePath(), ioe);
}
}
/**
* Attempts to read a pedigree file into memory.
*/
public static PedFile fromFile(final File file, final boolean isTabMode) {
final PedFile pedFile = new PedFile(isTabMode);
IOUtil.assertFileIsReadable(file);
for (final String line : IOUtil.readLines(file)) {
final String[] fields = pedFile.delimiterPattern.split(line);
if (fields.length != 6) {
log.error("Ped file line contained invalid number of fields, skipping: " + line);
continue;
}
final PedTrio trio = pedFile.new PedTrio(fields[0],
fields[1],
fields[2],
fields[3],
Sex.fromCode(Integer.parseInt(fields[4])),
fields[5].contains(".") ? Double.parseDouble(fields[5]) : Integer.parseInt(fields[5])
);
pedFile.add(trio);
}
return pedFile;
}
/**
* Scans through the pedigrees and removes all entries that do not have both paternal and maternal ids set.
*/
public PedFile removeIncompleteTrios() {
final Iterator<Map.Entry<String,PedTrio>> iterator = entrySet().iterator();
while (iterator.hasNext()) {
if (!iterator.next().getValue().hasBothParents()) iterator.remove();
}
return this;
}
public class PedTrio {
private final String familyId;
private final String individualId;
private final String paternalId;
private final String maternalId;
private final Sex sex;
private final Number phenotype;
/** Constructs a TRIO that cannot be modified after the fact. */
public PedTrio(final String familyId, final String individualId, final String paternalId, final String maternalId, final Sex sex, final Number phenotype) {
if (delimiterPattern.split(familyId).length != 1) throw new IllegalArgumentException("FamilyID cannot contain " + delimiterString + ": [" + familyId + "]");
if (delimiterPattern.split(individualId).length != 1) throw new IllegalArgumentException("IndividualID cannot contain " + delimiterString + ": [" + individualId + "]");
if (delimiterPattern.split(paternalId).length != 1) throw new IllegalArgumentException("PaternalID cannot contain " + delimiterString + ": [" + paternalId + "]");
if (delimiterPattern.split(maternalId).length != 1) throw new IllegalArgumentException("MaternalID cannot contain " + delimiterString + ": [" + maternalId + "]");
this.familyId = familyId;
this.individualId = individualId;
this.paternalId = paternalId;
this.maternalId = maternalId;
this.sex = sex;
this.phenotype = phenotype;
}
/** True if this record has paternal and maternal ids, otherwise false. */
public boolean hasBothParents() {
return this.paternalId != null && this.maternalId != null;
}
public String getFamilyId() { return familyId; }
public String getIndividualId() { return individualId; }
public String getPaternalId() { return paternalId; }
public String getMaternalId() { return maternalId; }
public Sex getSex() { return sex; }
public Number getPhenotype() { return phenotype; }
}
/** Function that accepts a map from sample-name to its sex and creates a PEDFile
* documenting the sexes. Note that the parents are created as UNKNOWNS in this implementation
* as the purpose is only to create a PED file for the sex of the samples, not the whole pedigree
* @param sampleSexes a map from sample-name to its sex
* @return a PedFile object that contains data.
*/
static public PedFile fromSexMap(final Map<String, Sex> sampleSexes) {
final PedFile pedfile = new PedFile(true);
int parentCounter = 1;
for (final Map.Entry<String, Sex> sampleSex : sampleSexes.entrySet()) {
final PedFile.PedTrio ped = pedfile.new PedTrio(
sampleSex.getKey(), sampleSex.getKey(),
"UNKNOWN" + (parentCounter),
"UNKNOWN" + (parentCounter + 1),
sampleSex.getValue(), PedFile.NO_PHENO);
parentCounter += 2;
pedfile.add(ped);
}
return pedfile;
}
}