/*
* Copyright (c) 2007-2011 by The Broad Institute of MIT and Harvard. All Rights Reserved.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
* Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*
* THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR
 * WARRANTIES OF ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING,
* WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
* PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER
* OR NOT DISCOVERABLE. IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR RESPECTIVE
* TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES
* OF ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES,
* ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER
* THE BROAD OR MIT SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT
* SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*/
package org.broad.igv.peaks;
import org.broad.igv.tdf.BufferedByteWriter;
import org.broad.igv.util.CompressionUtils;
import htsjdk.tribble.util.LittleEndianOutputStream;
import java.io.*;
import java.util.*;
/**
* A utility class to convert a set of time-course bed files to a binary "peaks" file.
*
* @author jrobinso
* @date Apr 22, 2011
*/
public class BedToPeaks {

    /** Factor name -> "r,g,b" color string; populated by main() via loadColors(). */
    private static Map<String, String> colorMap;

    /**
     * Shared compressor used by the static writeChromosomeData() method.
     * Initialized statically: the original code only assigned this field in the
     * instance constructor, so the purely static call path
     * (main -> convertAll -> createBinaryPeakFile) hit a NullPointerException.
     */
    private static final CompressionUtils compressionUtils = new CompressionUtils();

    public BedToPeaks() {
        // All state is static; nothing to initialize per instance.
    }

    /**
     * Converts a collection of peak "bed" files to a single binary "peak" file.
     * <p/>
     * Note: All input bed files must have identical structure. Specifically
     * identical number of lines
     * for any line number, identical loci (chr, start, end)
     * identical placement of track lines and comments ("#" lines)
     *
     * @param factor    factor name; also used as the output file stem and track name
     * @param times     time point values, one per time-course bed file
     * @param bedFiles  first entry is the merged bed file, remaining entries one per time point
     * @param outputDir directory in which the "&lt;factor&gt;.peak.bin" file is created
     * @param root      path root used to build the associated signal (tdf) file paths
     * @throws IOException on any read or write failure
     */
    public static void createBinaryPeakFile(String factor, List<Integer> times, List<File> bedFiles,
                                            File outputDir, String root) throws IOException {

        // Fall back to a default color when the factor is unknown or no map was loaded.
        String color = (colorMap == null) ? null : colorMap.get(factor);
        if (color == null) {
            System.out.println("No color found for " + factor);
            color = "0,0,150";
        }

        File peakFile = new File(outputDir, factor + ".peak.bin");

        BufferedReader[] readers = new BufferedReader[bedFiles.size()];
        LittleEndianOutputStream peakWriter = null;
        long indexPosition = 0L;
        try {
            for (int i = 0; i < bedFiles.size(); i++) {
                readers[i] = new BufferedReader(new FileReader(bedFiles.get(i)));
            }

            peakWriter = new LittleEndianOutputStream(
                    new BufferedOutputStream(new FileOutputStream(peakFile)));

            // chr name -> byte offset of that chromosome's compressed block.
            LinkedHashMap<String, Long> chrIndex = new LinkedHashMap<String, Long>();

            // Placeholder for index position; patched afterwards by writeIndexPosition().
            peakWriter.writeLong(0L);

            // Track line
            peakWriter.writeString("track name=" + factor + " sample=" + factor +
                    " viewLimits=0:100 useScore=1 color=" + color);

            // Time values
            int nTimePoints = times.size();
            peakWriter.writeInt(nTimePoints);
            for (int t : times) {
                peakWriter.writeInt(t);
            }

            // Paths to associated signal ("tdf") files: merged first, then one per time point.
            peakWriter.writeString(root + "/tdf/compressed/" + factor + ".merged.bam.tdf");
            for (int t : times) {
                peakWriter.writeString(root + "/tdf/timecourses/" + factor + "_" + t + "/" +
                        factor + "_" + t + ".merged.bam.tdf");
            }

            // Process bed records.  All files are read in lock-step, one line at a time.
            String[] line = new String[readers.length];
            String currentChr = "";
            List<PeakRecord> records = new ArrayList<PeakRecord>(20000);

            readLoop:
            while (true) {
                for (int i = 0; i < readers.length; i++) {
                    line[i] = readers[i].readLine();
                    if (line[i] == null) {
                        // EOF on any file => we're done.  Flush the pending chromosome.
                        // NOTE: the original un-labeled "break" only exited this inner
                        // for-loop, after which the null line[0] was dereferenced.
                        if (!records.isEmpty()) {
                            writeChromosomeData(peakWriter, chrIndex, currentChr, records);
                        }
                        break readLoop;
                    }
                }

                if (line[0].startsWith("#") || line[0].startsWith("track")) {
                    continue; // Skip these lines. Note we should actually check all files
                }

                // Take locus from first file
                String[] tokens = line[0].split("\t");
                String chr = tokens[0];
                if (!chr.equals(currentChr) && !records.isEmpty()) {
                    // End of data for the current chromosome.  Flush it and record the
                    // position of the start of its block for the index.
                    writeChromosomeData(peakWriter, chrIndex, currentChr, records);
                }
                currentChr = chr;

                int start = Integer.parseInt(tokens[1]);
                int end = Integer.parseInt(tokens[2]);
                float score = Float.parseFloat(tokens[4]);

                // Scores from the per-time-point files; loci must match the first file.
                float[] timeScores = new float[nTimePoints];
                for (int i = 0; i < nTimePoints; i++) {
                    tokens = line[i + 1].split("\t");
                    if (!(tokens[0].equals(chr) &&
                            Integer.parseInt(tokens[1]) == start &&
                            Integer.parseInt(tokens[2]) == end)) {
                        throw new RuntimeException("Unordered files");
                    }
                    timeScores[i] = Float.parseFloat(tokens[4]);
                }
                records.add(new PeakRecord(start, end, score, timeScores));
            }

            // Write out the chromosome index, remembering where it starts.
            indexPosition = peakWriter.getWrittenCount();
            peakWriter.writeInt(chrIndex.size());
            for (Map.Entry<String, Long> entry : chrIndex.entrySet()) {
                peakWriter.writeString(entry.getKey());
                peakWriter.writeLong(entry.getValue());
            }
        } finally {
            if (peakWriter != null) {
                peakWriter.close();
                // Patch the placeholder at the head of the file with the real index offset.
                writeIndexPosition(peakFile, indexPosition);
            }
            for (BufferedReader reader : readers) {
                if (reader != null) { // entries may be null if an open failed part-way
                    reader.close();
                }
            }
        }
    }

    /**
     * Compresses the accumulated records for one chromosome and appends them to the
     * output stream, recording the block's start offset in chrIndex.  The record list
     * is cleared so the caller can reuse it for the next chromosome.
     */
    private static void writeChromosomeData(LittleEndianOutputStream peakWriter,
                                            LinkedHashMap<String, Long> chrIndex,
                                            String currentChr,
                                            List<PeakRecord> records) throws IOException {

        // Index entry points at the start of this chromosome's block.
        chrIndex.put(currentChr, peakWriter.getWrittenCount());

        // Serialize, compress, then write the block length-prefixed.
        BufferedByteWriter buffer = new BufferedByteWriter(100000);
        buffer.putNullTerminatedString(currentChr);
        buffer.putInt(records.size());
        for (PeakRecord record : records) {
            buffer.putInt(record.start);
            buffer.putInt(record.end);
            buffer.putFloat(record.score);
            for (float timeScore : record.timeScores) {
                buffer.putFloat(timeScore);
            }
        }

        byte[] bytes = compressionUtils.compress(buffer.getBytes());
        peakWriter.writeInt(bytes.length);
        peakWriter.write(bytes);

        records.clear();
    }

    /**
     * One peak record: a locus (start/end; chromosome implied by the enclosing block)
     * with an overall score and one score per time point.
     */
    static class PeakRecord {
        final int start;
        final int end;
        final float score;
        final float[] timeScores;

        PeakRecord(int start, int end, float score, float[] timeScores) {
            this.start = start;
            this.end = end;
            this.score = score;
            this.timeScores = timeScores;
        }
    }

    /**
     * Rewrites the 8-byte little-endian index-offset placeholder at the head of the file.
     * <p/>
     * The original version caught and printed IOExceptions, leaving a silently corrupt
     * file even though the method declares {@code throws IOException}; errors now
     * propagate to the caller.  (It also closed the RandomAccessFile twice.)
     */
    static void writeIndexPosition(File file, long indexPosition) throws IOException {
        RandomAccessFile raf = null;
        try {
            raf = new RandomAccessFile(file, "rw");
            raf.getChannel().position(0);
            // Write as little endian
            BufferedByteWriter buffer = new BufferedByteWriter();
            buffer.putLong(indexPosition);
            raf.write(buffer.getBytes());
        } finally {
            if (raf != null) raf.close();
        }
    }

    /**
     * Convenience method for processing files from the HT-Chip paper.
     * Reads factor names from "factors.txt" in rootDir and converts each factor's
     * bed files to a binary peak file in destDir.
     *
     * @param rootDir directory containing factors.txt and the input bed files
     * @param destDir directory for the generated binary peak files
     * @param root    path root passed through to createBinaryPeakFile
     * @throws IOException on any read or write failure
     */
    public static void convertAll(File rootDir, File destDir, String root) throws IOException {

        // <FACTOR>.peaks.filtered.by.fold.real.sorted.bed
        // <FACTOR>_0.peaks.filtered.by.fold.real.sorted.bed
        int[] allTimes = {0, 30, 60, 120};

        File factorFile = new File(rootDir, "factors.txt");
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(factorFile));
            String factor;
            while ((factor = reader.readLine()) != null) {
                factor = factor.trim();

                // Merged ("all time points") file first.
                List<File> bedFiles = new ArrayList<File>();
                File bedFile = new File(rootDir, factor + ".peaks.filtered.by.fold.real.sorted.bed");
                if (bedFile.exists()) {
                    bedFiles.add(bedFile);
                } else {
                    System.out.println("Can't find " + bedFile.getName());
                }

                // Then one file per available time point; record which times were found.
                List<Integer> times = new ArrayList<Integer>();
                for (int t : allTimes) {
                    bedFile = new File(rootDir, factor + "_" + t + ".peaks.filtered.by.fold.real.sorted.bed");
                    if (bedFile.exists()) {
                        bedFiles.add(bedFile);
                        times.add(t);
                    } else {
                        System.out.println("Can't find " + bedFile.getName());
                    }
                }
                createBinaryPeakFile(factor, times, bedFiles, destDir, root);
            }
        } finally {
            // Original leaked the reader if an exception occurred mid-loop.
            if (reader != null) reader.close();
        }
    }

    /**
     * Loads a two-column (tab-delimited) factor -> color mapping file.
     * Blank or malformed lines (fewer than two columns) are skipped.
     *
     * @param file path of the color mapping file
     * @return map of factor name to "r,g,b" color string
     * @throws IOException on read failure
     */
    public static Map<String, String> loadColors(String file) throws IOException {
        Map<String, String> map = new HashMap<String, String>();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(file));
            String line;
            while ((line = reader.readLine()) != null) {
                String[] tokens = line.split("\t");
                if (tokens.length >= 2) { // guard: original threw on blank/malformed lines
                    map.put(tokens[0], tokens[1]);
                }
            }
        } finally {
            // Original leaked the reader if an exception occurred mid-read.
            if (reader != null) reader.close();
        }
        return map;
    }

    /**
     * Process Broad ht-chip files.
     * <p/>
     * timePoints=0,30,60,120\
     *
     * @param args args[0] = root data dir (with trailing separator), args[1] = dest subdir,
     *             args[2] = path root for tdf references
     * @throws IOException on any read or write failure
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 3) {
            System.out.println("Usage: BedToPeaks <rootDir> <destSubdir> <tdfRoot>");
            return;
        }
        File inputDir = new File(args[0] + "bed/");
        File destDir = new File(args[0] + args[1]);
        String root = args[2];
        colorMap = loadColors(args[0] + "colors.txt");
        convertAll(inputDir, destDir, root);
    }
}