/*
* Copyright (c) 2007-2012 The Broad Institute, Inc.
* SOFTWARE COPYRIGHT NOTICE
* This software and its documentation are the copyright of the Broad Institute, Inc. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. The Broad Institute is not responsible for its use, misuse, or functionality.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
* Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*/
package org.broad.igv.tools;
import org.broad.igv.AbstractHeadlessTest;
import org.broad.igv.data.WiggleDataset;
import org.broad.igv.data.WiggleParser;
import org.broad.igv.feature.LocusScore;
import org.broad.igv.feature.genome.Genome;
import org.broad.igv.feature.tribble.CodecFactory;
import org.broad.igv.tdf.TDFDataSource;
import org.broad.igv.tdf.TDFDataset;
import org.broad.igv.tdf.TDFReader;
import org.broad.igv.tdf.TDFTile;
import org.broad.igv.track.WindowFunction;
import org.broad.igv.util.ResourceLocator;
import org.broad.igv.util.TestUtils;
import htsjdk.tribble.AbstractFeatureReader;
import htsjdk.tribble.Feature;
import htsjdk.tribble.FeatureCodec;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestRule;
import org.junit.rules.Timeout;
import java.io.*;
import java.util.*;
import static junit.framework.Assert.*;
public class IGVToolsCountTest extends AbstractHeadlessTest {
IgvTools igvTools;
private static final String hg18id = TestUtils.DATA_DIR + "genomes/hg18.unittest.genome";
private static final int MAX_LINES_CHECK = 200;
@Rule
public TestRule testTimeout = new Timeout((int) 1e3 * 60 * 20);
@Before
public void setUp() throws Exception {
super.setUp();
igvTools = new IgvTools();
}
@After
public void tearDown() throws Exception {
super.tearDown();
igvTools = null;
}
@Test
public void testCountBEDoutTDF() throws Exception {
String inputFile = TestUtils.DATA_DIR + "bed/Unigene.sample.sorted.bed";
tstCountStrandOpts(inputFile, "testtdf", "tdf", null, -1, -1);
}
@Test
public void testCountBEDoutWig() throws Exception {
String inputFile = TestUtils.DATA_DIR + "bed/Unigene.sample.sorted.bed";
tstCountStrandOpts(inputFile, "testwig", "wig", null, -1, -1);
tstCountStrandMapOpts(inputFile, "testwig", "wig", null, -1, -1);
}
@Test
public void testCountBEDoutWigCheckBW() throws Exception {
String inputFile = TestUtils.DATA_DIR + "bed/test2.bed";
String fullout = TestUtils.TMP_OUTPUT_DIR + "twig.wig";
String input = "count " + inputFile + " " + fullout + " " + hg18id;
String[] args = input.split("\\s+");
igvTools.run(args);
//Check file
File outFile = new File(fullout);
assertTrue(outFile.exists());
assertTrue(outFile.canRead());
WiggleParser parser = new WiggleParser(new ResourceLocator(fullout));
WiggleDataset wgs = parser.parse();
assertEquals(3, wgs.getChromosomes().length);
for(String chr: new String[]{"chr1", "chr3", "chr7"}){
assertNotNull(wgs.getData(null, chr));
}
BufferedReader reader = new BufferedReader(new FileReader(outFile));
String line = reader.readLine();
assertTrue(line.contains("wiggle_0"));
//These shouldn't be present in the rest of the file
while((line = reader.readLine()) != null){
assertFalse(line.startsWith("#"));
assertFalse(line.contains("wiggle_0"));
}
}
@Test
public void testCountBEDstdoutWig() throws Exception {
String inputFile = TestUtils.DATA_DIR + "bed/Unigene.sample.sorted.bed";
//Write to a file
String fileOutArg = TestUtils.DATA_DIR + "out/testfilewig.wig";
String input = "count " + inputFile + " " + fileOutArg + " " + hg18id;
String[] args = input.split("\\s+");
igvTools.run(args);
//Write to stdout. We redirect
ByteArrayOutputStream os = new ByteArrayOutputStream();
PrintStream oldOut = System.out;
System.setOut(new PrintStream(os));
input = "count " + inputFile + " " + IgvTools.STDOUT_FILE_STR + " " + hg18id;
args = input.split("\\s+");
(new IgvTools()).run(args);
System.setOut(oldOut);
BufferedReader fiReader = new BufferedReader(new FileReader(fileOutArg));
BufferedReader strReader = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(os.toByteArray())));
String fiLine = null;
String strLine = null;
int lineCount = 0;
while(true){
fiLine = fiReader.readLine();
strLine = strReader.readLine();
assertEquals(fiLine, strLine);
lineCount++;
if(fiLine == null || strLine == null) break;
}
assertTrue(lineCount > 10);
}
@Test
public void testCountBED_region() throws Exception {
String inputFile = TestUtils.DATA_DIR + "bed/Unigene.sample.sorted.bed";
tstCountStrandOpts(inputFile, "testwig", "wig", "chr2", 178709699, 179008373);
tstCountStrandMapOpts(inputFile, "testwig", "wig", "chr2", 178709699, 179008373);
}
@Test
public void testCountSAM() throws Exception {
String inputFile = TestUtils.DATA_DIR + "sam/test_2.sam";
tstCountStrandOpts(inputFile, "testwig", "wig", null, -1, -1);
}
@Test
public void testCountBAM() throws Exception {
String inputFile = TestUtils.DATA_DIR + "bam/NA12878.SLX.sample.bam";
tstCountStrandOpts(inputFile, "testwig", "wig", null, -1, -1);
tstCountStrandMapOpts(inputFile, "testwig", "wig", null, -1, -1);
}
public void tstCountStrandOpts(String inputFile, String outputBase, String outputExt,
String chr, int start, int end) throws Exception {
String[] opts = new String[]{"--bases", "--strands=read", "--strands=first", "--strands=read --bases"};
tstCountOptsGen(inputFile, outputBase, outputExt, chr, start, end, "", opts);
}
public void tstCountStrandMapOpts(String inputFile, String outputBase, String outputExt,
String chr, int start, int end) throws Exception {
String refOpt = "--minMapQuality 1";
String[] opts = new String[]{"--strands=read " + refOpt, "--bases " + refOpt};
tstCountOptsGen(inputFile, outputBase, outputExt, chr, start, end, refOpt, opts);
}
/**
* Compare the output of count with the various options against the output of {@code refOpt}. That is,
* we assume the row total for each will be equal.
*
* @param inputFile
* @param outputBase
* @param outputExt
* @param chr
* @param start
* @param end
* @param opts
*/
public void tstCountOptsGen(String inputFile, String outputBase, String outputExt,
String chr, int start, int end, String refOpt, String[] opts) throws Exception {
boolean query = chr != null && start >= 0 && end >= start + 1;
String outputFile = TestUtils.TMP_OUTPUT_DIR + outputBase + "_";
Map<String, float[]> rowTotals = new HashMap<String, float[]>(opts.length + 1);
String[] allOpts = new String[opts.length + 1];
allOpts[0] = refOpt;
System.arraycopy(opts, 0, allOpts, 1, opts.length);
for (int ind = 0; ind < allOpts.length; ind++) {
String opt = allOpts[ind];
int winsize = 5;
if (query) {
opt += " --windowSize " + winsize + " --query " + chr + ":" + start + "-" + end;
}
//In case we modify the option for querying as above
if (ind == 0) refOpt = opt;
String fullout = outputFile + ind + "." + outputExt;
String input = "count " + opt + " " + inputFile + " " + fullout + " " + hg18id;
String[] args = input.split("\\s+");
igvTools.run(args);
if (outputExt.equals("tdf")) {
TDFReader reader = TDFReader.getReader(fullout);
assertTrue(reader.getDatasetNames().size() > 0);
if (query) {
for (String name : reader.getDatasetNames()) {
TDFDataset ds = reader.getDataset(name);
List<TDFTile> tiles = ds.getTiles();
for (TDFTile tile : tiles) {
assertTrue(tile.getTileStart() >= start);
assertTrue(tile.getTileStart() < end);
}
}
}
} else {
File outFile = new File(fullout);
assertTrue(outFile.exists());
assertTrue(outFile.canRead());
ResourceLocator locator = new ResourceLocator(fullout);
WiggleDataset ds = (new WiggleParser(locator, IgvTools.loadGenome(hg18id))).parse();
//We miss a few alignments with this option sometimes,
//so it doesn't total up the same
if (!opt.contains("strands=first")) {
float[] linetotals = getLineTotals(fullout);
rowTotals.put(opt, linetotals);
}
if (query) {
assertEquals(1, ds.getChromosomes().length);
assertEquals(chr, ds.getChromosomes()[0]);
int[] starts = ds.getStartLocations(chr);
for (Integer act_start : starts) {
assertTrue(act_start + " is outside range", act_start >= start - winsize && act_start < end + winsize);
}
}
}
}
//Compare row totals
//They should all add up the same
float[] refTotals = rowTotals.get(refOpt);
for (String opt : rowTotals.keySet()) {
if (opt.equals(refOpt)) {
continue;
}
float[] testTotals = rowTotals.get(opt);
assertEquals(refTotals.length, testTotals.length);
for (int rw = 0; rw < refTotals.length; rw++) {
float diff = refTotals[rw] - testTotals[rw];
String msg = "Difference between " + refTotals[rw] + " and " + testTotals[rw] + " too high. ";
msg += "Row " + rw + ". Test option " + opt;
//This can get pretty high, we only use 2 digits of precision
//Also test this in CoverageCounterTest
assertTrue(msg, Math.abs(diff) <= 0.1);
}
}
}
/**
* Calculates the sum of each row, excluding the first column.
* Skips non-numeric rows
*
* @param filename
* @return
*/
private float[] getLineTotals(String filename) throws Exception {
BufferedReader reader = new BufferedReader(new FileReader(filename));
String line = "";
float tmpsum;
List<Float> sums = new ArrayList<Float>();
while ((line = reader.readLine()) != null && sums.size() < MAX_LINES_CHECK) {
//Skip header lines
if(line.startsWith("#") || line.contains("Step") || line.contains("wiggle_0")) continue;
try {
String[] tokens = line.split("\\t");
tmpsum = 0;
for (int ii = 1; ii < tokens.length; ii++) {
tmpsum += Float.parseFloat(tokens[ii]);
}
sums.add(tmpsum);
} catch (NumberFormatException e) {
continue;
}
}
reader.close();
float[] toret = new float[sums.size()];
for (int ii = 0; ii < sums.size(); ii++) {
toret[ii] = sums.get(ii);
}
return toret;
}
/**
* Test counting from a BAM.list file. Note that if the paths
* in the bam.list are relative, they are interpreted as relative to
* the location of the bam.list file, rather than the working directory.
* <p/>
* If paths are entered on the command line (option to be deprecated shortly)
* via comma separated list, they are interpreted relative to the current working directory.
*
* @throws Exception
*/
@Test
public void testCountBAMList() throws Exception {
String listPath = TestUtils.DATA_DIR + "bam/test.unindexed.bam.list";
tstCountBamList(listPath);
}
private void tstCountBamList(String listArg) throws Exception {
String outputFile = TestUtils.DATA_DIR + "out/file_";
String[] opts = new String[]{"--strands=read", "--strands=first", ""};
for (int ind = 0; ind < opts.length; ind++) {
String opt = opts[ind];
String fullout = outputFile + ind + ".tdf";
String input = "count " + opt + " " + listArg + " " + fullout + " " + hg18id;
String[] args = input.split("\\s+");
igvTools.run(args);
TDFReader reader = TDFReader.getReader(fullout);
assertTrue(reader.getDatasetNames().size() > 0);
}
}
@Test
public void testCountDups() throws Exception {
String inputFiname = "test_5duplicates";
String ext = ".sam";
String inputFile = TestUtils.DATA_DIR + "sam/" + inputFiname + ext;
String outputFileND = TestUtils.TMP_OUTPUT_DIR + inputFiname + "_nodups" + ".tdf";
String outputFileWithDup = TestUtils.TMP_OUTPUT_DIR + inputFiname + "_withdups" + ".tdf";
String queryChr = "1";
int pos = 9718611;
String queryStr = queryChr + ":" + (pos - 100) + "-" + (pos + 100) + " ";
String cmd_nodups = "count --windowSize 1 -z 7 --query " + queryStr + inputFile + " " + outputFileND + " " + hg18id;
igvTools.run(cmd_nodups.split("\\s+"));
String cmd_withdups = "count --includeDuplicates -z 7 --windowSize 1 --query " + queryStr + inputFile + " " + outputFileWithDup + " " + hg18id;
igvTools.run(cmd_withdups.split("\\s+"));
assertTrue((new File(outputFileND).exists()));
assertTrue((new File(outputFileWithDup).exists()));
Genome genome = TestUtils.loadGenome();
//Have to read back in using aliased chromosome names
String readChr = genome.getChromosomeAlias(queryChr);
int noDupCount = (int) getCount(outputFileND, readChr, 23, pos, genome);
int dupCount = (int) getCount(outputFileWithDup, readChr, 23, pos, genome);
assertEquals(noDupCount + 4, dupCount);
//No dups at this location
pos += 80;
noDupCount = (int) getCount(outputFileND, readChr, 23, pos, genome);
dupCount = (int) getCount(outputFileWithDup, readChr, 23, pos, genome);
assertEquals(noDupCount, dupCount);
}
private float getCount(String filename, String chr, int zoom, int pos, Genome genome) {
TDFReader reader = TDFReader.getReader(filename);
TDFDataset ds = reader.getDataset(chr, zoom, WindowFunction.mean);
TDFDataSource dataSource = new TDFDataSource(reader, 0, "test", genome);
List<LocusScore> scores = dataSource.getSummaryScoresForRange(chr, pos - 1, pos + 1, zoom);
return scores.get(0).getScore();
}
@Test
public void testCountAliasedForward() throws Exception {
String genfile = TestUtils.DATA_DIR + "genomes/hg18_truncated_aliased.genome";
tstCountAliased(TestUtils.DATA_DIR + "sam/NA12878.muc1.test.sam",
TestUtils.DATA_DIR + "sam/NA12878.muc1.test_modchr.sam",
genfile);
}
@Test
public void testCountAliasedReversed() throws Exception {
String genfile = TestUtils.DATA_DIR + "genomes/hg18_truncated_aliased_reversed.genome";
tstCountAliased(TestUtils.DATA_DIR + "sam/NA12878.muc1.test.sam",
TestUtils.DATA_DIR + "sam/NA12878.muc1.test_modchr.sam",
genfile);
}
/**
* Test count when using a custom alias file.
* Should supply a file using normal chromosome names, and aliases
* which are defined in the genome file.
*/
public void tstCountAliased(String normfile, String aliasedfile, String genfile) throws Exception {
String outfile = TestUtils.DATA_DIR + "out/tmpcount1.wig";
File outFi = new File(outfile);
outFi.delete();
outFi.deleteOnExit();
//Count aliased file
String command = "count " + normfile + " " + outfile + " " + genfile;
igvTools.run(command.split("\\s+"));
//Count non-aliased file
String outfile2 = TestUtils.DATA_DIR + "out/tmpcount2.wig";
File outFi2 = new File(outfile2);
outFi2.delete();
//Count aliased file
command = "count " + aliasedfile + " " + outfile2 + " " + genfile;
igvTools = new IgvTools();
igvTools.run(command.split("\\s+"));
BufferedReader reader1 = new BufferedReader(new FileReader(outfile));
BufferedReader reader2 = new BufferedReader(new FileReader(outfile2));
String line1, line2;
line1 = reader1.readLine();
line2 = reader2.readLine();
while ((line1 = reader1.readLine()) != null) {
line2 = reader2.readLine();
//Header lines don't need to match
if (line1.startsWith("#") || line1.startsWith("variableStep")
|| line1.startsWith("fixedStep")) {
continue;
}
assertEquals(line1, line2);
}
reader1.close();
reader2.close();
}
@Test
public void testCountIndexedFasta() throws Exception {
String fasta_file = TestUtils.DATA_DIR + "fasta/ecoli_out.padded.fasta";
String infile = TestUtils.DATA_DIR + "bed/ecoli_out.test.bed";
String outfile = TestUtils.DATA_DIR + "out/findextest.wig";
String end_command = infile + " " + outfile + " " + fasta_file;
String count_command = "count " + end_command;
igvTools.run(count_command.split("\\s"));
File outFi = new File(outfile);
assertTrue(outFi.exists());
outFi.delete();
igvTools = new IgvTools();
String index_command = "index " + end_command;
igvTools.run(index_command.split("\\s"));
String indexFile = infile + ".idx";
File idxFi = new File(indexFile);
assertTrue(idxFi.exists());
idxFi.deleteOnExit();
Genome genome = IgvTools.loadGenome(fasta_file);
FeatureCodec codec = CodecFactory.getCodec(infile, genome);
AbstractFeatureReader<Feature, ?> reader = AbstractFeatureReader.getFeatureReader(infile, codec, true);
String chr = "NC_000913_bb";
Iterator<Feature> features = reader.query(chr, 5085, 5091);
int count = 0;
while (features.hasNext() && count < 100) {
assertEquals(chr, features.next().getChr());
count++;
}
assertEquals(3, count);
}
@Test
public void testCountAllWF() throws Exception {
String[] exclude = {"none", "density", "stddev", "count"};
List<WindowFunction> wfList = new ArrayList<WindowFunction>();
wfList.addAll(Arrays.asList(WindowFunction.values()));
for (String s : exclude) {
wfList.remove(WindowFunction.valueOf(s));
}
String inputFile = TestUtils.DATA_DIR + "bam/NA12878.SLX.sample.bam";
tstCountWindowFunctions(inputFile, "All", wfList);
}
private void tstCountWindowFunctions(String inputFile, String chr, Iterable<WindowFunction> windowFunctions) throws Exception {
String outputFile = TestUtils.DATA_DIR + "out/testCountWindowFunctions.tdf";
String wfs = "";
for (WindowFunction wf : windowFunctions) {
wfs += wf.name() + ",";
}
wfs = wfs.substring(0, wfs.lastIndexOf(","));
String[] cmd = {"count", "--windowFunctions", wfs, inputFile, outputFile, hg18id};
igvTools.run(cmd);
TDFReader reader = new TDFReader(new ResourceLocator(outputFile));
for (WindowFunction wf : windowFunctions) {
TDFDataset ds = reader.getDataset(chr, 0, wf);
assertNotNull(ds);
}
}
}