/*
* Copyright (c) 2007-2012 The Broad Institute, Inc.
* SOFTWARE COPYRIGHT NOTICE
* This software and its documentation are the copyright of the Broad Institute, Inc. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. The Broad Institute is not responsible for its use, misuse, or functionality.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
* Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*/
package org.broad.igv.tools;
import org.broad.igv.AbstractHeadlessTest;
import org.broad.igv.data.Dataset;
import org.broad.igv.data.expression.ExpressionFileParser;
import org.broad.igv.feature.FeatureDB;
import org.broad.igv.feature.genome.FastaIndex;
import org.broad.igv.feature.genome.Genome;
import org.broad.igv.sam.Alignment;
import org.broad.igv.sam.reader.AlignmentReader;
import org.broad.igv.sam.reader.AlignmentReaderFactory;
import org.broad.igv.sam.reader.FeatureIndex;
import org.broad.igv.sam.reader.SamUtils;
import org.broad.igv.tdf.TDFDataset;
import org.broad.igv.tdf.TDFReader;
import org.broad.igv.tdf.TDFTile;
import org.broad.igv.tools.sort.SorterTest;
import org.broad.igv.util.FileUtils;
import org.broad.igv.util.ResourceLocator;
import org.broad.igv.util.TestUtils;
import htsjdk.tribble.AbstractFeatureReader;
import htsjdk.tribble.FeatureCodec;
import htsjdk.tribble.index.Block;
import htsjdk.tribble.index.Index;
import htsjdk.tribble.index.IndexFactory;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCF3Codec;
import htsjdk.variant.vcf.VCFCodec;
import org.junit.*;
import org.junit.rules.TestRule;
import org.junit.rules.Timeout;
import java.io.*;
import java.util.*;
import static junit.framework.Assert.*;
public class IGVToolsTest extends AbstractHeadlessTest {
IgvTools igvTools;
private static final String hg18id = TestUtils.DATA_DIR + "genomes/hg18.unittest.genome";
private static final int MAX_LINES_CHECK = 200;
@Rule
public TestRule testTimeout = new Timeout((int) 1e3 * 60 * 20);
@Before
public void setUp() throws Exception {
super.setUp();
igvTools = new IgvTools();
}
@After
public void tearDown() throws Exception {
super.tearDown();
igvTools = null;
}
private String doStandardIndex(String inputFile, String expectedExtension) throws IOException {
String indDir = TestUtils.TMP_OUTPUT_DIR;
TestUtils.clearOutputDir();
String indPath = igvTools.doIndex(inputFile, indDir, IgvTools.LINEAR_INDEX, IgvTools.LINEAR_BIN_SIZE);
File indFile = new File(indPath);
//Check that only the index file we intended exists
assertTrue(indFile.exists());
assertTrue(indPath.endsWith(expectedExtension));
final Set<String> exts = new HashSet<String>();
for (String ext : new String[]{".idx", ".sai", ".bai", ".fai"}) {
exts.add(ext);
}
File[] files = (new File(indDir)).listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return exts.contains((Preprocessor.getExtension(name)));
}
});
assertEquals("Extra files in output directory", 1, files.length);
return indFile.getAbsolutePath();
}
@Test
public void testIndexSam() throws Exception {
String samFile = TestUtils.DATA_DIR + "sam/NA12878.muc1.test2.sam";
String samFileIdx = doStandardIndex(samFile, "sai");
FeatureIndex idx = SamUtils.getIndexFor(samFile);
assertTrue(idx.containsChromosome("chr1"));
assertEquals(1, idx.getIndexedChromosomes().size());
}
@Test
public void testIndexFasta() throws Exception {
String inFile = TestUtils.DATA_DIR + "fasta/ecoli_out.padded2.fasta";
String indPath = doStandardIndex(inFile, "fai");
FastaIndex index = new FastaIndex(indPath);
assertEquals(1, index.getSequenceNames().size());
assertNotNull(index.getIndexEntry("NC_000913_bb"));
}
@Test
public void testLinearIndex() throws IOException {
String bedFile = TestUtils.DATA_DIR + "bed/test.bed";
String idxPath = doStandardIndex(bedFile, "idx");
Index idx = IndexFactory.loadIndex(idxPath);
List<Block> blocks = idx.getBlocks("chr1", 100, 200);
Block block = blocks.get(0);
assertEquals("Unexpected start position ", 0, block.getStartPosition());
}
@Test
public void testIntervalIndex33() throws Exception {
String testFile = TestUtils.LARGE_DATA_DIR + "CEU.SRP000032.2010_03_v3.3.genotypes.head.vcf";
FeatureCodec codec = new VCF3Codec();
tstIntervalIndex(testFile, codec);
}
@Test
public void testIntervalIndex40() throws Exception {
String testFile = TestUtils.LARGE_DATA_DIR + "CEU.SRP000032.2010_03_v4.0.genotypes.head.vcf";
FeatureCodec codec = new VCFCodec();
tstIntervalIndex(testFile, codec);
}
private void tstIntervalIndex(String testFile, FeatureCodec codec) throws IOException {
// Create an interval tree index with 5 features per interval
File indexFile = new File(testFile + ".idx");
if (indexFile.exists()) {
indexFile.delete();
}
igvTools.doIndex(testFile, null, 2, 5);
indexFile.deleteOnExit();
// Now use the index
String chr = "1";
int start = 1718546;
int end = 1748915;
int[] expectedStarts = {1718547, 1718829, 1723079, 1724830, 1731376, 1733967, 1735586, 1736016, 1738594,
1739272, 1741124, 1742815, 1743224, 1748886, 1748914};
AbstractFeatureReader bfr = AbstractFeatureReader.getFeatureReader(testFile, codec);
Iterator<VariantContext> iter = bfr.query(chr, start, end);
int count = 0;
while (iter.hasNext()) {
VariantContext feat = iter.next();
int expStart = expectedStarts[count];
assertEquals(expStart, feat.getStart());
count++;
}
Assert.assertEquals(15, count);
}
@Test
public void testVersion() throws IOException {
String[] args = {"version"};
//IgvTools.main(args);
igvTools.run(args);
}
@Test
public void testTileWigFile() throws IOException {
String inputFile = TestUtils.DATA_DIR + "wig/phastCons_chr1_small.wig";
testTile(inputFile, 300, 1300);
}
@Test
public void testTileCNFile() throws IOException {
String inputFile = TestUtils.DATA_DIR + "cn/HindForGISTIC.hg16.cn";
testTile(inputFile, 5000000, 5500000);
}
@Test
public void testTileGCT_01() throws IOException {
String inputFile = TestUtils.DATA_DIR + "gct/OV.transcriptome__agilentg4502.data.txt";
String outFilePath = TestUtils.DATA_DIR + "out/testTileGCT.wig";
String[] args = {"tile", "-z", "1", "--fileType", "mage-tab", inputFile, outFilePath, hg18id};
igvTools.run(args);
}
@Test
public void testTileGCT_02() throws IOException {
String inputFile = TestUtils.DATA_DIR + "gct/GBM.methylation__sampled.data.txt";
String outFilePath = TestUtils.DATA_DIR + "out/testTileGCT.wig";
String[] args = new String[]{"tile", "-z", "1", "--fileType", "mage-tab", inputFile, outFilePath, hg18id};
igvTools.run(args);
}
private void testTile(String inputFile, int start, int end) throws IOException {
String file1 = TestUtils.DATA_DIR + "out/file1.tdf";
String file2 = TestUtils.DATA_DIR + "out/file2.tdf";
//todo Compare 2 outputs more meaningfully
String[] args = {"toTDF", "-z", "1", "--windowFunctions", "min", inputFile, file1, hg18id};
igvTools.run(args);
FeatureDB.clearFeatures();
Runtime.getRuntime().gc();
args = new String[]{"toTDF", "-z", "1", "--windowFunctions", "max", inputFile, file2, hg18id};
igvTools.run(args);
String dsName = "/chr1/raw";
TDFDataset ds1 = TDFReader.getReader(file1).getDataset(dsName);
TDFDataset ds2 = TDFReader.getReader(file2).getDataset(dsName);
TDFTile t1 = ds1.getTiles(start, end).get(0);
TDFTile t2 = ds2.getTiles(start, end).get(0);
int nPts = t1.getSize();
assertEquals(nPts, t2.getSize());
for (int i = 0; i < nPts; i++) {
assertTrue(t1.getStartPosition(i) < t1.getEndPosition(i));
assertEquals(t1.getStartPosition(i), t2.getStartPosition(i));
assertTrue(t1.getValue(0, i) <= t2.getValue(0, i));
if (i < nPts - 1) {
assertTrue(t1.getStartPosition(i) < t1.getStartPosition(i + 1));
}
}
(new File(file1)).delete();
(new File(file2)).delete();
}
/**
* Calculates the sum of each row, excluding the first column.
* Skips non-numeric rows
*
* @param filename
* @return
*/
private float[] getLineTotals(String filename) throws Exception {
BufferedReader reader = new BufferedReader(new FileReader(filename));
String line = "";
float tmpsum;
List<Float> sums = new ArrayList<Float>();
while ((line = reader.readLine()) != null && sums.size() < MAX_LINES_CHECK) {
try {
String[] tokens = line.split("\\t");
tmpsum = 0;
for (int ii = 1; ii < tokens.length; ii++) {
tmpsum += Float.parseFloat(tokens[ii]);
}
sums.add(tmpsum);
} catch (NumberFormatException e) {
continue;
}
}
reader.close();
float[] toret = new float[sums.size()];
for (int ii = 0; ii < sums.size(); ii++) {
toret[ii] = sums.get(ii);
}
return toret;
}
/**
* Test iterating through a merged bam file(actually a list with the same file duplicated). Each record
* should appear twice (since the list contains the same file twice), and in coordinate sort order.
*
* @throws Exception
*/
@Test
public void testIterateMergedBam() throws Exception {
String listPath = TestUtils.DATA_DIR + "bam/test.unindexed.bam.list";
AlignmentReader reader = AlignmentReaderFactory.getReader(new ResourceLocator(listPath), false);
Set<String> visitedChromosomes = new HashSet();
String lastChr = null;
int lastStart = -1;
Iterator<Alignment> iter = reader.iterator();
while (iter.hasNext()) {
Alignment a1 = iter.next();
Alignment a2 = iter.next();
assertEquals(a1.getReadName(), a2.getReadName());
assertEquals(a1.getChr(), a2.getChr());
assertEquals(a1.getStart(), a2.getStart());
assertEquals(a2.getEnd(), a2.getEnd());
String chr = a1.getChr();
int start = a1.getAlignmentStart();
if (lastChr != null && chr.equals(lastChr)) {
assertTrue(a1.getReadName(), start >= lastStart);
} else {
assertFalse(visitedChromosomes.contains(chr));
lastChr = chr;
visitedChromosomes.add(chr);
}
lastStart = start;
}
}
@Test
public void testSort() throws Exception {
tstSort(false);
}
@Test
public void testSortStdout() throws Exception {
tstSort(true);
}
public void tstSort(boolean writeToStdOut) throws Exception {
String inputFiname = "Unigene.unsorted.bed";
String inputFile = TestUtils.DATA_DIR + "bed/" + inputFiname;
String outputFile = TestUtils.TMP_OUTPUT_DIR + inputFiname + ".sorted";
File oFile = new File(outputFile);
oFile.deleteOnExit();
String outputArg = outputFile;
//This looks a bit funny, but for ease of testing we redirect stdout to a file
//Mostly just concerned about spurious log statements getting into the file
if(writeToStdOut){
System.setOut(new PrintStream(new FileOutputStream(oFile)));
outputArg = IgvTools.STDOUT_FILE_STR;
}
String input = "sort --tmpDir=./ --maxRecords=50 " + inputFile + " " + outputArg;
igvTools.run(input.split("\\s+"));
int numlines = SorterTest.checkFileSorted(oFile, 0, 1, 0);
assertEquals(71, numlines);
}
/**
* This test could stand to be improved, but it's difficult to test math.
* So we just check that file is about the right size (and well formed).
*
* @throws Exception
*/
@Test
public void testFormatexp() throws Exception {
String inputFiname = "igv_test2";
String ext = ".gct";
String inputFile = TestUtils.DATA_DIR + "gct/" + inputFiname + ext;
String outputFile = TestUtils.TMP_OUTPUT_DIR + inputFiname + "_formatted" + ext;
File oFile = new File(outputFile);
oFile.deleteOnExit();
String input = "formatexp " + inputFile + " " + outputFile;
igvTools.run(input.split("\\s+"));
Genome genome = TestUtils.loadGenome();
ExpressionFileParser parser = new ExpressionFileParser(new ResourceLocator(outputFile), null, genome);
Dataset ds = parser.createDataset();
assertEquals(10, ds.getChromosomes().length);
}
@Ignore("Missing data file")
@Test
public void testTileMageTab() throws Exception {
String mageTabFile = TestUtils.DATA_DIR + "mage-tab/test.data.txt";
String outputFile = TestUtils.DATA_DIR + "mage-tab/test.data.tdf";
String genfile = TestUtils.DATA_DIR + "genomes/hg18_truncated_aliased.genome";
String command = "tile -z 1 --fileType mage-tab " + mageTabFile + " " + outputFile + " " + genfile;
igvTools.run(command.split("\\s+"));
}
public static String[] generateRepLargebamsList(String listPath, String bamFiName, int reps) throws IOException {
return generateRepLargebamsList(listPath, bamFiName, reps, false);
}
/*
Generate a bam.list file, with rep entries, all having the same content bamPath.
If makeAbsolute is true and bamPath not absolute, the listPath parent directory is prepended.
The file is saved to listPath
*/
public static String[] generateRepLargebamsList(String listPath, String bamPath, int reps, boolean makeAbsolute) throws IOException {
File listFile = new File(listPath);
listFile.delete();
listFile.deleteOnExit();
File f = new File(bamPath);
String eachPath = null;
if (makeAbsolute && !f.isAbsolute()) {
eachPath = FileUtils.getAbsolutePath(bamPath, listPath);
} else {
eachPath = f.getPath();
}
//We generate the file on each test, because largedata dir can change
List<String> largebams = new ArrayList<String>(reps);
for (int ii = 0; ii < reps; ii++) {
largebams.add(eachPath);
}
FileWriter writer = new FileWriter(listFile);
for (String s : largebams) {
writer.write(s + "\n");
}
writer.close();
return largebams.toArray(new String[0]);
}
}