/*
* The MIT License
*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.util;
import htsjdk.samtools.util.FormatUtil;
import htsjdk.samtools.util.IOUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.PicardException;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
public class TextFileParsersTest {
private static final String testFile1 = "testdata/picard/util/whitespace_text_file.txt";
private static final String testFile2 = "testdata/picard/util/all_ones_text_file.txt";
private static final String testFile3 = "testdata/picard/util/no_grouping_file.txt";
private static final String testFile4 = "testdata/picard/util/tabbed_text_file.txt";
// There is a comment in the file data that should be skipped by the parser, so it is not included below
private static final Object[][] testFile1Data = {
{ "Now", "is", "the", "time" },
{ "for", "all", "good", "men" },
{ "to", "come", "to", "the" },
{ "aid", "of", "their", "country." },
{ 15.0d, 23, 55, 67.88888}
};
@Test(dataProvider = "basicInputParserData")
public void testTextFileParser(Object fileOrStream) throws IOException {
FormatUtil format = new FormatUtil();
List<String> expected = new ArrayList<String>();
if (fileOrStream instanceof File) {
BufferedReader reader = new BufferedReader(new FileReader((File)fileOrStream));
String line = null;
while ((line = reader.readLine()) != null) {
if (!line.startsWith("#")) expected.add(line);
}
reader.close();
}
BasicInputParser parser = fileOrStream instanceof File
? new BasicInputParser(true, (File)fileOrStream )
: new BasicInputParser(true, (InputStream)fileOrStream);
int index = 0;
while (parser.hasNext())
{
String parts[] = parser.next();
if (fileOrStream instanceof File) {
// Can't have the parser and the reader workking with an InputStream at the same time
// so we only do this test with the file
Assert.assertEquals(parser.getCurrentLine(), expected.get(index));
}
// Line 4 is a comment, so there's a gap in the line numbers
Assert.assertEquals(parser.getCurrentLineNumber(), index <= 2 ? index+1 : index+2);
Assert.assertEquals(parts.length, 4);
if (index < 4) {
for (int i = 0; i < parts.length; i++) {
Assert.assertEquals(parts, testFile1Data[index]);
}
}
else {
Assert.assertEquals(testFile1Data[index][0], format.parseDouble(parts[0]));
Assert.assertEquals(testFile1Data[index][1], format.parseInt(parts[1]));
Assert.assertEquals(testFile1Data[index][2], format.parseInt(parts[2]));
Assert.assertEquals(testFile1Data[index][3], format.parseDouble(parts[3]));
}
index++;
}
}
@DataProvider(name = "basicInputParserData")
private Object[][] getBasicInputParserData()
{
return new Object[][] {
{new File(testFile1)},
{IOUtil.openFileForReading(new File(testFile1))}
};
}
@Test(dataProvider = "multiFileParsingData")
public void testMultiFileParsing(Object fileOrStream1, Object fileOrStream2) throws IOException {
FormatUtil format = new FormatUtil();
List<String> expected = new ArrayList<String>();
if (fileOrStream1 instanceof File) {
BufferedReader reader = new BufferedReader(new FileReader((File)fileOrStream1));
String line = null;
while ((line = reader.readLine()) != null) {
if (!line.startsWith("#")) expected.add(line);
}
reader.close();
reader = new BufferedReader(new FileReader((File)fileOrStream2));
while ((line = reader.readLine()) != null) {
if (!line.startsWith("#")) expected.add(line);
}
reader.close();
}
BasicInputParser parser = fileOrStream1 instanceof File
? new BasicInputParser(true, (File)fileOrStream1, (File)fileOrStream2 )
: new BasicInputParser(true, (InputStream)fileOrStream1, (InputStream)fileOrStream2);
int index = 0;
// Line 4 is a comment, so there's a gap in the line numbers
int expectedLineNumbers[] = {1,2,3,5,6,1,2,3,5,6};
while (parser.hasNext())
{
String parts[] = parser.next();
if (fileOrStream1 instanceof File) {
// Can't have the parser and the reader working with an InputStream at the same time
// so we only test the files
Assert.assertEquals(parser.getCurrentLine(), expected.get(index));
}
Assert.assertEquals(parser.getCurrentLineNumber(), expectedLineNumbers[index]);
Assert.assertEquals(parts.length, 4);
int indexIntoTestData = (index<5) ? index : index-5;
if (index != 4 && index != 9) {
for (int i = 0; i < parts.length; i++) {
Assert.assertEquals(parts, testFile1Data[indexIntoTestData]);
}
}
else {
Assert.assertEquals(testFile1Data[indexIntoTestData][0], format.parseDouble(parts[0]));
Assert.assertEquals(testFile1Data[indexIntoTestData][1], format.parseInt(parts[1]));
Assert.assertEquals(testFile1Data[indexIntoTestData][2], format.parseInt(parts[2]));
Assert.assertEquals(testFile1Data[indexIntoTestData][3], format.parseDouble(parts[3]));
}
index++;
}
}
@DataProvider(name = "multiFileParsingData")
private Object[][] getMultiFileParsingData()
{
return new Object[][] {
{new File(testFile1), new File(testFile1)},
{IOUtil.openFileForReading(new File(testFile1)), IOUtil.openFileForReading(new File(testFile1))}
};
}
@Test(dataProvider = "noGroupingData")
public void testTextFileParserNoGrouping(Object fileOrStream) {
BasicInputParser parser = fileOrStream instanceof File
? new BasicInputParser(true, (File)fileOrStream)
: new BasicInputParser(true, (InputStream)fileOrStream);
parser.setTreatGroupedDelimitersAsOne(false);
while (parser.hasNext()) {
String parts[] = parser.next();
for (int i = 0; i < parts.length; i++) {
if (parts[i] != null) {
Assert.assertEquals(Integer.parseInt(parts[i]), i+1);
}
}
}
}
@DataProvider(name = "noGroupingData")
private Object[][] getNoGroupingData()
{
return new Object[][] {
{new File(testFile3)},
{IOUtil.openFileForReading(new File(testFile3))}
};
}
@Test(dataProvider = "leadingWhiteSpaceData")
public void testTextFileParserLeadingWhitespace(Object fileOrStream) {
BasicInputParser parser = fileOrStream instanceof File
? new BasicInputParser(true, (File)fileOrStream)
: new BasicInputParser(true, (InputStream)fileOrStream);
while (parser.hasNext())
{
String parts[] = parser.next();
Assert.assertEquals(parts.length, 1);
Assert.assertEquals("1", parts[0]);
}
}
@DataProvider(name = "leadingWhiteSpaceData")
private Object[][] getLeadingWhiteSpaceData()
{
return new Object[][] {
{new File(testFile2)},
{IOUtil.openFileForReading(new File(testFile2))}
};
}
@Test(expectedExceptions= PicardException.class, dataProvider = "tooManyWordsData")
public void testTooManyWords(Object fileOrStream) {
BasicInputParser parser = fileOrStream instanceof File
? new BasicInputParser(true, 3, (File)fileOrStream)
: new BasicInputParser(true, 3, (InputStream)fileOrStream);
if (parser.hasNext()) {
String parts[] = parser.next();
}
Assert.fail("Attempt to parse extra-long file should have failed but didn't.");
}
@DataProvider(name = "tooManyWordsData")
private Object[][] getTooManyWordsData()
{
return new Object[][] {
{new File(testFile1)},
{IOUtil.openFileForReading(new File(testFile1))}
};
}
@Test(dataProvider = "tabbedData")
public void testTabbedFileParser(Object fileOrStream) {
TabbedInputParser parser = fileOrStream instanceof File
? new TabbedInputParser(false, (File)fileOrStream)
: new TabbedInputParser(false, (InputStream)fileOrStream);
while (parser.hasNext()) {
String parts[] = parser.next();
for (int i = 0; i < parts.length; i++) {
if (parts[i] != null && !parts[i].equals("")) {
Assert.assertEquals(parts[i].trim(), String.valueOf(i+1));
}
}
}
}
@DataProvider(name = "tabbedData")
private Object[][] getTabbedData()
{
return new Object[][] {
{new File(testFile4)},
{IOUtil.openFileForReading(new File(testFile4))}
};
}
@Test(dataProvider="data")
public void testWordCountCalculation(String line, boolean groupDelimiters, String name) {
WordCountTestParser parser = new WordCountTestParser();
parser.setDelimiter("\t ");
parser.setTreatGroupedDelimitersAsOne(groupDelimiters);
parser.calculateWordCount(line.getBytes());
Assert.assertEquals(parser.getWordCount(), 3, name);
}
@DataProvider(name = "data")
private Object[][] getWordCountCalculationData()
{
return new Object[][]{
{"1\t2\t3", false, "Tabs with all fields filled."},
{"1\t2\t", false, "Tabs with no final field."},
{"\t2\t3", false, "Tabs with no first field."},
{"\t2\t", false, "Tabs with no first or final field."},
{"1 2 3", true, "Spaces with all fields filled (grouping on)."},
{"1 2 3 ", true, "Spaces with no final field (grouping on)."},
{" 2 3 4", true, "Spaces with no first field (grouping on)."},
{" 2 ", false, "Spaces with no first or final field."}
};
}
/**
* Toy class for testing the word count functionality
*/
private static class WordCountTestParser extends AbstractInputParser {
private char delimiters[] = null;
public WordCountTestParser() {
}
public void setDelimiter(String delim) {
delimiters = delim.toCharArray();
}
protected boolean isDelimiter(final byte b) {
for (int i = 0; i < delimiters.length; i++) {
if (b == delimiters[i]) {
return true;
}
}
return false;
}
protected byte[] readNextLine() { return new byte[0]; }
public String getFileName() { return null; }
public void close() {}
}
}