Package picard.util

Source Code of picard.util.TextFileParsersTest

/*
* The MIT License
*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.util;

import htsjdk.samtools.util.FormatUtil;
import htsjdk.samtools.util.IOUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.PicardException;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

public class TextFileParsersTest {

    // Paths of the fixture files used by the tests in this class.
    // testFile1: input for the basic, multi-file and too-many-words tests.
    private static final String testFile1 = "testdata/picard/util/whitespace_text_file.txt";
    // testFile2: input for the leading-whitespace test (every row is expected to be a single "1").
    private static final String testFile2 = "testdata/picard/util/all_ones_text_file.txt";
    // testFile3: input for the test that disables grouping of adjacent delimiters.
    private static final String testFile3 = "testdata/picard/util/no_grouping_file.txt";
    // testFile4: input for the TabbedInputParser test.
    private static final String testFile4 = "testdata/picard/util/tabbed_text_file.txt";
    // Expected parsed rows of testFile1, in file order. The file also contains a comment line
    // that the parser should skip, so that line has no entry here. The last row holds numbers
    // rather than strings; the tests parse the corresponding fields with FormatUtil before comparing.
    private static final Object[][] testFile1Data = {
        { "Now", "is", "the", "time" },
        { "for", "all", "good", "men" },
        { "to", "come", "to", "the" },
        { "aid", "of", "their", "country." },
        { 15.0d, 23, 55, 67.88888}
    };

    @Test(dataProvider = "basicInputParserData")
    public void testTextFileParser(Object fileOrStream) throws IOException {
        FormatUtil format = new FormatUtil();

        List<String> expected = new ArrayList<String>();
        if (fileOrStream instanceof File) {
            BufferedReader reader = new BufferedReader(new FileReader((File)fileOrStream));
            String line = null;
            while ((line = reader.readLine()) != null)  {
                if (!line.startsWith("#")) expected.add(line);
            }
            reader.close();
        }

        BasicInputParser parser = fileOrStream instanceof File
            ? new BasicInputParser(true, (File)fileOrStream )
            : new BasicInputParser(true, (InputStream)fileOrStream);
        int index = 0;
        while (parser.hasNext())
        {
            String parts[] = parser.next();
            if (fileOrStream instanceof File) {
                // Can't have the parser and the reader workking with an InputStream at the same time
                // so we only do this test with the file
                Assert.assertEquals(parser.getCurrentLine(), expected.get(index));
            }
            // Line 4 is a comment, so there's a gap in the line numbers
            Assert.assertEquals(parser.getCurrentLineNumber(), index <= 2 ? index+1 : index+2);
            Assert.assertEquals(parts.length, 4);
            if (index < 4) {
                for (int i = 0; i < parts.length; i++) {
                    Assert.assertEquals(parts, testFile1Data[index]);
                }
            }
            else {
                Assert.assertEquals(testFile1Data[index][0], format.parseDouble(parts[0]));
                Assert.assertEquals(testFile1Data[index][1], format.parseInt(parts[1]));
                Assert.assertEquals(testFile1Data[index][2], format.parseInt(parts[2]));
                Assert.assertEquals(testFile1Data[index][3], format.parseDouble(parts[3]));
            }
            index++;
        }
    }

    @DataProvider(name = "basicInputParserData")
    private Object[][] getBasicInputParserData()
    {
        return new Object[][] {
                {new File(testFile1)},
                {IOUtil.openFileForReading(new File(testFile1))}
        };
    }

    @Test(dataProvider = "multiFileParsingData")
    public void testMultiFileParsing(Object fileOrStream1, Object fileOrStream2) throws IOException {
        FormatUtil format = new FormatUtil();

        List<String> expected = new ArrayList<String>();
        if (fileOrStream1 instanceof File) {
            BufferedReader reader = new BufferedReader(new FileReader((File)fileOrStream1));
            String line = null;
            while ((line = reader.readLine()) != null)  {
                if (!line.startsWith("#")) expected.add(line);
            }
            reader.close();
            reader = new BufferedReader(new FileReader((File)fileOrStream2));
            while ((line = reader.readLine()) != null)  {
                if (!line.startsWith("#")) expected.add(line);
            }
            reader.close();
        }

        BasicInputParser parser = fileOrStream1 instanceof File
            ? new BasicInputParser(true, (File)fileOrStream1, (File)fileOrStream2 )
            : new BasicInputParser(true, (InputStream)fileOrStream1, (InputStream)fileOrStream2);
        int index = 0;
        // Line 4 is a comment, so there's a gap in the line numbers
        int expectedLineNumbers[] = {1,2,3,5,6,1,2,3,5,6};
        while (parser.hasNext())
        {
            String parts[] = parser.next();
            if (fileOrStream1 instanceof File) {
                // Can't have the parser and the reader working with an InputStream at the same time
                // so we only test the files
                Assert.assertEquals(parser.getCurrentLine(), expected.get(index));
            }
            Assert.assertEquals(parser.getCurrentLineNumber(), expectedLineNumbers[index]);
            Assert.assertEquals(parts.length, 4);
            int indexIntoTestData = (index<5) ? index : index-5;
            if (index != 4 && index != 9) {
                for (int i = 0; i < parts.length; i++) {
                    Assert.assertEquals(parts, testFile1Data[indexIntoTestData]);
                }
            }
            else {
                Assert.assertEquals(testFile1Data[indexIntoTestData][0], format.parseDouble(parts[0]));
                Assert.assertEquals(testFile1Data[indexIntoTestData][1], format.parseInt(parts[1]));
                Assert.assertEquals(testFile1Data[indexIntoTestData][2], format.parseInt(parts[2]));
                Assert.assertEquals(testFile1Data[indexIntoTestData][3], format.parseDouble(parts[3]));
            }
            index++;
        }
    }

    @DataProvider(name = "multiFileParsingData")
    private Object[][] getMultiFileParsingData()
    {
        return new Object[][] {
                {new File(testFile1), new File(testFile1)},
                {IOUtil.openFileForReading(new File(testFile1)), IOUtil.openFileForReading(new File(testFile1))}
        };
    }

    @Test(dataProvider = "noGroupingData")
    public void testTextFileParserNoGrouping(Object fileOrStream) {
        BasicInputParser parser = fileOrStream instanceof File
            ? new BasicInputParser(true, (File)fileOrStream)
            : new BasicInputParser(true, (InputStream)fileOrStream);
        parser.setTreatGroupedDelimitersAsOne(false);
        while (parser.hasNext()) {
            String parts[] = parser.next();
            for (int i = 0; i < parts.length; i++) {
                if (parts[i] != null) {
                    Assert.assertEquals(Integer.parseInt(parts[i]), i+1);
                }
            }
        }
    }

    @DataProvider(name = "noGroupingData")
    private Object[][] getNoGroupingData()
    {
        return new Object[][] {
                {new File(testFile3)},
                {IOUtil.openFileForReading(new File(testFile3))}
        };
    }


    @Test(dataProvider = "leadingWhiteSpaceData")
    public void testTextFileParserLeadingWhitespace(Object fileOrStream) {
        BasicInputParser parser = fileOrStream instanceof File
            ? new BasicInputParser(true, (File)fileOrStream)
            : new BasicInputParser(true, (InputStream)fileOrStream);
        while (parser.hasNext())
        {
            String parts[] = parser.next();
            Assert.assertEquals(parts.length, 1);
            Assert.assertEquals("1", parts[0]);
        }
    }

    @DataProvider(name = "leadingWhiteSpaceData")
    private Object[][] getLeadingWhiteSpaceData()
    {
        return new Object[][] {
                {new File(testFile2)},
                {IOUtil.openFileForReading(new File(testFile2))}
        };
    }


    @Test(expectedExceptions= PicardException.class, dataProvider = "tooManyWordsData")
    public void testTooManyWords(Object fileOrStream) {
        BasicInputParser parser = fileOrStream instanceof File
            ? new BasicInputParser(true, 3, (File)fileOrStream)
            : new BasicInputParser(true, 3, (InputStream)fileOrStream);
        if (parser.hasNext()) {
            String parts[] = parser.next();
        }
        Assert.fail("Attempt to parse extra-long file should have failed but didn't.");
    }

    @DataProvider(name = "tooManyWordsData")
    private Object[][] getTooManyWordsData()
    {
        return new Object[][] {
                {new File(testFile1)},
                {IOUtil.openFileForReading(new File(testFile1))}
        };
    }

    @Test(dataProvider = "tabbedData")
    public void testTabbedFileParser(Object fileOrStream) {
        TabbedInputParser parser = fileOrStream instanceof File
            ? new TabbedInputParser(false, (File)fileOrStream)
            : new TabbedInputParser(false, (InputStream)fileOrStream);
        while (parser.hasNext()) {
            String parts[] = parser.next();
            for (int i = 0; i < parts.length; i++) {
                if (parts[i] != null && !parts[i].equals("")) {
                    Assert.assertEquals(parts[i].trim(), String.valueOf(i+1));
                }
            }
        }
    }

    @DataProvider(name = "tabbedData")
    private Object[][] getTabbedData()
    {
        return new Object[][] {
                {new File(testFile4)},
                {IOUtil.openFileForReading(new File(testFile4))}
        };
    }

    @Test(dataProvider="data")
    public void testWordCountCalculation(String line, boolean groupDelimiters, String name) {

        WordCountTestParser parser = new WordCountTestParser();
        parser.setDelimiter("\t ");
        parser.setTreatGroupedDelimitersAsOne(groupDelimiters);
        parser.calculateWordCount(line.getBytes());
        Assert.assertEquals(parser.getWordCount(), 3, name);
    }

    @DataProvider(name = "data")
    private Object[][] getWordCountCalculationData()
    {
        return new Object[][]{
                {"1\t2\t3", false, "Tabs with all fields filled."},
                {"1\t2\t", false, "Tabs with no final field."},
                {"\t2\t3", false, "Tabs with no first field."},
                {"\t2\t", false, "Tabs with no first or final field."},
                {"1  2  3", true, "Spaces with all fields filled  (grouping on)."},
                {"1  2  3  ", true, "Spaces with no final field (grouping on)."},
                {"   2   3   4", true, "Spaces with no first field (grouping on)."},
                {" 2 ", false, "Spaces with no first or final field."}
        };
    }

    /**
     * Toy class for testing the word count functionality
     */
    private static class WordCountTestParser extends AbstractInputParser {

        private char delimiters[] = null;
       
        public WordCountTestParser() {
        }

        public void setDelimiter(String delim) {
            delimiters = delim.toCharArray();
        }

        protected boolean isDelimiter(final byte b) {
            for (int i = 0; i < delimiters.length; i++) {
                if (b == delimiters[i]) {
                    return true;
                }
            }
            return false;
        }

        protected byte[] readNextLine() {  return new byte[0]; }
        public String getFileName() { return null; }
        public void close() {}
    }
}
TOP

Related Classes of picard.util.TextFileParsersTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.