Package picard.util

Source Code of picard.util.AbstractInputParser

/*
* The MIT License
*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.util;

import htsjdk.samtools.util.AbstractIterator;
import htsjdk.samtools.util.CloseableIterator;
import picard.PicardException;

import java.util.Iterator;

/**
* Class for parsing text files where each line consists of fields separated by whitespace.
* Code is abstracted into this class so that we can optimize its performance over time.
*
* This class assumes that every line will have the same number of whitespace-separated "words"
* and that lines that start with "#" are comments and should be ignored.
*
* Classes that extend this parser can do so simply by implementing their own constructors and the
* readNextLine(), close(), and getFileName() methods.
*
* @author Kathleen Tibbetts
*/
public abstract class AbstractInputParser
extends AbstractIterator<String[]>
implements Iterable<String[]>, CloseableIterator<String[]> {

    private boolean treatGroupedDelimitersAsOne = true; // Whether multiple delimiters in succession should be treated as one
    private int wordCount = 0;      /* The number of delimiter-separated "words" per line of the file.
                                       We can save a little caclulation, or handle files with varying numbers of
                                       words per line, by specifying this if known in advance */
    private boolean skipBlankLines = true;

    /**
     * Closes this stream and releases any system resources associated with it.
     */
    public abstract void close();

    /**
     * @return the next line of text from the underlying stream(s) or null if there is no next line
     */
    protected abstract byte[] readNextLine();

    /**
     * @return  the name(s) of the file(s) being parsed, or null if no name is available
     */
    public abstract String getFileName();

    /**
     * @return an iterator over a set of elements of type String[]
     */
    public Iterator<String[]> iterator() {
        if (isIterating()) {
            throw new IllegalStateException("iterator() method can only be called once, before the" +
                    "first call to hasNext()");
        }
        hasNext();
        return this;
    }

    @Override
    protected String[] advance() {
        byte[] nextLine;
        do {
            nextLine = readNextLine();
        }
        while (nextLine != null && ((this.skipBlankLines && isBlank(nextLine)) || isComment(nextLine)));
        return nextLine == null ? null : parseLine(nextLine);
    }

    /**
     * This method represents the most efficient way (so far) to parse a line of whitespace-delimited text
     *
     * @param line the line to parse
     * @return  an array of all the "words"
     */
    private String[] parseLine(final byte[] line) {

        if (getWordCount() == 0) {
            calculateWordCount(line);
        }
        final String[] parts = new String[getWordCount()];
        boolean delimiter = true;
        int index=0;
        int start = 0;

        try
        {
            for (int i = 0; i < line.length; i++) {
                if (isDelimiter(line[i])) {
                    if (!delimiter) {
                        parts[index++] = new String(line,start,i-start);
                    }
                    else if(!isTreatGroupedDelimitersAsOne()) {
                        parts[index++] = null;
                    }
                    delimiter=true;
                }
                else {
                    if (delimiterstart = i;
                    delimiter = false;
                }
            }
            if (!delimiter) {
                 parts[index] = new String(line,start,line.length-start);
            }
        }
        catch (ArrayIndexOutOfBoundsException e) {
            throw new PicardException("Unexpected number of elements found when parsing file " +
                    this.getFileName() + ": " + index + ".  Expected a maximum of " +
                    this.getWordCount() + " elements per line:" + new String(line,0,line.length), e);
        }
        return parts;
    }

    /**
     * Calculates the number of delimiter-separated "words" in a line and sets the value of <code>wordCount</code>
     *
     * @param line  representative line from the file
     */
    protected void calculateWordCount(final byte[] line) {
        int words = 0;
        boolean delimiter = true;
        for (final byte b : line) {
            if (isDelimiter(b)) {
                if (delimiter && !isTreatGroupedDelimitersAsOne()) words++;
                delimiter = true;
            } else {
                if (delimiter) words++;
                delimiter = false;
            }
        }
        if (delimiter && !isTreatGroupedDelimitersAsOne()) {
            words += 1;
        }
        setWordCount(words);
    }

    /**
     * Determines whether a given line is a comment
     *
     * @param line  the line to evaluate
     * @return  true if the line is a comment (and should be ignored) otherwise false
     */
    protected boolean isComment(final byte[] line) {
        return line.length > 0 && line[0] == '#';
    }

    /**
     * Determines whether a given line is a comment
     *
     * @param line  the line to evaluate
     * @return  true if the line is a comment (and should be ignored) otherwise false
     */
    protected boolean isBlank(final byte[] line) {
        return line.length == 0;
    }

    /**
     * Determines whether a given character is a delimiter
     *
     * @param b the character to evaluate
     * @return  true if <code>b</code> is a delimiter; otherwise false
     */
    protected boolean isDelimiter(final byte b) {
        return b == ' ' || b == '\t';
    }

    protected int getWordCount() { return wordCount; }
    protected void setWordCount(final int wordCount) { this.wordCount = wordCount; }
    protected boolean isTreatGroupedDelimitersAsOne() { return treatGroupedDelimitersAsOne; }
    protected void setTreatGroupedDelimitersAsOne(final boolean treatGroupedDelimitersAsOne) {
        this.treatGroupedDelimitersAsOne = treatGroupedDelimitersAsOne;
    }
    protected boolean isSkipBlankLines() { return this.skipBlankLines; }
    protected void setSkipBlankLines(final boolean skipBlankLines) {
        this.skipBlankLines = skipBlankLines;
    }
}
TOP

Related Classes of picard.util.AbstractInputParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.