Package picard.illumina.parser

Source Code of picard.illumina.parser.ParameterizedFileUtil

package picard.illumina.parser;

import htsjdk.samtools.util.IOUtil;
import picard.PicardException;
import picard.illumina.parser.fakers.FileFaker;

import java.io.File;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public abstract class ParameterizedFileUtil {
    public static final String PER_TILE_PATTERN_STRING = "s_(\\d+)_(\\d{1,5})";
    /**
     * The file extension for this class, file extension does not have the standard meaning
     * in this instance.  It means, all the characters that come after the identifying portion of
     * the file (after lane, tile, and end that is).  So _qseq.txt and .filter are both file extensions
     */
    protected final String extension;

    /**
     * A pattern that will match files of this type for this lane
     */
    protected Pattern matchPattern;

    protected final int lane;
    protected List<Integer> tiles;
    /**
     * If you think of the file system as a tree, this is the deepest directory(node) on the tree that
     * still contains all of the files for this given type (e.g. If we're talking about BCLs the directory
     * structure is:
     * <p/>
     * BaseCall Dir
     * |
     * L001
     * |     |        |
     * C1.1 C2.1 ... Cn.1
     * |     |        |
     * bcl Files ... bclFiles
     * <p/>
     * L001 is the base because it contains every BCL file in the run (though those files are nested in
     * other folders).
     */
    protected final File base;
    protected final FileFaker faker;

    public ParameterizedFileUtil(final boolean laneTileRegex, final String extension, final File base,
                                 final FileFaker faker, final int lane) {
        this(extension, base, faker, lane);
        if (laneTileRegex) {
            matchPattern = Pattern.compile(escapePeriods(makeLaneTileRegex(processTxtExtension(extension), lane)));
        } else {
            matchPattern = Pattern.compile(escapePeriods(makeLaneRegex(extension, lane)));
        }
    }

    public ParameterizedFileUtil(final String pattern, final String extension, final File base, final FileFaker faker,
                                 final int lane) {
        this(extension, base, faker, lane);
        this.matchPattern = Pattern.compile(pattern);
    }

    private ParameterizedFileUtil(final String extension, final File base, final FileFaker faker,
                                  final int lane) {
        this.faker = faker;
        this.extension = extension;
        this.base = base;
        this.lane = lane;
    }

    /**
     * Determine whether or not files are available
     *
     * @return return true if files are found matching this types pattern, false otherwise
     */
    public abstract boolean filesAvailable();

    /**
     * Return a list of all tiles available for this file format and run
     *
     * @return A List of tile integers
     */
    public List<Integer> getTiles() {
        return tiles;
    }

    /**
     * Given the expected tiles/expected cycles for this file type, return a list of error messages describing any
     * missing/or malformed files
     *
     * @param expectedTiles  An ordered list of tile numbers
     * @param expectedCycles An ordered list of cycle numbers that may contain gaps
     * @return A list of error messages for this format
     */
    public abstract List<String> verify(List<Integer> expectedTiles, int[] expectedCycles);

    /**
     * Given the expected tiles/expected cycles for this file type create a set of fake files such that the
     * verification criteria are met.
     *
     * @param expectedTiles An ordered list of tile numbers
     * @param cycles        An ordered list of cycle numbers that may contain gaps
     * @param format        The format of the files that are to be faked
     * @return A list of error messages for this format
     */
    public abstract List<String> fakeFiles(List<Integer> expectedTiles, int[] cycles,
                                           IlluminaFileUtil.SupportedIlluminaFormat format);

    /**
     * Returns only lane and tile information as PerTileFt's do not have End information.
     *
     * @param fileName Filename to analyze for data
     * @return A LaneTile object with the discovered Lane and Tile information and a null end field.
     */
    protected Integer fileToTile(final String fileName) {
        final Matcher matcher = matchPattern.matcher(fileName);
        if (!matcher.matches()) {
            return null;
        }
        return Integer.parseInt(matcher.group(1));
    }

    /**
     * Return a regex string for finding Lane and Tile given a file extension pattern
     */
    public static String makeLaneTileRegex(final String fileNameEndPattern, final int lane) {
        if (lane < 0) {
            throw new PicardException("Lane (" + lane + ") cannot be negative");
        }
        return "^" + "s_" + lane + "_(\\d{1,5})" + fileNameEndPattern + "$";
    }

    private String makeLaneRegex(final String fileNameEndPattern, final int lane) {
        return "^s_" + lane + fileNameEndPattern + "$";
    }

    /**
     * The period separator is expected in the file extension, since some do not start with it
     */
    private String escapePeriods(final String preEscaped) {
        return preEscaped
                .replaceAll("\\.", "\\."); //In the first one the \\ is inside a regex in the second it's NOT
    }

    /**
     * For filename patterns that end with .txt tack on the option .gz extension
     */
    private String processTxtExtension(final String fileNameEndPattern) {
        if (fileNameEndPattern.endsWith(".txt")) {
            return fileNameEndPattern + "(\\.gz|\\.bz2)?";
        } else {
            return fileNameEndPattern;
        }
    }

    /**
     * Return all files that match pattern of the given file type in the given base directory
     */
    protected IlluminaFileMap getTiledFiles(final File baseDirectory, final Pattern pattern) {
        final IlluminaFileMap fileMap = new IlluminaFileMap();
        if (baseDirectory.exists()) {
            IOUtil.assertDirectoryIsReadable(baseDirectory);
            final File[] files = IOUtil.getFilesMatchingRegexp(baseDirectory, pattern);
            for (final File file : files) {
                if (file.length() > 0) {
                    fileMap.put(fileToTile(file.getName()), file);
                }
            }
        }

        return fileMap;
    }

}
TOP

Related Classes of picard.illumina.parser.ParameterizedFileUtil

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.