// Package stallone.io

// Source code of stallone.io.CachedAsciiFileReader

package stallone.io;

import stallone.doubles.fastutils.LongArrayList;

import java.io.FileNotFoundException;
import java.io.IOException;

import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import stallone.io.CachedRandomAccessFile;

/**
* Fast reader for asci text files which contain text data in a line oriented way.
*
* @author  Martin Senne
*/
public class CachedAsciiFileReader
{
    /** */
    public static Pattern commentLine = Pattern.compile("[;#].*|\\s*");
    /** */
    public static Pattern whiteSpacePattern = Pattern.compile("\\s+");
    /** Logger. */
    private static final Logger log = Logger.getLogger(CachedAsciiFileReader.class.getName());
    /** Filename of the file we are working with. */
    protected String filename;
    /** scanned? */
    private boolean scanned = false;
    /** For fast and efficient file access. */
    private CachedRandomAccessFile randomAccessFile;
    /** Number of lines this file has, which do not match commentLine pattern. */
    private int relevantLines;
    /** Byte offsets of line starts in file. */
    private long[] lineStartOffsets;
    /** Pattern, which actually is used to determine comment lines */
    private Pattern usedCommentPattern;

    public CachedAsciiFileReader()
    {}

    /**
     * Fast reader for asci files.
     *
     * @param   filename  is the file to open for reading.
     *
     * @throws  FileNotFoundException
     * @throws  IOException
     */
    public CachedAsciiFileReader(final String filename) throws FileNotFoundException, IOException
    {
        this(filename, null);
    }

    /**
     * Fast reader for asci files. Lines matching the commentPattern are automatically discarded.
     *
     * @param   filename        is the file to open for reading.
     * @param   commentPattern  lines to automatically discard
     *
     * @throws  FileNotFoundException
     * @throws  IOException
     */
    public CachedAsciiFileReader(final String filename, final Pattern commentPattern) throws FileNotFoundException,
            IOException
    {
        this.filename = filename;
        this.usedCommentPattern = commentPattern;

        randomAccessFile = new CachedRandomAccessFile(filename);

        this.lineStartOffsets = null;
    }

    public void setFilename(final String _filename)
    {
        this.filename = _filename;
    }

    public void setCommentPattern(final Pattern _commentPattern)
    {
        this.usedCommentPattern = _commentPattern;
    }

    public void open()
            throws IOException
    {
        if (randomAccessFile != null)
            randomAccessFile.close();
        randomAccessFile = new CachedRandomAccessFile(filename);
        scanned = false;
    }

    /**
     * Determine byte offset of each line in the file.
     * Calls {@link #evaluateLineWhileScanning(java.lang.String, int) } which can be overriden in subclasses
     *   as a quasi-callback.
     *
     * This method MUST be called before invoking {@link #getLine(int) } or {@link #getNumberOfLines() }
     *
     */
    public void scan()
            throws IOException
    {
        if (scanned)
            return;
       
        final int INITIAL_SIZE = 1000000;

        final LongArrayList lineOffsets = new LongArrayList(INITIAL_SIZE);

            long currentPos = 0;
            int currentLineNumber = 0;
            long oldPos;
            String textline;

            while ((textline = randomAccessFile.readLine()) != null)
            {
                // save old position
                oldPos = currentPos;

                // if commentPattern active, then read away comment lines
                if (usedCommentPattern != null)
                {
//                    while (usedCommentPattern.matcher(textline).matches()) {
//                        textline = randomAccessFile.readLine(); // skip line
//                    }
                    if (usedCommentPattern.matcher(textline).matches())
                    {
                        continue;
                    }
                }

                scanLine(textline, currentLineNumber);

                currentLineNumber++;
                currentPos = randomAccessFile.getFilePointer();
                lineOffsets.add(oldPos);
               
                // garbage collection is one solution
                // if ( (currentLineNumber % 1000000) == 0) {
                //    System.out.println("Scanned up to line: " + currentLineNumber);
                //    System.gc();
                // }
            }
            scanEnd(currentLineNumber);

            lineOffsets.trim();

        // determine line offsets
        lineStartOffsets = lineOffsets.toLongArray();
        relevantLines = lineStartOffsets.length;
       
        scanned = true;
    }

    /**
     * Determine number of lines this file has.
     * Lines matching the comment pattern do not count as lines.
     *
     * @see #scan()
     * @return
     */
    public int getNumberOfLines()
    {
        return relevantLines;
    }


    /**
     * Get line with line number <code>lineNumber</code>.
     *
     * @param lineNumber  to get
     * @return String of requested line.
     *
     * @see #scan()
     */
    public String getLine(final int lineNumber)
    {
        if (lineStartOffsets == null)
        {
            throw new RuntimeException("No line offsets available, you need to call scan() first.");
        }

        try
        {
            if ((0 <= lineNumber) && (lineNumber < relevantLines))
            {
                randomAccessFile.seek(lineStartOffsets[lineNumber]);
                String line = randomAccessFile.readLine();
                return line;
            }
            else
            {
                throw new IllegalArgumentException("Requested line " + lineNumber + " is out of scope.");
            }
        } catch (IOException ex)
        {
            Logger.getLogger(CachedAsciiFileReader.class.getName()).log(Level.SEVERE, "I/O error while requesting line "
                    + lineNumber + " of file '" + filename + "'.", ex);
            throw new RuntimeException("I/O error while requesting line "
                    + lineNumber + " of file '" + filename + "'.");
        }
    }

    public void close() throws IOException
    {
        randomAccessFile.close();
    }

    /**
     * Passes the line to the analyzer while scanning.
     * @param textline
     * @param currentLineNumber
     * @return true if the line is accepted, false if rejected.
     */
    protected boolean scanLine(String textline, int currentLineNumber)
    {
        return true;
    }

    protected void scanEnd(int currentLineNumber)
    {
        // do nothing
    }
}
// TOP
//
// Related Classes of stallone.io.CachedAsciiFileReader
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.