Package org.haystack.io

Source Code of org.haystack.io.HZincReader

//
// Copyright (c) 2011, Brian Frank
// Licensed under the Academic Free License version 3.0
//
// History:
//   07 Jun 2011  Brian Frank  Creation
//   24 Sep 2012  Brian Frank  Repurpose HReader
//
package org.haystack.io;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.haystack.HDict;
import org.haystack.HDictBuilder;
import org.haystack.HFilter;
import org.haystack.HGrid;
import org.haystack.HGridBuilder;
import org.haystack.HTimeZone;
import org.haystack.ParseException;
import org.haystack.tagval.HBin;
import org.haystack.tagval.HBool;
import org.haystack.tagval.HCoord;
import org.haystack.tagval.HDate;
import org.haystack.tagval.HDateTime;
import org.haystack.tagval.HMarker;
import org.haystack.tagval.HNum;
import org.haystack.tagval.HRef;
import org.haystack.tagval.HStr;
import org.haystack.tagval.HTime;
import org.haystack.tagval.HUri;
import org.haystack.tagval.HVal;

/**
* HZincReader reads grids using the Zinc format.
*
* @see <a href='http://project-haystack.org/doc/Zinc'>Project Haystack</a>
*/
public class HZincReader extends HGridReader {
    public int getVersion() {
        return version;
    }

    //////////////////////////////////////////////////////////////////////////
    // Construction
    //////////////////////////////////////////////////////////////////////////

    /** Read from UTF-8 input stream. */
    public HZincReader(InputStream in) {
        try {
            this.in = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            init();
        }
        catch (IOException e) {
            throw err(e);
        }
    }

    /** Read from in-memory string. */
    public HZincReader(String in) {
        this.in = new StringReader(in);
        init();
    }

    private void init() {
        consume();
        consume();
    }

    //////////////////////////////////////////////////////////////////////////
    // HGridReader
    //////////////////////////////////////////////////////////////////////////

    /** Read grid from the stream. */
    @Override
    public HGrid readGrid() {
        HGridBuilder b = new HGridBuilder();

        // meta line
        readVer();
        readMeta(b.meta());
        consumeNewline();

        // read cols
        int numCols = 0;
        while (true) {
            String name = readId();
            skipSpace();
            numCols++;
            readMeta(b.addCol(name));
            if (cur != ',')
                break;
            consume();
            skipSpace();
        }
        consumeNewline();

        // rows
        while (cur != '\n' && cur > 0) {
            HVal[] cells = new HVal[numCols];
            for (int i = 0; i < numCols; ++i) {
                skipSpace();
                if (cur != ',' && cur != '\n')
                    cells[i] = readVal();
                skipSpace();
                if (i + 1 < numCols) {
                    if (cur != ',')
                        throw errChar("Expecting comma in row");
                    consume();
                }
            }
            consumeNewline();
            b.addRow(cells);
        }
        if (cur == '\n')
            consumeNewline();

        return b.toGrid();
    }

    /** Read list of grids from the stream. */
    public HGrid[] readGrids() {
        List<HGrid> acc = new ArrayList<HGrid>();
        while (cur > 0)
            acc.add(readGrid());
        return acc.toArray(new HGrid[acc.size()]);
    }

    /** Close underlying input stream */
    public void close() {
        try {
            in.close();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Read set of name/value tags as dictionary */
    public HDict readDict() {
        HDictBuilder b = new HDictBuilder();
        readMeta(b);
        if (cur >= 0)
            throw errChar("Expected end of stream");
        return b.toDict();
    }

    //////////////////////////////////////////////////////////////////////////
    // Implementation
    //////////////////////////////////////////////////////////////////////////

    private void readVer() {
        String id = readId();
        if (!id.equals("ver"))
            throw err("Expecting zinc header 'ver:2.0', not '" + id + "'");
        if (cur != ':')
            throw err("Expecting ':' colon");
        consume();
        String ver = readStrLiteral();
        if (ver.equals("2.0"))
            version = 2;
        else
            throw err("Unsupported zinc version: " + ver);
        skipSpace();
    }

    private void readMeta(HDictBuilder b) {
        // parse pairs
        while (isIdStart(cur)) {
            // name
            String name = readId();

            // marker or :val
            HVal val = HMarker.VAL;
            skipSpace();
            if (cur == ':') {
                consume();
                skipSpace();
                val = readVal();
                skipSpace();
            }
            b.add(name, val);
            skipSpace();
        }
    }

    private String readId() {
        if (!isIdStart(cur))
            throw errChar("Invalid name start char");
        StringBuffer s = new StringBuffer();
        while (isId(cur)) {
            s.append((char) cur);
            consume();
        }
        return s.toString();
    }

    //////////////////////////////////////////////////////////////////////////
    // HVals
    //////////////////////////////////////////////////////////////////////////

    /** Read scalar value. */
    public HVal readScalar() {
        HVal val = readVal();
        if (cur >= 0)
            throw errChar("Expected end of stream");
        return val;
    }

    /** Read a single scalar value from the stream. */
    private HVal readVal() {
        if (isDigit(cur))
            return readNumVal();
        if (isAlpha(cur))
            return readWordVal();
        switch (cur) {
        case '@':
            return readRefVal();
        case '"':
            return readStrVal();
        case '`':
            return readUriVal();
        case '-':
            if (peek == 'I')
                return readWordVal();
            return readNumVal();
        default:
            throw errChar("Unexpected char for start of value");
        }
    }

    private HVal readWordVal() {
        // read into string
        StringBuffer s = new StringBuffer();
        do {
            s.append((char) cur);
            consume();
        }
        while (isAlpha(cur));
        String word = s.toString();

        // match identifier
        if (isFilter) {
            if (word.equals("true"))
                return HBool.TRUE;
            if (word.equals("false"))
                return HBool.FALSE;
        }
        else {
            if (word.equals("N"))
                return null;
            if (word.equals("M"))
                return HMarker.VAL;
            if (word.equals("R"))
                return HStr.make("_remove_");
            if (word.equals("T"))
                return HBool.TRUE;
            if (word.equals("F"))
                return HBool.FALSE;
            if (word.equals("Bin"))
                return readBinVal();
            if (word.equals("C"))
                return readCoordVal();
        }
        if (word.equals("NaN"))
            return HNum.NaN;
        if (word.equals("INF"))
            return HNum.POS_INF;
        if (word.equals("-INF"))
            return HNum.NEG_INF;
        throw err("Unknown value identifier: " + word);
    }

    private HVal readBinVal() {
        if (cur < 0)
            throw err("Expected '(' after Bin");
        consume();
        StringBuffer s = new StringBuffer();
        while (cur != ')') {
            if (cur < 0)
                throw err("Unexpected end of bin literal");
            if (cur == '\n' || cur == '\r')
                throw err("Unexpected newline in bin literal");
            s.append((char) cur);
            consume();
        }
        consume();
        return validate(HBin.make(s.toString()));
    }

    private HVal readCoordVal() {
        if (cur < 0)
            throw err("Expected '(' after Coord");
        consume();
        StringBuffer s = new StringBuffer("C(");
        while (cur != ')') {
            if (cur < 0)
                throw err("Unexpected end of coord literal");
            if (cur == '\n' || cur == '\r')
                throw err("Unexpected newline in coord literal");
            s.append((char) cur);
            consume();
        }
        consume();
        s.append(")");
        return validate(HCoord.make(s.toString()));
    }

    private HVal readNumVal() {
        // parse numeric part
        StringBuffer s = new StringBuffer();
        s.append((char) cur);
        consume();
        while (isDigit(cur) || cur == '.' || cur == '_') {
            if (cur != '_')
                s.append((char) cur);
            consume();
            if (cur == 'e' || cur == 'E') {
                if (peek == '-' || peek == '+' || isDigit(peek)) {
                    s.append((char) cur);
                    consume();
                    s.append((char) cur);
                    consume();
                }
            }
        }
        double val = Double.parseDouble(s.toString());

        // HDate - check for dash
        HDate date = null;
        HTime time = null;
        int hour = -1;
        if (cur == '-') {
            int year;
            try {
                year = Integer.parseInt(s.toString());
            }
            catch (Exception e) {
                throw err("Invalid year for date value: " + s);
            }
            consume(); // dash
            int month = readTwoDigits("Invalid digit for month in date value");
            if (cur != '-')
                throw errChar("Expected '-' for date value");
            consume();
            int day = readTwoDigits("Invalid digit for day in date value");
            date = validate(HDate.make(year, month, day));

            // check for 'T' date time
            if (cur != 'T')
                return date;

            // parse next two digits and drop down to HTime parsing
            consume();
            hour = readTwoDigits("Invalid digit for hour in date time value");
        }

        // HTime - check for colon
        if (cur == ':') {
            // hour (may have been parsed already in date time)
            if (hour < 0) {
                if (s.length() != 2) {
                    throw err("Hour must be two digits for time value: " + s);
                }
                try {
                    hour = Integer.parseInt(s.toString());
                }
                catch (Exception e) {
                    throw err("Invalid hour for time value: " + s);
                }
            }
            consume(); // colon
            int min = readTwoDigits("Invalid digit for minute in time value");
            if (cur != ':')
                throw errChar("Expected ':' for time value");
            consume();
            int sec = readTwoDigits("Invalid digit for seconds in time value");
            int ms = 0;
            if (cur == '.') {
                consume();
                int places = 0;
                while (isDigit(cur)) {
                    ms = (ms * 10) + (cur - '0');
                    consume();
                    places++;
                }
                switch (places) {
                case 1:
                    ms *= 100;
                    break;
                case 2:
                    ms *= 10;
                    break;
                case 3:
                    break;
                default:
                    throw err("Too many digits for milliseconds in time value");
                }
            }
            time = validate(HTime.make(hour, min, sec, ms));
            if (date == null)
                return time;
        }

        // HDateTime (if we have date and time)
        if (date != null) {
            // timezone offset "Z" or "-/+hh:mm"
            int tzOffset = 0;
            if (cur == 'Z')
                consume();
            else {
                boolean neg = cur == '-';
                if (cur != '-' && cur != '+')
                    throw errChar("Expected -/+ for timezone offset");
                consume();
                int tzHours = readTwoDigits("Invalid digit for timezone offset");
                if (cur != ':')
                    throw errChar("Expected colon for timezone offset");
                consume();
                int tzMins = readTwoDigits("Invalid digit for timezone offset");
                tzOffset = (tzHours * 3600) + (tzMins * 60);
                if (neg)
                    tzOffset = -tzOffset;
            }

            // timezone name
            HTimeZone tz;
            if (cur != ' ') {
                if (tzOffset != 0)
                    throw errChar("Expected space between timezone offset and name");
                tz = HTimeZone.UTC;
            }
            else {
                consume();
                StringBuffer tzBuf = new StringBuffer();
                if (!isTz(cur))
                    throw errChar("Expected timezone name");
                while (isTz(cur)) {
                    tzBuf.append((char) cur);
                    consume();
                }
                tz = HTimeZone.make(tzBuf.toString());
            }
            return validate(HDateTime.make(date, time, tz, tzOffset));
        }

        // if we have unit, parse that
        String unit = null;
        if (isUnit(cur)) {
            s = new StringBuffer();
            while (isUnit(cur)) {
                s.append((char) cur);
                consume();
            }
            unit = s.toString();
        }

        return validate(HNum.make(val, unit));
    }

    private int readTwoDigits(String errMsg) {
        if (!isDigit(cur))
            throw errChar(errMsg);
        int tens = (cur - '0') * 10;
        consume();
        if (!isDigit(cur))
            throw errChar(errMsg);
        int val = tens + (cur - '0');
        consume();
        return val;
    }

    private HVal readRefVal() {
        consume(); // opening @
        StringBuffer s = new StringBuffer();
        while (HRef.isIdChar(cur)) {
            if (cur < 0)
                throw err("Unexpected end of ref literal");
            if (cur == '\n' || cur == '\r')
                throw err("Unexpected newline in ref literal");
            s.append((char) cur);
            consume();
        }
        skipSpace();

        String dis = null;
        if (cur == '"')
            dis = readStrLiteral();

        return validate(HRef.make(s.toString(), dis));
    }

    private HVal readStrVal() {
        return validate(HStr.make(readStrLiteral()));
    }

    private String readStrLiteral() {
        consume(); // opening quote
        StringBuffer s = new StringBuffer();
        while (cur != '"') {
            if (cur < 0)
                throw err("Unexpected end of str literal");
            if (cur == '\n' || cur == '\r')
                throw err("Unexpected newline in str literal");
            if (cur == '\\') {
                s.append((char) readEscChar());
            }
            else {
                s.append((char) cur);
                consume();
            }
        }
        consume(); // closing quote
        return s.toString();
    }

    private int readEscChar() {
        consume(); // back slash

        // check basics
        switch (cur) {
        case 'b':
            consume();
            return '\b';
        case 'f':
            consume();
            return '\f';
        case 'n':
            consume();
            return '\n';
        case 'r':
            consume();
            return '\r';
        case 't':
            consume();
            return '\t';
        case '"':
            consume();
            return '"';
        case '$':
            consume();
            return '$';
        case '\\':
            consume();
            return '\\';
        }

        // check for uxxxx
        if (cur == 'u') {
            consume();
            int n3 = toNibble(cur);
            consume();
            int n2 = toNibble(cur);
            consume();
            int n1 = toNibble(cur);
            consume();
            int n0 = toNibble(cur);
            consume();
            return (n3 << 12) | (n2 << 8) | (n1 << 4) | (n0);
        }

        throw err("Invalid escape sequence: \\" + (char) cur);
    }

    private int toNibble(int c) {
        if ('0' <= c && c <= '9')
            return c - '0';
        if ('a' <= c && c <= 'f')
            return c - 'a' + 10;
        if ('A' <= c && c <= 'F')
            return c - 'A' + 10;
        throw errChar("Invalid hex char");
    }

    private HVal readUriVal() {
        consume(); // opening backtick
        StringBuffer s = new StringBuffer();

        while (true) {
            if (cur < 0)
                throw err("Unexpected end of uri literal");
            if (cur == '\n' || cur == '\r')
                throw err("Unexpected newline in uri literal");
            if (cur == '`')
                break;
            if (cur == '\\') {
                switch (peek) {
                case ':':
                case '/':
                case '?':
                case '#':
                case '[':
                case ']':
                case '@':
                case '\\':
                case '&':
                case '=':
                case ';':
                    s.append((char) cur);
                    s.append((char) peek);
                    consume();
                    consume();
                    break;
                case '`':
                    s.append('`');
                    consume();
                    consume();
                    break;
                default:
                    if (peek == 'u' || peek == '\\')
                        s.append((char) readEscChar());
                    else
                        throw err("Invalid URI escape sequence \\" + (char) peek);
                    break;
                }
            }
            else {
                s.append((char) cur);
                consume();
            }
        }
        consume(); // closing backtick
        return validate(HUri.make(s.toString()));
    }

    //////////////////////////////////////////////////////////////////////////
    // HFilter
    //////////////////////////////////////////////////////////////////////////

    /** Never use directly. Use "HFilter.make" */
    public HFilter readFilter() {
        isFilter = true;
        skipSpace();

        if (cur == -1)
            return HFilter.all();

        HFilter q = readFilterOr();
        skipSpace();
        if (cur >= 0)
            throw errChar("Expected end of stream");
        return q;
    }

    private HFilter readFilterOr() {
        HFilter q = readFilterAnd();
        skipSpace();
        if (cur != 'o')
            return q;
        if (!readId().equals("or"))
            throw err("Expecting 'or' keyword");
        skipSpace();
        return q.or(readFilterOr());
    }

    private HFilter readFilterAnd() {
        HFilter q = readFilterAtomic();
        skipSpace();
        if (cur != 'a')
            return q;
        if (!readId().equals("and"))
            throw err("Expecting 'and' keyword");
        skipSpace();
        return q.and(readFilterAnd());
    }

    private HFilter readFilterAtomic() {
        skipSpace();
        if (cur == '(')
            return readFilterParens();

        String path = readFilterPath();
        skipSpace();

        if (path.toString().equals("not")) {
            return HFilter.missing(readFilterPath());
        }

        if (cur == '=' && peek == '=') {
            consumeCmp();
            return HFilter.eq(path, readVal());
        }
        if (cur == '!' && peek == '=') {
            consumeCmp();
            return HFilter.ne(path, readVal());
        }
        if (cur == '<' && peek == '=') {
            consumeCmp();
            return HFilter.le(path, readVal());
        }
        if (cur == '>' && peek == '=') {
            consumeCmp();
            return HFilter.ge(path, readVal());
        }
        if (cur == '<') {
            consumeCmp();
            return HFilter.lt(path, readVal());
        }
        if (cur == '>') {
            consumeCmp();
            return HFilter.gt(path, readVal());
        }
        if (cur == '~' && peek == '~') {
            consumeCmp();
            return HFilter.ilike(path, readVal());
        }
        if (cur == '~') {
            consumeCmp();
            return HFilter.like(path, readVal());
        }

        return HFilter.has(path);
    }

    private HFilter readFilterParens() {
        consume();
        skipSpace();
        HFilter q = readFilterOr();
        if (cur != ')')
            throw err("Expecting ')'");
        consume();
        return q;
    }

    private void consumeCmp() {
        consume();
        if (cur == '=' || cur == '~')
            consume();
        skipSpace();
    }

    private String readFilterPath() {
        // read first tag name
        String id = readId();

        // if not pathed, optimize for common case
        if (cur != '-' || peek != '>')
            return id;

        // parse path
        StringBuffer s = new StringBuffer().append(id);
        List<String> acc = new ArrayList<String>();
        acc.add(id);
        while (cur == '-' || peek == '>') {
            consume();
            consume();
            id = readId();
            acc.add(id);
            s.append('-').append('>').append(id);
        }
        return s.toString();
    }

    //////////////////////////////////////////////////////////////////////////
    // Char Reads
    //////////////////////////////////////////////////////////////////////////

    private ParseException errChar(String msg) {
        if (cur < 0)
            msg += " (end of stream)";
        else {
            msg += " (char=0x" + Integer.toHexString(cur);
            if (cur >= ' ')
                msg += " '" + (char) cur + "'";
            msg += ")";
        }
        return err(msg, null);
    }

    private ParseException err(String msg) {
        return err(msg, null);
    }

    private ParseException err(Throwable ex) {
        return err(ex.toString(), ex);
    }

    private ParseException err(String msg, Throwable ex) {
        return new ParseException(msg + " [Line " + lineNum + "]", ex);
    }

    private void skipSpace() {
        while (cur == ' ' || cur == '\t' || cur == '\r' || cur == '\n')
            consume();
    }

    int cur() {
        return cur;
    }

    void consumeNewline() {
        if (cur != '\n')
            throw errChar("Expecting newline");
        consume();
    }

    void consume() {
        try {
            cur = peek;
            peek = in.read();
            colNum++;
            if (cur == '\n') {
                lineNum++;
                colNum = 0;
            }
        }
        catch (IOException e) {
            throw err(e);
        }
    }

    <T extends HVal> T validate(T val) {
        if (!val.validate().isEmpty())
            throw new IllegalArgumentException("Parse error near character " + colNum + ", line " + lineNum);
        return val;
    }

    //////////////////////////////////////////////////////////////////////////
    // Char Types
    //////////////////////////////////////////////////////////////////////////

    private static boolean isDigit(int c) {
        return c > 0 && c < 128 && (charTypes[c] & DIGIT) != 0;
    }

    private static boolean isAlpha(int c) {
        return c > 0 && c < 128 && (charTypes[c] & ALPHA) != 0;
    }

    private static boolean isUnit(int c) {
        return c > 0 && (c >= 128 || (charTypes[c] & UNIT) != 0);
    }

    private static boolean isTz(int c) {
        return c > 0 && c < 128 && (charTypes[c] & TZ) != 0;
    }

    private static boolean isIdStart(int c) {
        return c > 0 && c < 128 && (charTypes[c] & ID_START) != 0;
    }

    private static boolean isId(int c) {
        return c > 0 && c < 128 && (charTypes[c] & ID) != 0;
    }

    private static final byte[] charTypes = new byte[128];
    private static final int DIGIT = 0x01;
    private static final int ALPHA_LO = 0x02;
    private static final int ALPHA_UP = 0x04;
    private static final int ALPHA = ALPHA_UP | ALPHA_LO;
    private static final int UNIT = 0x08;
    private static final int TZ = 0x10;
    private static final int ID_START = 0x20;
    private static final int ID = 0x40;
    static {
        for (int i = '0'; i <= '9'; ++i)
            charTypes[i] = (DIGIT | TZ | ID);
        for (int i = 'a'; i <= 'z'; ++i)
            charTypes[i] = (ALPHA_LO | UNIT | TZ | ID_START | ID);
        for (int i = 'A'; i <= 'Z'; ++i)
            charTypes[i] = (ALPHA_UP | UNIT | TZ | ID);
        charTypes['%'] = UNIT;
        charTypes['_'] = UNIT | TZ | ID;
        charTypes['/'] = UNIT;
        charTypes['$'] = UNIT;
        charTypes['-'] = TZ;
        charTypes['+'] = TZ;
    }

    //////////////////////////////////////////////////////////////////////////
    // Fields
    //////////////////////////////////////////////////////////////////////////

    private Reader in;
    private int cur;
    private int peek;
    private int colNum = 1;
    private int lineNum = 1;
    private int version;
    private boolean isFilter;
}
TOP

Related Classes of org.haystack.io.HZincReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.