Package com.ctc.wstx.io

Source Code of com.ctc.wstx.io.ReaderBootstrapper

/* Woodstox XML processor
*
* Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.ctc.wstx.io;

import java.io.*;
import java.text.MessageFormat;

import javax.xml.stream.Location;
import javax.xml.stream.XMLReporter;
import javax.xml.stream.XMLStreamException;

import org.codehaus.stax2.validation.XMLValidationProblem;

import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.cfg.ParsingErrorMsgs;
import com.ctc.wstx.exc.*;
import com.ctc.wstx.util.StringUtil;

/**
* Input bootstrap class used when input comes from a Reader; in this case,
* encoding is already known, and thus encoding from XML declaration (if
* any) is only double-checked, not really used.
*<p>
* Note: since the actual Reader to use after bootstrapping is pre-constructed,
* the local input buffer can (and should) be quite small.
*/
public final class ReaderBootstrapper
    extends InputBootstrapper
{
    final static char CHAR_BOM_MARKER = (char) 0xFEFF;

    /*
    ////////////////////////////////////////
    // Configuration
    ////////////////////////////////////////
    */

    /**
     * Underlying Reader to use for reading content.
     */
    final Reader mIn;

    /**
     * Encoding identifier processing application passed in; if not null,
     * will be compared to actual xml declaration based encoding (if
     * declaration found)
     */
    final String mInputEncoding;

    /*
    ///////////////////////////////////////////////////////////////
    // Input buffering
    ///////////////////////////////////////////////////////////////
    */

    private char[] mCharBuffer;

    private int mInputPtr;

    private int mInputLen;

    /*
    ////////////////////////////////////////
    // Life-cycle
    ////////////////////////////////////////
    */

    private ReaderBootstrapper(String pubId, String sysId, Reader r, String appEncoding)
    {
        super(pubId, sysId);
        mIn = r;
        if (appEncoding == null) { // may still be able to figure it out
            if (r instanceof InputStreamReader) {
                appEncoding = ((InputStreamReader) r).getEncoding();
            }
        }
        mInputEncoding = appEncoding;
    }

    /*
    ////////////////////////////////////////
    // Public API
    ////////////////////////////////////////
    */

    /**
     * @param r Eventual reader that will be reading actual content, after
     *   bootstrapping finishes
     * @param appEncoding Encoding that application declared; may be null.
     *   If not null, will be compared to actual declaration found; and
     *   incompatibility reported as a potential (but not necessarily fatal)
     *   problem.
     */
    public static ReaderBootstrapper getInstance(String pubId, String sysId,
                                                 Reader r, String appEncoding)
    {
        return new ReaderBootstrapper(pubId, sysId, r, appEncoding);
    }

    /**
     * Method called to do actual bootstrapping.
     *
     * @return Actual reader to use for reading xml content
     */
    public Reader bootstrapInput(ReaderConfig cfg, boolean mainDoc, int xmlVersion)
        throws IOException, XMLStreamException
    {
        /* First order of business: allocate input buffer. Not done during
         * construction for simplicity; that way config object need not be
         * passed before actual bootstrap method is called
         */
        /* Let's make sure buffer is at least 6 chars (to know '<?xml '
         * prefix), and preferably big enough to contain the whole declaration,
         *  but not too long to waste space -- it won't be reused
         * by the real input reader.
         */
        mCharBuffer = (cfg == null) ? new char[128] : cfg.allocSmallCBuffer(128); // 128 chars should be enough

        initialLoad(7);

        /* Only need 6 for signature ("<?xml\s"), but there may be a leading
         * BOM in there... and a valid xml declaration has to be longer
         * than 7 chars anyway (although, granted, shortest valid xml docl
         * is just 4 chars... "<a/>")
         */
        if (mInputLen >= 7) {
            char c = mCharBuffer[mInputPtr];
           
            // BOM to skip?
            if (c == CHAR_BOM_MARKER) {
                c = mCharBuffer[++mInputPtr];
            }
            if (c == '<') {
                if (mCharBuffer[mInputPtr+1] == '?'
                    && mCharBuffer[mInputPtr+2] == 'x'
                    && mCharBuffer[mInputPtr+3] == 'm'
                    && mCharBuffer[mInputPtr+4] == 'l'
                    && mCharBuffer[mInputPtr+5] <= CHAR_SPACE) {
                    // Yup, got the declaration ok!
                    mInputPtr += 6; // skip declaration
                    readXmlDecl(mainDoc, xmlVersion);
                   
                    if (mFoundEncoding != null && mInputEncoding != null) {
                        verifyXmlEncoding(cfg);
                    }
                }
            } else {
                /* We may also get something that would be invalid xml
                 * ("garbage" char; neither '<' nor space). If so, and
                 * it's one of "well-known" cases, we can not only throw
                 * an exception but also indicate a clue as to what is likely
                 * to be wrong.
                 */
                /* Specifically, UTF-8 read via, say, ISO-8859-1 reader, can
                 * "leak" marker (0xEF, 0xBB, 0xBF). While we could just eat
                 * it, there's bound to be other problems cropping up, so let's
                 * inform about the problem right away.
                 */
                if (c == 0xEF) {
                    throw new WstxIOException("Unexpected first character (char code 0xEF), not valid in xml document: could be mangled UTF-8 BOM marker. Make sure that the Reader uses correct encoding or pass an InputStream instead");
                }
            }
        }
        /* Ok, now; do we have unused chars we have read that need to
         * be merged in?
         */
        if (mInputPtr < mInputLen) {
            return new MergedReader(cfg, mIn, mCharBuffer, mInputPtr, mInputLen);
        }

        return mIn;
    }

    public String getInputEncoding() {
        return mInputEncoding;
    }

    public int getInputTotal() {
        return mInputProcessed + mInputPtr;
    }

    public int getInputColumn() {
        return (mInputPtr - mInputRowStart);
    }

    /*
    ////////////////////////////////////////
    // Internal methods, parsing
    ////////////////////////////////////////
    */

    protected void verifyXmlEncoding(ReaderConfig cfg)
        throws XMLStreamException
    {
        String inputEnc = mInputEncoding;

        // Close enough?
        if (StringUtil.equalEncodings(inputEnc, mFoundEncoding)) {
            return;
        }

        /* Ok, maybe the difference is just with endianness indicator?
         * (UTF-16BE vs. UTF-16)?
         */
        // !!! TBI

        XMLReporter rep = cfg.getXMLReporter();
        if (rep != null) {
            Location loc = getLocation();
            String msg = MessageFormat.format(ErrorConsts.W_MIXED_ENCODINGS,
                                              new Object[] { mFoundEncoding,
                                                             inputEnc });
            String type = ErrorConsts.WT_XML_DECL;
            /* 30-May-2008, tatus: Should wrap all the info as XMValidationProblem
             *    since that's Woodstox' contract wrt. relatedInformation field.
             */
            XMLValidationProblem prob = new XMLValidationProblem(loc, msg, XMLValidationProblem.SEVERITY_WARNING, type);
            rep.report(msg, type, prob, loc);
        }
    }

    /*
    /////////////////////////////////////////////////////
    // Internal methods, loading input data
    /////////////////////////////////////////////////////
    */

    protected boolean initialLoad(int minimum)
        throws IOException
    {
        mInputPtr = 0;
        mInputLen = 0;

        while (mInputLen < minimum) {
            int count = mIn.read(mCharBuffer, mInputLen,
                                 mCharBuffer.length - mInputLen);
            if (count < 1) {
                return false;
            }
            mInputLen += count;
        }
        return true;
    }

    protected void loadMore()
        throws IOException, WstxException
    {
        /* Need to make sure offsets are properly updated for error
         * reporting purposes, and do this now while previous amounts
         * are still known.
         */
        mInputProcessed += mInputLen;
        mInputRowStart -= mInputLen;

        mInputPtr = 0;
        mInputLen = mIn.read(mCharBuffer, 0, mCharBuffer.length);
        if (mInputLen < 1) {
            throw new WstxEOFException(ParsingErrorMsgs.SUFFIX_IN_XML_DECL,
                                       getLocation());
        }
    }

    /*
    /////////////////////////////////////////////////////
    // Implementations of abstract parsing methods
    /////////////////////////////////////////////////////
    */

    protected void pushback() {
        --mInputPtr;
    }

    protected int getNext()
        throws IOException, WstxException
    {
        return (mInputPtr < mInputLen) ?
            mCharBuffer[mInputPtr++] : nextChar();
    }


    protected int getNextAfterWs(boolean reqWs)
        throws IOException, WstxException
    {
        int count = 0;

        while (true) {
            char c = (mInputPtr < mInputLen) ?
                mCharBuffer[mInputPtr++] : nextChar();

            if (c > CHAR_SPACE) {
                if (reqWs && count == 0) {
                    reportUnexpectedChar(c, ERR_XMLDECL_EXP_SPACE);
                }
                return c;
            }
            if (c == CHAR_CR || c == CHAR_LF) {
                skipCRLF(c);
            } else if (c == CHAR_NULL) {
                reportNull();
            }
            ++count;
        }
    }

    /**
     * @return First character that does not match expected, if any;
     *    CHAR_NULL if match succeeded
     */
    protected int checkKeyword(String exp)
        throws IOException, WstxException
    {
        int len = exp.length();
       
        for (int ptr = 1; ptr < len; ++ptr) {
            char c = (mInputPtr < mInputLen) ?
                mCharBuffer[mInputPtr++] : nextChar();
           
            if (c != exp.charAt(ptr)) {
                return c;
            }
            if (c == CHAR_NULL) {
                reportNull();
            }
        }

        return CHAR_NULL;
    }

    protected int readQuotedValue(char[] kw, int quoteChar)
        throws IOException, WstxException
    {
        int i = 0;
        int len = kw.length;

        while (true) {
            char c = (mInputPtr < mInputLen) ?
                mCharBuffer[mInputPtr++] : nextChar();
            if (c == CHAR_CR || c == CHAR_LF) {
                skipCRLF(c);
            } else if (c == CHAR_NULL) {
                reportNull();
            }
            if (c == quoteChar) {
                return (i < len) ? i : -1;
            }
      // Let's just truncate longer values, but match quote
      if (i < len) {
    kw[i++] = c;
      }
  }
    }

    protected Location getLocation()
    {
        return new WstxInputLocation(null, mPublicId, mSystemId,
                                     mInputProcessed + mInputPtr - 1,
                                     mInputRow, mInputPtr - mInputRowStart);
    }

    /*
    /////////////////////////////////////////////////////
    // Internal methods, single-byte access methods
    /////////////////////////////////////////////////////
    */

    protected char nextChar()
        throws IOException, WstxException
    {
        if (mInputPtr >= mInputLen) {
            loadMore();
        }
        return mCharBuffer[mInputPtr++];
    }

    protected void skipCRLF(char lf)
        throws IOException, WstxException
    {
        if (lf == CHAR_CR) {
            char c = (mInputPtr < mInputLen) ?
                mCharBuffer[mInputPtr++] : nextChar();
            if (c != BYTE_LF) {
                --mInputPtr; // pushback if not 2-char/byte lf
            }
        }
        ++mInputRow;
        mInputRowStart = mInputPtr;
    }
}
TOP

Related Classes of com.ctc.wstx.io.ReaderBootstrapper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.