Package org.apache.james.mime4j.stream

Source Code of org.apache.james.mime4j.stream.RawFieldParser

/****************************************************************
* Licensed to the Apache Software Foundation (ASF) under one   *
* or more contributor license agreements.  See the NOTICE file *
* distributed with this work for additional information        *
* regarding copyright ownership.  The ASF licenses this file   *
* to you under the Apache License, Version 2.0 (the            *
* "License"); you may not use this file except in compliance   *
* with the License.  You may obtain a copy of the License at   *
*                                                              *
*   http://www.apache.org/licenses/LICENSE-2.0                 *
*                                                              *
* Unless required by applicable law or agreed to in writing,   *
* software distributed under the License is distributed on an  *
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
* KIND, either express or implied.  See the License for the    *
* specific language governing permissions and limitations      *
* under the License.                                           *
****************************************************************/

package org.apache.james.mime4j.stream;

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

import org.apache.james.mime4j.MimeException;
import org.apache.james.mime4j.util.ByteSequence;
import org.apache.james.mime4j.util.CharsetUtil;
import org.apache.james.mime4j.util.ContentUtil;

/**
* Low level parser for header field elements. The parsing routines of this class are designed
* to produce near zero intermediate garbage and make no intermediate copies of input data.
* <p/>
* This class is immutable and thread safe.
*/
public class RawFieldParser {

    public static BitSet INIT_BITSET(int ... b) {
        BitSet bitset = new BitSet(b.length);
        for (int i = 0; i < b.length; i++) {
            bitset.set(b[i]);
        }
        return bitset;
    }

    static final BitSet COLON                   = INIT_BITSET(':');
    static final BitSet EQUAL_OR_SEMICOLON      = INIT_BITSET('=', ';');
    static final BitSet SEMICOLON               = INIT_BITSET(';');

    public static final RawFieldParser DEFAULT = new RawFieldParser();

    /**
     * Parses the sequence of bytes into {@link RawField}.
     *
     * @throws MimeException if the input data does not contain a valid MIME field.
     */
    public RawField parseField(final ByteSequence raw) throws MimeException {
        if (raw == null) {
            return null;
        }
        ParserCursor cursor = new ParserCursor(0, raw.length());
        String name = parseToken(raw, cursor, COLON);
        if (cursor.atEnd()) {
            throw new MimeException("Invalid MIME field: no name/value separator found: " +
                    raw.toString());
        }
        return new RawField(raw, cursor.getPos(), name, null);
    }

    /**
     * Parses the field body containing a value with parameters into {@link RawBody}.
     *
     * @param field unstructured (raw) field
     */
    public RawBody parseRawBody(final RawField field) {
        ByteSequence buf = field.getRaw();
        int pos = field.getDelimiterIdx() + 1;
        if (buf == null) {
            String body = field.getBody();
            if (body == null) {
                return new RawBody("", null);
            }
            buf = ContentUtil.encode(body);
            pos = 0;
        }
        ParserCursor cursor = new ParserCursor(pos, buf.length());
        return parseRawBody(buf, cursor);
    }

    /**
     * Parses the sequence of bytes containing a value with parameters into {@link RawBody}.
     *
     * @param buf buffer with the sequence of bytes to be parsed
     * @param cursor defines the bounds and current position of the buffer
     */
    public RawBody parseRawBody(final ByteSequence buf, final ParserCursor cursor) {
        String value = parseToken(buf, cursor, SEMICOLON);
        if (cursor.atEnd()) {
            return new RawBody(value, new ArrayList<NameValuePair>());
        }
        cursor.updatePos(cursor.getPos() + 1);
        List<NameValuePair> params = parseParameters(buf, cursor);
        return new RawBody(value, params);
    }

    /**
     * Parses the sequence of bytes containing field parameters delimited with semicolon into
     * a list of {@link NameValuePair}s.
     *
     * @param buf buffer with the sequence of bytes to be parsed
     * @param cursor defines the bounds and current position of the buffer
     */
    public List<NameValuePair> parseParameters(final ByteSequence buf, final ParserCursor cursor) {
        List<NameValuePair> params = new ArrayList<NameValuePair>();
        skipWhiteSpace(buf, cursor);
        while (!cursor.atEnd()) {
            NameValuePair param = parseParameter(buf, cursor);
            params.add(param);
        }
        return params;
    }

    /**
     * Parses the sequence of bytes containing a field parameter delimited with semicolon into
     * {@link NameValuePair}.
     *
     * @param buf buffer with the sequence of bytes to be parsed
     * @param cursor defines the bounds and current position of the buffer
     */
    public NameValuePair parseParameter(final ByteSequence buf, final ParserCursor cursor) {
        String name = parseToken(buf, cursor, EQUAL_OR_SEMICOLON);
        if (cursor.atEnd()) {
            return new NameValuePair(name, null);
        }
        int delim = buf.byteAt(cursor.getPos());
        cursor.updatePos(cursor.getPos() + 1);
        if (delim == ';') {
            return new NameValuePair(name, null);
        }
        String value = parseValue(buf, cursor, SEMICOLON);
        if (!cursor.atEnd()) {
            cursor.updatePos(cursor.getPos() + 1);
        }
        return new NameValuePair(name, value);
    }

    /**
     * Extracts from the sequence of bytes a token terminated with any of the given delimiters
     * discarding semantically insignificant whitespace characters and comments.
     *
     * @param buf buffer with the sequence of bytes to be parsed
     * @param cursor defines the bounds and current position of the buffer
     * @param delimiters set of delimiting characters. Can be <code>null</code> if the token
     *  is not delimited by any character.
     */
    public String parseToken(final ByteSequence buf, final ParserCursor cursor, final BitSet delimiters) {
        StringBuilder dst = new StringBuilder();
        boolean whitespace = false;
        while (!cursor.atEnd()) {
            char current = (char) (buf.byteAt(cursor.getPos()) & 0xff);
            if (delimiters != null && delimiters.get(current)) {
                break;
            } else if (CharsetUtil.isWhitespace(current)) {
                skipWhiteSpace(buf, cursor);
                whitespace = true;
            } else if (current == '(') {
                skipComment(buf, cursor);
            } else {
                if (dst.length() > 0 && whitespace) {
                    dst.append(' ');
                }
                copyContent(buf, cursor, delimiters, dst);
                whitespace = false;
            }
        }
        return dst.toString();
    }

    /**
     * Extracts from the sequence of bytes a value which can be enclosed in quote marks and
     * terminated with any of the given delimiters discarding semantically insignificant
     * whitespace characters and comments.
     *
     * @param buf buffer with the sequence of bytes to be parsed
     * @param cursor defines the bounds and current position of the buffer
     * @param delimiters set of delimiting characters. Can be <code>null</code> if the value
     *  is not delimited by any character.
     */
    public String parseValue(final ByteSequence buf, final ParserCursor cursor, final BitSet delimiters) {
        StringBuilder dst = new StringBuilder();
        boolean whitespace = false;
        while (!cursor.atEnd()) {
            char current = (char) (buf.byteAt(cursor.getPos()) & 0xff);
            if (delimiters != null && delimiters.get(current)) {
                break;
            } else if (CharsetUtil.isWhitespace(current)) {
                skipWhiteSpace(buf, cursor);
                whitespace = true;
            } else if (current == '(') {
                skipComment(buf, cursor);
            } else if (current == '\"') {
                if (dst.length() > 0 && whitespace) {
                    dst.append(' ');
                }
                copyQuotedContent(buf, cursor, dst);
                whitespace = false;
            } else {
                if (dst.length() > 0 && whitespace) {
                    dst.append(' ');
                }
                copyContent(buf, cursor, delimiters, dst);
                whitespace = false;
            }
        }
        return dst.toString();
    }

    /**
     * Skips semantically insignificant whitespace characters and moves the cursor to the closest
     * non-whitespace character.
     *
     * @param buf buffer with the sequence of bytes to be parsed
     * @param cursor defines the bounds and current position of the buffer
     */
    public void skipWhiteSpace(final ByteSequence buf, final ParserCursor cursor) {
        int pos = cursor.getPos();
        int indexFrom = cursor.getPos();
        int indexTo = cursor.getUpperBound();
        for (int i = indexFrom; i < indexTo; i++) {
            char current = (char) (buf.byteAt(i) & 0xff);
            if (!CharsetUtil.isWhitespace(current)) {
                break;
            } else {
                pos++;
            }
        }
        cursor.updatePos(pos);
    }

    /**
     * Skips semantically insignificant content if the current position is positioned at the
     * beginning of a comment and moves the cursor past the end of the comment.
     * Nested comments and escaped characters are recognized and handled appropriately.
     *
     * @param buf buffer with the sequence of bytes to be parsed
     * @param cursor defines the bounds and current position of the buffer
     */
    public void skipComment(final ByteSequence buf, final ParserCursor cursor) {
        if (cursor.atEnd()) {
            return;
        }
        int pos = cursor.getPos();
        int indexFrom = cursor.getPos();
        int indexTo = cursor.getUpperBound();
        char current = (char) (buf.byteAt(pos) & 0xff);
        if (current != '(') {
            return;
        }
        pos++;
        indexFrom++;

        int level = 1;
        boolean escaped = false;
        for (int i = indexFrom; i < indexTo; i++, pos++) {
            current = (char) (buf.byteAt(i) & 0xff);
            if (escaped) {
                escaped = false;
            } else {
                if (current == '\\') {
                    escaped = true;
                } else if (current == '(') {
                    level++;
                } else if (current == ')') {
                    level--;
                }
            }
            if (level <= 0) {
                pos++;
                break;
            }
        }
        cursor.updatePos(pos);
    }

    /**
     * Skips semantically insignificant whitespace characters and comments and moves the cursor
     * to the closest semantically significant non-whitespace character.
     * Nested comments and escaped characters are recognized and handled appropriately.
     *
     * @param buf buffer with the sequence of bytes to be parsed
     * @param cursor defines the bounds and current position of the buffer
     */
    public void skipAllWhiteSpace(final ByteSequence buf, final ParserCursor cursor) {
        while (!cursor.atEnd()) {
            char current = (char) (buf.byteAt(cursor.getPos()) & 0xff);
            if (CharsetUtil.isWhitespace(current)) {
                skipWhiteSpace(buf, cursor);
            } else if (current == '(') {
                skipComment(buf, cursor);
            } else {
                break;
            }
        }
    }

    /**
     * Transfers content into the destination buffer until a whitespace character, a comment,
     * or any of the given delimiters is encountered.
     *
     * @param buf buffer with the sequence of bytes to be parsed
     * @param cursor defines the bounds and current position of the buffer
     * @param delimiters set of delimiting characters. Can be <code>null</code> if the value
     *  is delimited by a whitespace or a comment only.
     * @param dst destination buffer
     */
    public void copyContent(final ByteSequence buf, final ParserCursor cursor, final BitSet delimiters,
            final StringBuilder dst) {
        int pos = cursor.getPos();
        int indexFrom = cursor.getPos();
        int indexTo = cursor.getUpperBound();
        for (int i = indexFrom; i < indexTo; i++) {
            char current = (char) (buf.byteAt(i) & 0xff);
            if ((delimiters != null && delimiters.get(current))
                    || CharsetUtil.isWhitespace(current) || current == '(') {
                break;
            } else {
                pos++;
                dst.append(current);
            }
        }
        cursor.updatePos(pos);
    }

    /**
     * Transfers content enclosed with quote marks into the destination buffer.
     *
     * @param buf buffer with the sequence of bytes to be parsed
     * @param cursor defines the bounds and current position of the buffer
     * @param dst destination buffer
     */
    public void copyQuotedContent(final ByteSequence buf, final ParserCursor cursor,
            final StringBuilder dst) {
        if (cursor.atEnd()) {
            return;
        }
        int pos = cursor.getPos();
        int indexFrom = cursor.getPos();
        int indexTo = cursor.getUpperBound();
        char current = (char) (buf.byteAt(pos) & 0xff);
        if (current != '\"') {
            return;
        }
        pos++;
        indexFrom++;
        boolean escaped = false;
        for (int i = indexFrom; i < indexTo; i++, pos++) {
            current = (char) (buf.byteAt(i) & 0xff);
            if (escaped) {
                if (current != '\"' && current != '\\') {
                    dst.append('\\');
                }
                dst.append(current);
                escaped = false;
            } else {
                if (current == '\"') {
                    pos++;
                    break;
                }
                if (current == '\\') {
                    escaped = true;
                } else if (current != '\r' && current != '\n') {
                    dst.append(current);
                }
            }
        }
        cursor.updatePos(pos);
    }

}
TOP

Related Classes of org.apache.james.mime4j.stream.RawFieldParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.