Package com.salas.bb.utils.xml

Source Code of com.salas.bb.utils.xml.TestUTF8Reader

// BlogBridge -- RSS feed reader, manager, and web based service
// Copyright (C) 2002-2006 by R. Pito Salas
//
// This program is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free Software Foundation;
// either version 2 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along with this program;
// if not, write to the Free Software Foundation, Inc., 59 Temple Place,
// Suite 330, Boston, MA 02111-1307 USA
//
// Contact: R. Pito Salas
// mailto:pitosalas@users.sourceforge.net
// More information: about BlogBridge
// http://www.blogbridge.com
// http://sourceforge.net/projects/blogbridge
//
// $Id: TestUTF8Reader.java,v 1.6 2006/01/08 05:28:19 kyank Exp $
//

package com.salas.bb.utils.xml;

import junit.framework.TestCase;
import org.jdom.Document;
import org.jdom.input.SAXBuilder;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.logging.ConsoleHandler;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* @see UTF8Reader
*/
public class TestUTF8Reader extends TestCase
{
    private static final Logger LOG = Logger.getLogger(TestUTF8Reader.class.getName());

    /**
     * Tests reading plain ASCII characters.
     */
    public void testPlainASCII()
    {
        char[] src = new char[128];
        int[] dst = new int[128];
        for (int i = 0; i < src.length; i++)
        {
            src[i] = (char)i;
            dst[i] = i;
        }

        check(src, dst);
    }

    /**
     * Tests 2-byte multi-byte sequences.
     */
    public void test2Byte()
    {
        // First possible 2-byte sequence 0x80
        check(new char[] { 't', (char)0xc2, (char)0x80, 'e' }, new int[] { 't', 0x80, 'e' });
        check(new char[] { (char)0xc2, (char)0x80 }, new int[] { 0x80 });

        // Something in the middle 0xa9
        check(new char[] { 't', (char)0xc2, (char)0xa9, 'e' }, new int[] { 't', 0xa9, 'e' });
        check(new char[] { (char)0xc2, (char)0xa9 }, new int[] { 0xa9 });

        // Last possible 2-byte sequence 0x7ff
        check(new char[] { 't', (char)0xdf, (char)0xbf, 'e' }, new int[] { 't', 0x7ff, 'e' });
        check(new char[] { (char)0xdf, (char)0xbf }, new int[] { 0x7ff });
    }

    /**
     * Tests 3-byte multi-byte sequences.
     */
    public void test3Byte()
    {
        // First possible 3-byte sequence 0x800
        check(new char[] { 't', (char)0xe0, (char)0xa0, (char)0x80, 'e' },
              new int[] { 't', 0x800, 'e' });
        check(new char[] { (char)0xe0, (char)0xa0, (char)0x80 }, new int[] { 0x800 });

        // Something in the middle 0xd7ff
        check(new char[] { 't', (char)0xed, (char)0x9f, (char)0xbf, 'e' },
              new int[] { 't', 0xd7ff, 'e' });
        check(new char[] { (char)0xed, (char)0x9f, (char)0xbf }, new int[] { 0xd7ff });

        // Last possible 3-byte sequence 0xffff
        check(new char[] { 't', (char)0xef, (char)0xbf, (char)0xbf, 'e' },
              new int[] { 't', 0xffff, 'e' });
        check(new char[] { (char)0xef, (char)0xbf, (char)0xbf }, new int[] { 0xffff });
    }

    /**
     * Tests 4-byte multi-byte sequences.
     */
    public void test4Byte()
    {
        // First possible 4-byte sequence 0x100000
        check(new char[] { 't', (char)0xf0, (char)0x90, (char)0x80, (char)0x80, 'e' },
              new int[] { 't', 0xD800, 0xDC00, 'e' });
        check(new char[] { (char)0xf0, (char)0x90, (char)0x80, (char)0x80 },
              new int[] { 0xD800, 0xDC00 });

        // Something in the middle 0x10ffff
        check(new char[] { 't', (char)0xf4, (char)0x8f, (char)0xbf, (char)0xbf, 'e' },
              new int[] { 't', 0xDBFF, 0xDFFF, 'e' });
        check(new char[] { (char)0xf4, (char)0x8f, (char)0xbf, (char)0xbf },
              new int[] { 0xDBFF, 0xDFFF });

        // Last possible 4-byte sequence 0x1fffff
//        check(new char[] { 't', (char)0xf7, (char)0xbf, (char)0xbf, (char)0xbf, 'e' },
//              new int[] { 't', 0x1fffff, 'e' });
//        check(new char[] { (char)0xf7, (char)0xbf, (char)0xbf, (char)0xbf },
//              new int[] { 0x1fffff });
    }

    // ---------------------------------------------------------------------------------------------
    // Malformed sequences
    // ---------------------------------------------------------------------------------------------

    /**
     * Tests parsing of unexpected continuation byte.
     */
    public void testFirstContinuationByte()
    {
        check(new char[] { 't', (char)0x80, 'e' }, new int[] { 't', 0x80, 'e' });
        check(new char[] { (char)0x80 }, new int[] { 0x80 });
    }

    /**
     * Tests parsing of unexpected continuation byte.
     */
    public void testLastContinuationByte()
    {
        check(new char[] { 't', (char)0xbf, 'e' }, new int[] { 't', 0xbf, 'e' });
        check(new char[] { (char)0xbf }, new int[] { 0xbf });
    }

    /**
     * Tests parsing of unexpected continuation bytes in group.
     */
    public void testContinuationBytes()
    {
        // 2 continuation bytes
        check(new char[] { 't', (char)0x80, (char)0xbf, 'e' }, new int[] { 't', 0x80, 0xbf, 'e' });
        check(new char[] { (char)0x80, (char)0xbf }, new int[] { 0x80, 0xbf });

        // 3 continuation bytes
        check(new char[] { 't', (char)0x80, (char)0xbf, (char)0x80, 'e' },
            new int[] { 't', 0x80, 0xbf, 0x80, 'e' });
        check(new char[] { (char)0x80, (char)0xbf, (char)0x80 }, new int[] { 0x80, 0xbf, 0x80 });

        // 4 continuation bytes
        check(new char[] { 't', (char)0x80, (char)0xbf, (char)0x80, (char)0xbf, 'e' },
            new int[] { 't', 0x80, 0xbf, 0x80, 0xbf, 'e' });
        check(new char[] { (char)0x80, (char)0xbf, (char)0x80, (char)0xbf },
            new int[] { 0x80, 0xbf, 0x80, 0xbf });

        // 5 continuation bytes
        check(new char[] { 't', (char)0x80, (char)0xbf, (char)0x80, (char)0xbf, (char)0x80, 'e' },
            new int[] { 't', 0x80, 0xbf, 0x80, 0xbf, 0x80, 'e' });
        check(new char[] { (char)0x80, (char)0xbf, (char)0x80, (char)0xbf, (char)0x80 },
            new int[] { 0x80, 0xbf, 0x80, 0xbf, 0x80 });
    }

    /**
     * Tests parsing of unexpected continuation bytes.
     */
    public void testSequenceOfAll64PossibleContinuationBytes()
    {
        char[] src = new char[64];
        int[] dst = new int[64];
        for (int i = 0; i < src.length; i++)
        {
            int ch = 0x80 + i;
            src[i] = (char)ch;
            dst[i] = ch;
        }

        check(src, dst);
    }

    /**
     * Tests parsing of unexpected first bytes of 2-byte multi-byte sequence.
     */
    public void testFirstBytesOf2ByteSequnces()
    {
        char[] src = new char[64];
        int[] dst = new int[64];
        for (int i = 0; i < 32; i++)
        {
            int ch = 0xc0 + i;
            src[i*2] = (char)ch;
            src[i*2 + 1] = ' ';
            dst[i*2] = ch;
            dst[i*2 + 1] = ' ';
        }

        check(src, dst);
    }

    /**
     * Tests parsing of unexpected first bytes of 3-byte multi-byte sequence.
     */
    public void testFirstBytesOf3ByteSequnces()
    {
        char[] src = new char[32];
        int[] dst = new int[32];
        for (int i = 0; i < 16; i++)
        {
            int ch = 0xe0 + i;
            src[i*2] = (char)ch;
            src[i*2 + 1] = ' ';
            dst[i*2] = ch;
            dst[i*2 + 1] = ' ';
        }

        check(src, dst);
    }

    /**
     * Tests parsing of unexpected first bytes of 4-byte multi-byte sequence.
     */
    public void testFirstBytesOf4ByteSequnces()
    {
        char[] src = new char[16];
        int[] dst = new int[16];
        for (int i = 0; i < 8; i++)
        {
            int ch = 0xf0 + i;
            src[i*2] = (char)ch;
            src[i*2 + 1] = ' ';
            dst[i*2] = ch;
            dst[i*2 + 1] = ' ';
        }

        check(src, dst);
    }

    /**
     * Tests sequences without last continuation byte.
     */
    public void testSeqencesWithLastContinuationByteMissing()
    {
        check(new char[] { (char)0xc0 }, new int[] { 0xc0 });
        check(new char[] { (char)0xe0, (char)0x80 }, new int[] { 0xe0, 0x80 });
        check(new char[] { (char)0xf0, (char)0x80, (char)0x80 }, new int[] { 0xf0, 0x80, 0x80 });

        // All sequences joined
        check(new char[] { (char)0xc0, (char)0xe0, (char)0x80, (char)0xf0, (char)0x80, (char)0x80 },
            new int[] { 0xc0, 0xe0, 0x80, 0xf0, 0x80, 0x80 });
    }

    /**
     * Tests different unexpected combinations.
     */
    public void testGoodBadCombinations()
    {
        check(new char[] { (char)0x80, (char)0xc2, (char)0xa9, (char)0x80 },
            new int[] { 0x80, 0xa9, 0x80 });
        check(new char[] { 'a', (char)0xc2, (char)0xa9, (char)0xbb, 'b'},
            new int[] { 'a', 0xa9, 0xbb, 'b' });
        check(new char[] { 'a', (char)0xc2, (char)0xa9, (char)0xe1, 'b'},
            new int[] { 'a', 0xa9, 0xe1, 'b' });

    }

    /**
     * Tests sequences broken into several packet blocks.
     */
    public void testBrockenIntoBlocks()
    {
        check(new char[][]
        {
            new char[] { (char)0xc2 },
            new char[] { (char)0xa9 }
        }, new int[] { 0xa9 });

        check(new char[][]
        {
            new char[] { (char)0xc2 },
            new char[] { (char)0x09 }
        }, new int[] { 0xc2, 0x09 });

        check(new char[][]
        {
            new char[] { (char)0xe2, 0x80 },
            new char[] { (char)0x09 }
        }, new int[] { 0xe2, 0x80, 0x09 });
    }

    /**
     * Small real-life practice test.
     *
     * @throws Exception in case of error.
     */
    public void testPracticeTest() throws Exception
    {
        String s = "<?xml version=\"1.0\" encoding=\"utf-8\"?><root>a" + (char)0xc2 + (char)0xa9 +
            (char)0xbb + "b</root>";

        SAXBuilder builder = new SAXBuilder();
        Reader reader = XmlReaderFactory.create(streamForString(s.toCharArray()));
        Document doc = builder.build(reader);

        String text = doc.getRootElement().getText();

        assertEquals('a', text.charAt(0));
        assertEquals('\u00a9', text.charAt(1));
        assertEquals('\u00bb', text.charAt(2));
        assertEquals('b', text.charAt(3));
    }

    /**
     * Check how source is read and if it matches destination.
     *
     * @param src   source.
     * @param dst   destination.
     */
    private void check(char[] src, int[] dst)
    {
        Reader reader = createReader(src);

        check0(reader, dst);
    }

    /**
     * Tests how several source blocks are read into single stream.
     *
     * @param srcs  source packet blocks.
     * @param dst   destination.
     */
    private void check(char[][] srcs, int[] dst)
    {
        Reader reader = createReader(srcs);

        check0(reader, dst);
    }

    /**
     * Checks if the reader will return exactly destination sequence.
     *
     * @param reader    reader.
     * @param dst       destination.
     */
    private void check0(Reader reader, int[] dst)
    {
        try
        {
            int offset = 0;
            int ch;
            while ((ch = reader.read()) != -1)
            {
                if (LOG.isLoggable(Level.FINE))
                {
                    LOG.fine("Pos: " + offset + " Expected: " + Integer.toHexString(dst[offset]) +
                        " Found: " + Integer.toHexString(ch));
                }

                assertEquals("Pos: " + offset, dst[offset], ch);
                offset++;
            }

            assertEquals("Not all bytes read.", dst.length, offset);
        } catch (IOException e)
        {
            e.printStackTrace();
            fail();
        }
    }

    /**
     * Creates our reader for "reading" of sequence of chars.
     *
     * @param chars source sequence.
     *
     * @return reader.
     */
    private UTF8Reader createReader(char[] chars)
    {
        return new UTF8Reader(streamForString(chars));
    }

    /**
     * Creates our reader for "reading" of sequence of charse broken into packet blocks.
     *
     * @param charsList list of packets.
     *
     * @return reader.
     */
    private UTF8Reader createReader(char[][] charsList)
    {
        InputStream[] streams = new InputStream[charsList.length];
        for (int i = 0; i < charsList.length; i++)
        {
            char[] chars = charsList[i];
            streams[i] = streamForString(chars);
        }

        return new UTF8Reader(new CombinedInputStream(streams));
    }

    /**
     * Simple configurer of FINE logging.
     */
    private void setFineLogging()
    {
        LOG.setLevel(Level.FINE);
        ConsoleHandler handler = new ConsoleHandler();
        handler.setLevel(Level.FINE);
        LOG.addHandler(handler);
    }

    /**
     * Sets how combined stream does its job.
     */
    public void testCombinedInputStream()
        throws IOException
    {
        InputStream[] streams = new InputStream[]
        {
            streamForString("abcdefg".toCharArray()),
            streamForString("gfedcba".toCharArray()),
            streamForString("1".toCharArray()),
            streamForString("ab".toCharArray())
        };

        InputStream is = new CombinedInputStream(streams);
        byte[] buf = new byte[10];

        try
        {
            assertEquals(7, is.read(buf));
            assertEquals(7, is.read(buf));
            assertEquals(1, is.read(buf));
            assertEquals(2, is.read(buf));
            assertEquals(-1, is.read(buf));
        } catch (IOException e)
        {
            e.printStackTrace();
            fail();
        } finally
        {
            is.close();
        }
    }

    /**
     * Simple combined stream for combining several packet blocks into single stream,
     * but through several reads.
     */
    private static class CombinedInputStream extends InputStream
    {
        private InputStream[]   streams;
        private int             current;

        public CombinedInputStream(InputStream[] streams)
        {
            this.streams = streams;
            current = 0;
        }

        public int read() throws IOException
        {
            int ch = -1;

            while (ch == -1 && current < streams.length)
            {
                InputStream currentStream = streams[current];
                ch = currentStream.read();
                if (ch < 0) current++;
            }

            return ch;
        }

        public int read(byte b[]) throws IOException
        {
            int read = -1;

            // No checks here because it's only for testing purposes
            if (current < streams.length)
            {
                read = streams[current].read(b);
                if (read <= 0)
                {
                    current++;
                    if (current < streams.length) read = streams[current].read(b);
                }
            }

            return read;
        }

        public int read(byte b[], int off, int len) throws IOException
        {
            int read = -1;

            // No checks here because it's only for testing purposes
            if (current < streams.length)
            {
                read = streams[current].read(b, off, len);
                if (read <= 0)
                {
                    current++;
                    if (current < streams.length) read = streams[current].read(b, off, len);
                }
            }

            return read;
        }
    }

    /** Creates stream for string. */
    private InputStream streamForString(char[] text)
    {
        byte[] bytes = new byte[text.length];
        for (int i = 0; i < text.length; i++) bytes[i] = (byte)text[i];
        return new ByteArrayInputStream(bytes);
    }
}
TOP

Related Classes of com.salas.bb.utils.xml.TestUTF8Reader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.