Package org.jwat.common

Examples of org.jwat.common.ByteCountingPushBackInputStream


        if (buffer_size <= 0) {
            throw new IllegalArgumentException(
                    "buffer_size is less or equals to zero: " + buffer_size);
        }
        in = new BufferedInputStream(in, buffer_size);
        pbin = new ByteCountingPushBackInputStream(in, DEFAULT_INPUT_BUFFER_SIZE);
        inputBytes = new byte[DEFAULT_INPUT_BUFFER_SIZE];
    }
View Full Code Here


public class TestArcReaderFactory_IsMagic {

    @Test
    public void test_magicbytes() {
        byte[] bytes;
        ByteCountingPushBackInputStream pbin;
        try {
            /*
             * isArcFile().
             */
            bytes = ArcConstants.ARC_MAGIC_HEADER.getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertTrue(ArcReaderFactory.isArcFile(pbin));
            pbin.close();

            bytes = "filedesc://url".getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertTrue(ArcReaderFactory.isArcFile(pbin));
            pbin.close();

            bytes = ArcConstants.ARC_MAGIC_HEADER.toUpperCase().getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertFalse(ArcReaderFactory.isArcFile(pbin));
            pbin.close();

            bytes = "FILEDESC://url".getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertFalse(ArcReaderFactory.isArcFile(pbin));
            pbin.close();

            bytes = "filedesc".getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertFalse(ArcReaderFactory.isArcFile(pbin));
            pbin.close();
            /*
             * isArcRecord().
             */
            bytes = ArcConstants.ARC_MAGIC_HEADER.getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertTrue(ArcReaderFactory.isArcRecord(pbin));
            pbin.close();

            bytes = "filedesc://url".getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertTrue(ArcReaderFactory.isArcRecord(pbin));
            pbin.close();

            bytes = ArcConstants.ARC_MAGIC_HEADER.toUpperCase().getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertTrue(ArcReaderFactory.isArcRecord(pbin));
            pbin.close();

            bytes = "FILEDESC://url".getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertTrue(ArcReaderFactory.isArcRecord(pbin));
            pbin.close();

            bytes = "filedesc".getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertFalse(ArcReaderFactory.isArcRecord(pbin));
            pbin.close();
        } catch (IOException e) {
            e.printStackTrace();
            Assert.fail("Exception not expected!");
        }
        try {
            /*
             * isArcFile().
             */
            bytes = "http://url".getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertFalse(ArcReaderFactory.isArcFile(pbin));
            pbin.close();

            bytes = "HTTPS://url".getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertFalse(ArcReaderFactory.isArcFile(pbin));
            pbin.close();

            bytes = "http".getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertFalse(ArcReaderFactory.isArcFile(pbin));
            pbin.close();
            /*
             * isArcRecord().
             */
            bytes = "http://url".getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertTrue(ArcReaderFactory.isArcRecord(pbin));
            pbin.close();

            bytes = "HTTPS://url".getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertTrue(ArcReaderFactory.isArcRecord(pbin));
            pbin.close();

            bytes = "http".getBytes();
            pbin = new ByteCountingPushBackInputStream(new ByteArrayInputStream(bytes), ArcReaderFactory.PUSHBACK_BUFFER_SIZE);
            Assert.assertFalse(ArcReaderFactory.isArcRecord(pbin));
            pbin.close();
        } catch (IOException e) {
            e.printStackTrace();
            Assert.fail("Exception not expected!");
        }
    }
View Full Code Here

            @Override
            public void close() throws IOException {
                throw new IOException();
            }
        };
        ByteCountingPushBackInputStream pbin = new ByteCountingPushBackInputStream(in, 42) {
            @Override
            public int read() throws IOException {
                return 0;
            }
            @Override
View Full Code Here

            @Override
            public void close() throws IOException {
                throw new IOException();
            }
        };
        ByteCountingPushBackInputStream pbin = new ByteCountingPushBackInputStream(in, 42) {
            @Override
            public int read() throws IOException {
                return 0;
            }
            @Override
View Full Code Here

        }
        if (offset < -1) {
            throw new IllegalArgumentException(
                    "The 'offset' is less than -1: " + offset);
        }
        ByteCountingPushBackInputStream pbin =
                new ByteCountingPushBackInputStream(rin, PUSHBACK_BUFFER_SIZE);
        currentRecord = ArcRecordBase.parseRecord(pbin, this);
        if (currentRecord != null) {
            startOffset = offset;
            currentRecord.header.startOffset = offset;
        }
View Full Code Here

        if (buffer_size <= 0) {
            throw new IllegalArgumentException(
                    "The 'buffer_size' is less than or equal to zero: "
                    + buffer_size);
        }
        ByteCountingPushBackInputStream pbin =
                new ByteCountingPushBackInputStream(
                        new BufferedInputStream(rin, buffer_size),
                        PUSHBACK_BUFFER_SIZE);
        currentRecord = ArcRecordBase.parseRecord(pbin, this);
        if (currentRecord != null) {
            startOffset = offset;
View Full Code Here

    @Test
    public void test_warcheader_version_parser() {
        String header;
        ByteArrayInputStream in;
        ByteCountingPushBackInputStream pbin;
        WarcReader reader;
        WarcRecord record;
        Object[][] cases;
        boolean bValidVersion;

        try {
            cases = new Object[][] {
                    {true, "WARC/0.16\r\n", true, true, false, 0, 16},
                    {true, "WARC/0.17\r\n", true, true, true, 0, 17},
                    {true, "WARC/0.18\r\n", true, true, true, 0, 18},
                    {true, "WARC/0.19\r\n", true, true, false, 0, 19},
                    {true, "WARC/0.99\r\n", true, true, false, 0, 99},
                    {true, "WARC/1.0\r\n", true, true, true, 1, 0},
                    {true, "WARC/1.1\r\n", true, true, false, 1, 1},
                    {true, "WARC/2.0\r\n", true, true, false, 2, 0},
                    {true, "WARC/x.x\r\n", true, true, false, -1, -1},
                    {true, "WARC/1.0.0\r\n", true, true, false, 1, 0},
                    {true, "WARC/1.0.1\r\n", true, true, false, 1, 0},
                    {true, "WARC/1\r\n", true, false, false, -1, -1},
                    {true, "WARC/1.2.3.4.5\r\n", true, false, false, -1, -1},
                    {true, "WARC/\r\n", true, false, false, -1, -1},
                    {true, "WARC/WARC\r\n", true, false, false, -1, -1},
                    {false, "WARC\r\n", false, false, false, -1, -1},
                    {false, "WARC", false, false, false, -1, -1},
                    {false, "", false, false, false, -1, -1},
                    {false, "WARC-Type: resource\r\n", false, false, false, -1, -1},
                    {false, "WARC-Type: resource\r\nWARC", false, false, false, -1, -1},
                    {false, "\r\n", false, false, false, -1, -1},
            };

            for (int i=0; i<cases.length; ++i) {
                bValidVersion = (Boolean)cases[i][0];
                header = (String)cases[i][1];
                // debug
                //System.out.println(header);
                in = new ByteArrayInputStream(header.getBytes("ISO8859-1"));
                pbin = new ByteCountingPushBackInputStream(in, WarcReaderFactory.PUSHBACK_BUFFER_SIZE);
                reader = WarcReaderFactory.getReader(pbin);
                record = reader.getNextRecord();
                if (bValidVersion) {
                    Assert.assertNotNull(record);
                    Assert.assertNotNull(record.header);
                    Assert.assertEquals(cases[i][2], record.header.bMagicIdentified);
                    Assert.assertEquals(cases[i][3], record.header.bVersionParsed);
                    Assert.assertEquals(cases[i][4], record.header.bValidVersion);
                    Assert.assertEquals(cases[i][5], record.header.major);
                    Assert.assertEquals(cases[i][6], record.header.minor);
                } else {
                    Assert.assertNull(record);
                }

                // Save testfile.
                SaveWarcTestFiles.saveTestWarcHeaderVersion(header.getBytes("ISO8859-1"));
            }

            cases = new Object[][] {
                    {"WARC/1.0\r\n\r\n"},
                    {"WARC/1.0\r\nWARC-Type: resource"},
                    {"WARC/1.0\r\nWARC-Type: resource\r\n"},
                    {"WARC/1.0\r\nWARC-Type resource\r\n"},
                    {"WARC/1.0\r\n: resource\r\n"}
            };

            for (int i=0; i<cases.length; ++i) {
                //bValidVersion = (Boolean)cases[i][0];
                bValidVersion = true;
                header = (String)cases[i][0];
                // debug
                //System.out.println(header);
                in = new ByteArrayInputStream(header.getBytes("ISO8859-1"));
                pbin = new ByteCountingPushBackInputStream(in, WarcReaderFactory.PUSHBACK_BUFFER_SIZE);
                reader = WarcReaderFactory.getReader(pbin);
                record = reader.getNextRecord();
                if (bValidVersion) {
                    Assert.assertNotNull(record);
                    Assert.assertNotNull(record.header);
                    Assert.assertEquals(true, record.header.bMagicIdentified);
                    Assert.assertEquals(true, record.header.bVersionParsed);
                    Assert.assertEquals(true, record.header.bValidVersion);
                    Assert.assertEquals(1, record.header.major);
                    Assert.assertEquals(0, record.header.minor);
                } else {
                    Assert.assertNull(record);
                }

                // Save testfile.
                SaveWarcTestFiles.saveTestWarcHeaderVersion(header.getBytes("ISO8859-1"));
            }

            header = "WARC/1.0\r\nWARC-Type: resource\r\n";
            in = new ByteArrayInputStream(header.getBytes("ISO8859-1"));
            pbin = new ByteCountingPushBackInputStream(in, WarcReaderFactory.PUSHBACK_BUFFER_SIZE);

            // Save testfile.
            SaveWarcTestFiles.saveTestWarcHeaderVersion(header.getBytes("ISO8859-1"));

            reader = WarcReaderFactory.getReader(pbin);
View Full Code Here

        if (buffer_size <= 0) {
            throw new IllegalArgumentException(
                    "The 'buffer_size' is less than or equal to zero: "
                    + buffer_size);
        }
        ByteCountingPushBackInputStream pbin =
                new ByteCountingPushBackInputStream(
                        new BufferedInputStream(in, buffer_size),
                                                PUSHBACK_BUFFER_SIZE);
        if (GzipReader.isGzipped(pbin)) {
            return new ArcReaderCompressed(new GzipReader(pbin),
                                           buffer_size);
View Full Code Here

    public static ArcReader getReader(InputStream in) throws IOException {
        if (in == null) {
            throw new IllegalArgumentException(
                    "The inputstream 'in' is null");
        }
        ByteCountingPushBackInputStream pbin =
                new ByteCountingPushBackInputStream(in, PUSHBACK_BUFFER_SIZE);
        if (GzipReader.isGzipped(pbin)) {
            return new ArcReaderCompressed(new GzipReader(pbin));
        }
        return new ArcReaderUncompressed(pbin);
    }
View Full Code Here

                                                        throws IOException {
        if (in == null) {
            throw new IllegalArgumentException(
                    "The inputstream 'in' is null");
        }
        ByteCountingPushBackInputStream pbin =
                new ByteCountingPushBackInputStream(in, PUSHBACK_BUFFER_SIZE);
        return new ArcReaderUncompressed(pbin);
    }
View Full Code Here

TOP

Related Classes of org.jwat.common.ByteCountingPushBackInputStream

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.