/**
* Java Web Archive Toolkit - Software to read and validate ARC, WARC
* and GZip files. (http://jwat.org/)
* Copyright 2011-2012 Netarkivet.dk (http://netarkivet.dk/)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.jwat.arc;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.not;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;

import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.jwat.common.ByteCountingInputStream;
import org.jwat.common.ByteCountingPushBackInputStream;
import org.jwat.common.RandomAccessFileInputStream;
import org.jwat.common.Uri;
import org.jwat.gzip.GzipEntry;
import org.jwat.gzip.GzipReader;

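/**
 * Parameterized tests for reading GZip compressed ARC files with
 * ArcReaderCompressed, both sequentially and by random access, with and
 * without block/payload digest validation.
 */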
@RunWith(Parameterized.class)
public class TestArcReaderCompressed {

    private int expected_records;
    private boolean bDigest;
    private String arcFile;
    private String arcFile2;

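    /**
     * Test configurations: expected number of records, digest validation
     * on/off, the compressed ARC test file and its uncompressed counterpart
     * (used for length comparisons).
     */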
    @Parameters
    public static Collection<Object[]> configs() {
        return Arrays.asList(new Object[][] {
                {300, false, "IAH-20080430204825-00000-blackbook.arc.gz", "IAH-20080430204825-00000-blackbook.arc"},
                {300, true, "IAH-20080430204825-00000-blackbook.arc.gz", "IAH-20080430204825-00000-blackbook.arc"}
        });
    }

    public TestArcReaderCompressed(int records, boolean bDigest, String arcFile, String arcFile2) {
        this.expected_records = records;
        this.bDigest = bDigest;
        this.arcFile = arcFile;
        this.arcFile2 = arcFile2;
    }

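    /**
     * Convert a resource URL into a usable file path, un-escaping the
     * '[' and ']' characters that URLs encode as %5b and %5d.
     */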
    public String getUrlPath(URL url) {
        String path = url.getFile();
        path = path.replaceAll("%5b", "[");
        path = path.replaceAll("%5d", "]");
        return path;
    }

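    /**
     * Read all records sequentially with getNextRecord(), first from a reader
     * constructed with getReaderCompressed(in) and then with an explicit
     * buffer size, verifying record types, digests, offsets and consumed
     * bytes against the index built by indexArcFile().
     */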
    @Test
    public void test_arcreaderfactory_compressed_sequential() {
        boolean bDebugOutput = System.getProperty("jwat.debug.output") != null;

        URL url;
        File file;
        RandomAccessFile ram;
        InputStream in;

        ArcReader reader;
        ArcRecordBase record;

        int records = 0;
        long consumed = 0;
        int errors = 0;
        int warnings = 0;

        try {
            List<ArcEntry> entries = indexArcFile();
            ArcEntry entry;

            /*
             * getReaderCompressed(in) / getNextRecord().
             */

            records = 0;
            consumed = 0;
            errors = 0;
            warnings = 0;

            url = this.getClass().getClassLoader().getResource(arcFile);
            file = new File(getUrlPath(url));
            ram = new RandomAccessFile(file, "r");
            in = new RandomAccessFileInputStream(ram);

            reader = ArcReaderFactory.getReaderCompressed(in);

            reader.setBlockDigestEnabled( bDigest );
            Assert.assertTrue(reader.setBlockDigestAlgorithm( "sha1" ));
            reader.setPayloadDigestEnabled( bDigest );
            Assert.assertTrue(reader.setPayloadDigestAlgorithm( "sha1" ));

            for (int i=0; i<entries.size(); ++i) {
                entry = entries.get(i);

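                // getNextRecordFrom() is not supported on a reader constructed
                // around a stream; both overloads must throw IllegalStateException.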
                try {
                    reader.getNextRecordFrom(in, entry.offset);
                    Assert.fail("Exception expected!");
                } catch (IllegalStateException e) {
                }

                try {
                    reader.getNextRecordFrom(in, entry.offset, 8192);
                    Assert.fail("Exception expected!");
                } catch (IllegalStateException e) {
                }

                if ((record = reader.getNextRecord()) != null) {
                    if (bDebugOutput) {
                        TestBaseUtils.printRecord(record);
                        //RecordDebugBase.printRecordErrors(record);
                    }

                    record.close();

                    consumed += record.getConsumed();
                    Assert.assertEquals(record.consumed, record.getConsumed());

                    if ( bDigest ) {
                        if ( (record.payload != null && record.computedBlockDigest == null)
                                || (record.httpHeader != null && record.computedPayloadDigest == null) ) {
                            Assert.fail( "Digest missing!" );
                        }
                    }

                    ++records;

                    switch (records) {
                    case 1:
                        Assert.assertEquals(ArcRecordBase.RT_VERSION_BLOCK, record.recordType);
                        Assert.assertTrue(record.isCompliant());
                        Assert.assertNotNull(record.versionHeader);
                        Assert.assertTrue(record.versionHeader.isValid());
                        Assert.assertEquals(ArcVersion.VERSION_1_1, record.versionHeader.version);
                        break;
                    default:
                        Assert.assertEquals(ArcRecordBase.RT_ARC_RECORD, record.recordType);
                        Assert.assertTrue(record.isCompliant());
                        Assert.assertNull(record.versionHeader);
                        break;
                    }

                    if (record.diagnostics.hasErrors()) {
                        errors += record.diagnostics.getErrors().size();
                    }
                    if (record.diagnostics.hasWarnings()) {
                        warnings += record.diagnostics.getWarnings().size();
                    }

                    if (record.header.urlUri.compareTo(entry.recordId) != 0) {
                        Assert.fail("Wrong record");
                    }
                } else {
                    Assert.fail("Location incorrect");
                }
            }

            record = reader.getNextRecord();
            Assert.assertNull(record);

            url = this.getClass().getClassLoader().getResource(arcFile2);
            file = new File(getUrlPath(url));

            Assert.assertEquals(ram.length(), reader.getConsumed());
            Assert.assertEquals(ram.length(), reader.getOffset());
            Assert.assertEquals(file.length(), consumed);

            reader.close();

            Assert.assertEquals(ram.length(), reader.getConsumed());
            Assert.assertEquals(ram.length(), reader.getOffset());
            Assert.assertEquals(file.length(), consumed);

            in.close();
            ram.close();

            if (bDebugOutput) {
                TestBaseUtils.printStatus(records, errors, warnings);
            }

            Assert.assertEquals(expected_records, records);
            Assert.assertEquals(0, errors);
            Assert.assertEquals(0, warnings);

            /*
             * getReaderCompressed(in, buffer_size) / getNextRecord().
             */

            records = 0;
            consumed = 0;
            errors = 0;
            warnings = 0;

            url = this.getClass().getClassLoader().getResource(arcFile);
            file = new File(getUrlPath(url));
            ram = new RandomAccessFile(file, "r");
            in = new RandomAccessFileInputStream(ram);

            reader = ArcReaderFactory.getReaderCompressed(in, 8192);

            reader.setBlockDigestEnabled( bDigest );
            Assert.assertTrue(reader.setBlockDigestAlgorithm( "sha1" ));
            reader.setPayloadDigestEnabled( bDigest );
            Assert.assertTrue(reader.setPayloadDigestAlgorithm( "sha1" ));

            for (int i=0; i<entries.size(); ++i) {
                entry = entries.get(i);

                try {
                    reader.getNextRecordFrom(in, entry.offset);
                    Assert.fail("Exception expected!");
                } catch (IllegalStateException e) {
                }

                try {
                    reader.getNextRecordFrom(in, entry.offset, 8192);
                    Assert.fail("Exception expected!");
                } catch (IllegalStateException e) {
                }

                if ((record = reader.getNextRecord()) != null) {
                    if (bDebugOutput) {
                        TestBaseUtils.printRecord(record);
                        //RecordDebugBase.printRecordErrors(record);
                    }

                    record.close();

                    consumed += record.getConsumed();
                    Assert.assertEquals(record.consumed, record.getConsumed());

                    if ( bDigest ) {
                        if ( (record.payload != null && record.computedBlockDigest == null)
                                || (record.httpHeader != null && record.computedPayloadDigest == null) ) {
                            Assert.fail( "Digest missing!" );
                        }
                    }

                    ++records;

                    switch (records) {
                    case 1:
                        Assert.assertEquals(ArcRecordBase.RT_VERSION_BLOCK, record.recordType);
                        Assert.assertTrue(record.isCompliant());
                        Assert.assertNotNull(record.versionHeader);
                        Assert.assertTrue(record.versionHeader.isValid());
                        Assert.assertEquals(ArcVersion.VERSION_1_1, record.versionHeader.version);
                        break;
                    default:
                        Assert.assertEquals(ArcRecordBase.RT_ARC_RECORD, record.recordType);
                        Assert.assertTrue(record.isCompliant());
                        Assert.assertNull(record.versionHeader);
                        break;
                    }

                    if (record.diagnostics.hasErrors()) {
                        errors += record.diagnostics.getErrors().size();
                    }
                    if (record.diagnostics.hasWarnings()) {
                        warnings += record.diagnostics.getWarnings().size();
                    }

                    if (record.header.urlUri.compareTo(entry.recordId) != 0) {
                        Assert.fail("Wrong record");
                    }
                } else {
                    Assert.fail("Location incorrect");
                }
            }

            record = reader.getNextRecord();
            Assert.assertNull(record);

            url = this.getClass().getClassLoader().getResource(arcFile2);
            file = new File(getUrlPath(url));

            Assert.assertEquals(ram.length(), reader.getConsumed());
            Assert.assertEquals(ram.length(), reader.getOffset());
            Assert.assertEquals(file.length(), consumed);

            reader.close();

            Assert.assertEquals(ram.length(), reader.getConsumed());
            Assert.assertEquals(ram.length(), reader.getOffset());
            Assert.assertEquals(file.length(), consumed);

            in.close();
            ram.close();

            if (bDebugOutput) {
                TestBaseUtils.printStatus(records, errors, warnings);
            }

            Assert.assertEquals(expected_records, records);
            Assert.assertEquals(0, errors);
            Assert.assertEquals(0, warnings);
        } catch (IOException e) {
            e.printStackTrace();
            Assert.fail("Unexpected i/o exception");
        }
    }

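    /**
     * Read records in random access mode with getNextRecordFrom(in, offset),
     * with and without an explicit buffer size, seeking to the offsets
     * recorded by indexArcFile() and verifying the same invariants as the
     * sequential test.
     */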
    @Test
    public void test_arcreaderfactory_compressed_random() {
        boolean bDebugOutput = System.getProperty("jwat.debug.output") != null;

        URL url;
        File file;
        RandomAccessFile ram;
        InputStream in;

        ArcReader reader;
        ArcRecordBase record;

        int records = 0;
        long consumed = 0;
        int errors = 0;
        int warnings = 0;

        try {
            List<ArcEntry> entries = indexArcFile();
            ArcEntry entry;

            /*
             * getReaderCompressed() / getNextRecordFrom(in, offset).
             */

            records = 0;
            consumed = 0;
            errors = 0;
            warnings = 0;

            url = this.getClass().getClassLoader().getResource(arcFile);
            file = new File(getUrlPath(url));
            ram = new RandomAccessFile(file, "r");
            in = new RandomAccessFileInputStream(ram);

            reader = ArcReaderFactory.getReaderCompressed();

            reader.setBlockDigestEnabled( bDigest );
            Assert.assertTrue(reader.setBlockDigestAlgorithm( "sha1" ));
            reader.setPayloadDigestEnabled( bDigest );
            Assert.assertTrue(reader.setPayloadDigestAlgorithm( "sha1" ));

            for (int i=0; i<entries.size(); ++i) {
                entry = entries.get(i);

                ram.seek(entry.offset);

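                // getNextRecord() is not supported on a reader without an
                // attached stream; an IllegalStateException is expected.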
                try {
                    reader.getNextRecord();
                    Assert.fail("Exception expected!");
                } catch (IllegalStateException e) {
                }

                if ((record = reader.getNextRecordFrom(in, entry.offset)) != null) {
                    if (bDebugOutput) {
                        TestBaseUtils.printRecord(record);
                        //RecordDebugBase.printRecordErrors(record);
                    }

                    record.close();

                    consumed += record.getConsumed();
                    Assert.assertEquals(record.consumed, record.getConsumed());

                    if ( bDigest ) {
                        if ( (record.payload != null && record.computedBlockDigest == null)
                                || (record.httpHeader != null && record.computedPayloadDigest == null) ) {
                            Assert.fail( "Digest missing!" );
                        }
                    }

                    ++records;

                    switch (records) {
                    case 1:
                        Assert.assertEquals(ArcRecordBase.RT_VERSION_BLOCK, record.recordType);
                        Assert.assertTrue(record.isCompliant());
                        Assert.assertNotNull(record.versionHeader);
                        Assert.assertTrue(record.versionHeader.isValid());
                        Assert.assertEquals(ArcVersion.VERSION_1_1, record.versionHeader.version);
                        break;
                    default:
                        Assert.assertEquals(ArcRecordBase.RT_ARC_RECORD, record.recordType);
                        Assert.assertTrue(record.isCompliant());
                        Assert.assertNull(record.versionHeader);
                        break;
                    }

                    if (record.diagnostics.hasErrors()) {
                        errors += record.diagnostics.getErrors().size();
                    }
                    if (record.diagnostics.hasWarnings()) {
                        warnings += record.diagnostics.getWarnings().size();
                    }

                    if (record.header.urlUri.compareTo(entry.recordId) != 0) {
                        Assert.fail("Wrong record");
                    }
                } else {
                    Assert.fail("Location incorrect");
                }
            }

            record = reader.getNextRecordFrom(in, reader.getConsumed());
            Assert.assertNull(record);

            url = this.getClass().getClassLoader().getResource(arcFile2);
            file = new File(getUrlPath(url));

            Assert.assertEquals(ram.length(), reader.getConsumed());
            Assert.assertEquals(ram.length(), reader.getOffset());
            Assert.assertEquals(file.length(), consumed);

            reader.close();

            Assert.assertEquals(ram.length(), reader.getConsumed());
            Assert.assertEquals(ram.length(), reader.getOffset());
            Assert.assertEquals(file.length(), consumed);

            in.close();
            ram.close();

            if (bDebugOutput) {
                TestBaseUtils.printStatus(records, errors, warnings);
            }

            Assert.assertEquals(expected_records, records);
            Assert.assertEquals(0, errors);
            Assert.assertEquals(0, warnings);

            /*
             * getReaderCompressed() / getNextRecordFrom(in, offset, buffer_size).
             */

            records = 0;
            consumed = 0;
            errors = 0;
            warnings = 0;

            url = this.getClass().getClassLoader().getResource(arcFile);
            file = new File(getUrlPath(url));
            ram = new RandomAccessFile(file, "r");
            in = new RandomAccessFileInputStream(ram);

            reader = ArcReaderFactory.getReaderCompressed();

            reader.setBlockDigestEnabled( bDigest );
            Assert.assertTrue(reader.setBlockDigestAlgorithm( "sha1" ));
            reader.setPayloadDigestEnabled( bDigest );
            Assert.assertTrue(reader.setPayloadDigestAlgorithm( "sha1" ));

            for (int i=0; i<entries.size(); ++i) {
                entry = entries.get(i);

                ram.seek(entry.offset);

                try {
                    reader.getNextRecord();
                    Assert.fail("Exception expected!");
                } catch (IllegalStateException e) {
                }

                if ((record = reader.getNextRecordFrom(in, entry.offset, 8192)) != null) {
                    if (bDebugOutput) {
                        TestBaseUtils.printRecord(record);
                        //RecordDebugBase.printRecordErrors(record);
                    }

                    record.close();

                    consumed += record.getConsumed();
                    Assert.assertEquals(record.consumed, record.getConsumed());

                    if ( bDigest ) {
                        if ( (record.payload != null && record.computedBlockDigest == null)
                                || (record.httpHeader != null && record.computedPayloadDigest == null) ) {
                            Assert.fail( "Digest missing!" );
                        }
                    }

                    ++records;

                    switch (records) {
                    case 1:
                        Assert.assertEquals(ArcRecordBase.RT_VERSION_BLOCK, record.recordType);
                        Assert.assertTrue(record.isCompliant());
                        Assert.assertNotNull(record.versionHeader);
                        Assert.assertTrue(record.versionHeader.isValid());
                        Assert.assertEquals(ArcVersion.VERSION_1_1, record.versionHeader.version);
                        break;
                    default:
                        Assert.assertEquals(ArcRecordBase.RT_ARC_RECORD, record.recordType);
                        Assert.assertTrue(record.isCompliant());
                        Assert.assertNull(record.versionHeader);
                        break;
                    }

                    if (record.diagnostics.hasErrors()) {
                        errors += record.diagnostics.getErrors().size();
                    }
                    if (record.diagnostics.hasWarnings()) {
                        warnings += record.diagnostics.getWarnings().size();
                    }

                    if (record.header.urlUri.compareTo(entry.recordId) != 0) {
                        Assert.fail("Wrong record");
                    }
                } else {
                    Assert.fail("Location incorrect");
                }
            }

            record = reader.getNextRecordFrom(in, reader.getConsumed(), 8192);
            Assert.assertNull(record);

            url = this.getClass().getClassLoader().getResource(arcFile2);
            file = new File(getUrlPath(url));

            Assert.assertEquals(ram.length(), reader.getConsumed());
            Assert.assertEquals(ram.length(), reader.getOffset());
            Assert.assertEquals(file.length(), consumed);

            reader.close();

            Assert.assertEquals(ram.length(), reader.getConsumed());
            Assert.assertEquals(ram.length(), reader.getOffset());
            Assert.assertEquals(file.length(), consumed);

            in.close();
            ram.close();

            if (bDebugOutput) {
                TestBaseUtils.printStatus(records, errors, warnings);
            }

            Assert.assertEquals(expected_records, records);
            Assert.assertEquals(0, errors);
            Assert.assertEquals(0, warnings);
        } catch (IOException e) {
            e.printStackTrace();
            Assert.fail("Unexpected i/o exception");
        }
    }

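    /** Index entry pairing a record's URL with its start offset in the compressed file. */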
    class ArcEntry {
        Uri recordId;
        long offset;
    }

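    /**
     * Build an index of the compressed ARC test file by iterating all records
     * with an auto-detecting reader, recording each record's URL and start
     * offset, and asserting the same compliance, digest and offset invariants
     * as the reader tests.
     */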
    public List<ArcEntry> indexArcFile() {
        boolean bDebugOutput = System.getProperty("jwat.debug.output") != null;

        List<ArcEntry> arcEntries = new ArrayList<ArcEntry>();
        ArcEntry arcEntry;

        int records = 0;
        long consumed = 0;
        int errors = 0;
        int warnings = 0;

        try {
            InputStream in = this.getClass().getClassLoader().getResourceAsStream(arcFile);
            ByteCountingInputStream bcin = new ByteCountingInputStream(in);
            ArcReader reader = ArcReaderFactory.getReader(bcin);
            ArcRecordBase record;

            reader.setBlockDigestEnabled( bDigest );
            Assert.assertTrue(reader.setBlockDigestAlgorithm( "sha1" ));
            reader.setPayloadDigestEnabled( bDigest );
            Assert.assertTrue(reader.setPayloadDigestAlgorithm( "sha1" ));

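            // Iterate all records, recording each record's start offset and URL
            // so the random access tests can seek directly to individual records.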
            Iterator<ArcRecordBase> recordIterator = reader.iterator();

            while (recordIterator.hasNext()) {
                record = recordIterator.next();
                ++records;

                switch (records) {
                case 1:
                    Assert.assertEquals(ArcRecordBase.RT_VERSION_BLOCK, record.recordType);
                    Assert.assertTrue(record.isCompliant());
                    Assert.assertNotNull(record.versionHeader);
                    Assert.assertTrue(record.versionHeader.isValid());
                    Assert.assertEquals(ArcVersion.VERSION_1_1, record.versionHeader.version);
                    break;
                default:
                    Assert.assertEquals(ArcRecordBase.RT_ARC_RECORD, record.recordType);
                    Assert.assertTrue(record.isCompliant());
                    Assert.assertNull(record.versionHeader);
                    break;
                }

                if (record.header.urlUri == null) {
                    Assert.fail("Invalid arc uri");
                }

                Assert.assertThat(record.getStartOffset(), is(equalTo(reader.getStartOffset())));
                Assert.assertThat(record.getStartOffset(), is(not(equalTo(reader.getOffset()))));

                //System.out.println(record.getStartOffset());
                //System.out.println(reader.getStartOffset());
                //System.out.println(reader.getOffset());

                arcEntry = new ArcEntry();
                arcEntry.recordId = record.header.urlUri;
                arcEntry.offset = record.getStartOffset();
                arcEntries.add(arcEntry);

                if (bDebugOutput) {
                    System.out.println("0x" + Long.toString(arcEntry.offset, 16) + "(" + arcEntry.offset + ") - " + arcEntry.recordId);
                }

                record.close();

                consumed += record.getConsumed();
                Assert.assertEquals(record.consumed, record.getConsumed());

                Assert.assertThat(record.getStartOffset(), is(equalTo(reader.getStartOffset())));
                Assert.assertThat(record.getStartOffset(), is(not(equalTo(reader.getOffset()))));

                if ( bDigest ) {
                    if ( (record.payload != null && record.computedBlockDigest == null)
                            || (record.httpHeader != null && record.computedPayloadDigest == null) ) {
                        Assert.fail( "Digest missing!" );
                    }
                }

                //System.out.println(record.getStartOffset());
                //System.out.println(reader.getStartOffset());
                //System.out.println(reader.getOffset());

                if (record.diagnostics.hasErrors()) {
                    errors += record.diagnostics.getErrors().size();
                }
                if (record.diagnostics.hasWarnings()) {
                    warnings += record.diagnostics.getWarnings().size();
                }
            }

            if (reader.getIteratorExceptionThrown() != null) {
                reader.getIteratorExceptionThrown().printStackTrace();
                Assert.fail("Unexpected exception!");
            }

            URL url = this.getClass().getClassLoader().getResource(arcFile2);
            File file = new File(getUrlPath(url));

            Assert.assertEquals(bcin.getConsumed(), reader.getConsumed());
            Assert.assertEquals(bcin.getConsumed(), reader.getOffset());
            Assert.assertEquals(file.length(), consumed);

            reader.close();
            bcin.close();

            Assert.assertEquals(bcin.getConsumed(), reader.getConsumed());
            Assert.assertEquals(bcin.getConsumed(), reader.getOffset());
            Assert.assertEquals(file.length(), consumed);
        } catch (IOException e) {
            e.printStackTrace();
            Assert.fail("Unexpected exception");
        }

        Assert.assertEquals(expected_records, records);
        Assert.assertEquals(0, errors);
        Assert.assertEquals(0, warnings);

        return arcEntries;
    }

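    /**
     * Exercise the close() and recordClosed() error handling of
     * ArcReaderCompressed using stub objects that throw IOException on close().
     */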
    @Test
    public void test_arcreadercompressed_exceptions() {
        ArcReaderCompressed reader = ArcReaderFactory.getReaderCompressed();

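        // Stub stream, GZip reader, record and GZip entry that all throw
        // IOException on close(), so the reader's cleanup paths are exercised.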
        InputStream in = new InputStream() {
            @Override
            public int read() throws IOException {
                return 0;
            }
            @Override
            public void close() throws IOException {
                throw new IOException();
            }
        };
        GzipReader gzipReader = new GzipReader(in) {
            @Override
            public void close() throws IOException {
                throw new IOException();
            }
        };
        ArcRecordBase record = new ArcRecordBase() {
            @Override
            protected void processPayload(ByteCountingPushBackInputStream in,
                    ArcReader reader) throws IOException {
            }
            @Override
            public void close() throws IOException {
                throw new IOException();
            }
        };
        GzipEntry gzipEntry = new GzipEntry() {
            @Override
            public void close() throws IOException {
                throw new IOException();
            }
        };

        Assert.assertNull(reader.reader);
        Assert.assertNull(reader.currentRecord);

        reader.reader = gzipReader;
        reader.close();
        Assert.assertNull(reader.reader);
        Assert.assertNull(reader.currentRecord);

        reader.currentRecord = record;
        reader.close();
        Assert.assertNull(reader.reader);
        Assert.assertNull(reader.currentRecord);

        try {
            reader.recordClosed();
            Assert.fail("Exception expected!");
        } catch (IllegalStateException e) {
        }

        Assert.assertNull(reader.currentEntry);
        reader.currentEntry = gzipEntry;
        reader.recordClosed();
        Assert.assertNull(reader.currentEntry);
    }

}