Package org.jwat.arc

Source Code of org.jwat.arc.TestArcHeader

/**
* Java Web Archive Toolkit - Software to read and validate ARC, WARC
* and GZip files. (http://jwat.org/)
* Copyright 2011-2012 Netarkivet.dk (http://netarkivet.dk/)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.jwat.arc;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.InetAddress;

import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.jwat.common.ByteCountingPushBackInputStream;
import org.jwat.common.ContentType;
import org.jwat.common.Diagnosis;
import org.jwat.common.DiagnosisType;
import org.jwat.common.Diagnostics;
import org.jwat.common.Uri;

@RunWith(JUnit4.class)
public class TestArcHeader {

    @Test
    public void test_archeader_fields() {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        ByteArrayInputStream in;
        ByteCountingPushBackInputStream pbin;
        ArcWriter writer;
        ArcReader reader;
        byte[] bytes;
        ArcHeader header;
        boolean success;
        String tmpStr;
        Object[][] test_cases;
        String[] fields;
        try {
            /*
             * Writer.
             */
            out.reset();
            Diagnostics<Diagnosis> diagnostics = new Diagnostics<Diagnosis>();

            writer = ArcWriterFactory.getWriter(out, false);
            writer.fieldParsers.diagnostics = diagnostics;
            header = ArcHeader.initHeader(writer, diagnostics);

            tmpStr = header.toString();
            Assert.assertNotNull(tmpStr);
            /*
             * Test cases, parseHeaders().
             */
            test_cases = new Object[][] {
                    /*
                     * Invalid number of fields.
                     */
                    {0, "".getBytes(), new String[4], new Object[][] {}, new Object[][] {}
                    },
                    /*
                     * Null fields v1.
                     */
                    {1, "".getBytes(), new String[5], new Object[][] {
                        {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_URL + "' value", 0},
                        {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_IP_ADDRESS + "' value", 0},
                        {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_ARCHIVE_DATE + "' value", 0},
                        {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_CONTENT_TYPE + "' value", 0},
                        {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_ARCHIVE_LENGTH + "' value", 0}
                    }, new Object[][] {}},
                    /*
                     * Null fields v2.
                     */
                    {2, "".getBytes(), new String[10], new Object[][] {
                        {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_URL + "' value", 0},
                        {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_IP_ADDRESS + "' value", 0},
                        {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_ARCHIVE_DATE + "' value", 0},
                        {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_CONTENT_TYPE + "' value", 0},
                        {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_RESULT_CODE + "' value", 0},
                        //{DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_CHECKSUM + "' value", 0},
                        //{DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_LOCATION + "' value", 0},
                        {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_OFFSET + "' value", 0},
                        {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_FILENAME + "' value", 0},
                        {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_ARCHIVE_LENGTH + "' value", 0}
                    }, new Object[][] {}}
            };
            diagnostics.reset();
            for (int i=0; i<test_cases.length; ++i) {
                bytes = (byte[])test_cases[i][1];
                in = new ByteArrayInputStream(bytes);
                reader = ArcReaderFactory.getReader(in);
                reader.fieldParsers.diagnostics = diagnostics;
                header = ArcHeader.initHeader(reader, 42L, diagnostics);
                fields = (String[])test_cases[i][2];
                header.parseHeaders(fields);
                Assert.assertEquals(test_cases[i][0], header.recordFieldVersion);
                String[] expected_fieldStrings = new String[] {null, null, null, null, null, null, null, null, null, null};
                Object[] expected_fieldObjects = new Object[] {null, null, null, null, null, null, null};
                Object[][] expected_errors = (Object[][])test_cases[i][3];
                Object[][] expected_warnings = (Object[][])test_cases[i][4];
                TestBaseUtils.assert_header(header, expected_fieldStrings, expected_fieldObjects, expected_errors, expected_warnings);
                diagnostics.reset();

                // Save testfile.
                SaveArcTestFiles.saveTestArcHeader(bytes, expected_errors.length == 0 && expected_warnings.length == 0);
            }
            /*
             * Test cases, parseHeader().
             */
            test_cases = new Object[][] {
                    /*
                     * Empty file.
                     */
                    {false, 0, "".getBytes(), new String[] {
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null
                    }, new Object[] {
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null
                    }, new Object[][] {}, new Object[][] {}},
                    /*
                     * Newline.
                     */
                    {false, 0, "\n".getBytes(), new String[] {
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null
                    }, new Object[] {
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null
                    }, new Object[][] {
                            {DiagnosisType.INVALID, "Empty lines before ARC record", 0}
                    }, new Object[][] {}},
                    /*
                     * Garbage.
                     */
                    {false, 0, "garbage\n".getBytes(), new String[] {
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null
                    }, new Object[] {
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null
                    }, new Object[][] {
                            {DiagnosisType.INVALID, "Data before ARC record", 0}
                    }, new Object[][] {}},
                    /*
                     * v2 all "-"
                     */
                    {true, 2, "- - - - - - - - - -\n".getBytes(), new String[] {
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null
                    }, new Object[] {
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null
                    }, new Object[][] {
                            {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_URL + "' value", 0},
                            {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_IP_ADDRESS + "' value", 0},
                            {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_ARCHIVE_DATE + "' value", 0},
                            {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_CONTENT_TYPE + "' value", 0},
                            {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_RESULT_CODE + "' value", 0},
                            //{DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_CHECKSUM + "' value", 0},
                            //{DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_LOCATION + "' value", 0},
                            {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_OFFSET + "' value", 0},
                            {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_FILENAME + "' value", 0},
                            {DiagnosisType.REQUIRED_MISSING, "'" + ArcConstants.FN_ARCHIVE_LENGTH + "' value", 0}
                    }, new Object[][] {}},
                    /*
                     * Valid record V1.
                     */
                    {true, 1, bytes = "http://cctr.umkc.edu:80/user/jbenz/tst.htm 134.193.4.1 19970417175710 text/html 649\n".getBytes(), new String[] {
                        "http://cctr.umkc.edu:80/user/jbenz/tst.htm",
                        "134.193.4.1",
                        "19970417175710",
                        "text/html",
                        null,
                        null,
                        null,
                        null,
                        null,
                        "649"
                    }, new Object[] {
                        Uri.create("http://cctr.umkc.edu:80/user/jbenz/tst.htm"),
                        InetAddress.getByName("134.193.4.1"),
                        ArcDateParser.getDate("19970417175710"),
                        ContentType.parseContentType("text/html"),
                        null,
                        null,
                        new Long(649)
                    }, new Object[][] {}, new Object[][] {}},
                    /*
                     * Valid record v2.
                     */
                    {true, 2, "http://www.antiaction.com/ 192.168.1.2 20120712144000 text/htlm 200 checksum location 229 filename 649\n".getBytes(), new String[] {
                        "http://www.antiaction.com/",
                        "192.168.1.2",
                        "20120712144000",
                        "text/htlm",
                        "200",
                        "checksum",
                        "location",
                        "229",
                        "filename",
                        "649"
                    }, new Object[] {
                        Uri.create("http://www.antiaction.com/"),
                        InetAddress.getByName("192.168.1.2"),
                        ArcDateParser.getDate("20120712144000"),
                        ContentType.parseContentType("text/htlm"),
                        new Integer(200),
                        new Long(229),
                        new Long(649),
                    }, new Object[][] {}, new Object[][] {}},
                    /*
                     * Invalid record V1.
                     */
                    {true, 1, "4270 http://cctr.umkc.edu:80/user/jbenz/tst.htm 134.193.4.1 19970417175710 text/html\n".getBytes(), new String[] {
                        "4270",
                        "http://cctr.umkc.edu:80/user/jbenz/tst.htm",
                        "134.193.4.1",
                        "19970417175710",
                        null,
                        null,
                        null,
                        null,
                        null,
                        "text/html"
                    }, new Object[] {
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null
                    }, new Object[][] {
                            {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_URL + "' value", 2},
                            {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_IP_ADDRESS + "' value", 2},
                            {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_ARCHIVE_DATE + "' value", 2},
                            {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_CONTENT_TYPE + "' value", 2},
                            {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_ARCHIVE_LENGTH + "' value", 2}
                    }, new Object[][] {}},
                    /*
                     * Invalid record v2.
                     */
                    {true, 2, "40 http://www.antiaction.com/ 192.168.1.2 20120712144000 text/htlm 200 checksum location 1234 filename\n".getBytes(), new String[] {
                        "40",
                        "http://www.antiaction.com/",
                        "192.168.1.2",
                        "20120712144000",
                        "text/htlm",
                        "200",
                        "checksum",
                        "location",
                        "1234",
                        "filename"
                    }, new Object[] {
                        null,
                        null,
                        null,
                        null,
                        null,
                        null,
                        null
                    }, new Object[][] {
                            {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_URL + "' value", 2},
                            {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_IP_ADDRESS + "' value", 2},
                            {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_ARCHIVE_DATE + "' value", 2},
                            {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_CONTENT_TYPE + "' value", 2},
                            {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_RESULT_CODE + "' value", 2},
                            {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_OFFSET + "' value", 2},
                            {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_ARCHIVE_LENGTH + "' value", 2}
                    }, new Object[][] {}},
                    /*
                     * Valid record v2 with "-".
                     */
                    {true, 2, "http://www.antiaction.com/ 192.168.1.2 20120712144000 text/htlm 200 checksum - 229 filename 649\n".getBytes(), new String[] {
                        "http://www.antiaction.com/",
                        "192.168.1.2",
                        "20120712144000",
                        "text/htlm",
                        "200",
                        "checksum",
                        null,
                        "229",
                        "filename",
                        "649"
                    }, new Object[] {
                        Uri.create("http://www.antiaction.com/"),
                        InetAddress.getByName("192.168.1.2"),
                        ArcDateParser.getDate("20120712144000"),
                        ContentType.parseContentType("text/htlm"),
                        new Integer(200),
                        new Long(229),
                        new Long(649)
                    }, new Object[][] {}, new Object[][] {}},
                    /*
                     * Semi-valid record v2 content-type=no-type, result-code out of bounds, minus offset, length.
                     */
                    {true, 2, "http://www.antiaction.com/ 192.168.1.2 20120712144000 no-type 99 checksum location -4321 filename -42\n".getBytes(), new String[] {
                        "http://www.antiaction.com/",
                        "192.168.1.2",
                        "20120712144000",
                        "no-type",
                        "99",
                        "checksum",
                        "location",
                        "-4321",
                        "filename",
                        "-42"
                    }, new Object[] {
                        Uri.create("http://www.antiaction.com/"),
                        InetAddress.getByName("192.168.1.2"),
                        ArcDateParser.getDate("20120712144000"),
                        null,
                        new Integer(99),
                        new Long(-4321),
                        new Long(-42)
                    }, new Object[][] {
                        {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_RESULT_CODE + "' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_OFFSET + "' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_ARCHIVE_LENGTH + "' value", 2}
                    }, new Object[][] {}},
                    /*
                     * Semi-valid record v2 content-type=no-type, result-code out of bounds.
                     */
                    {true, 2, "http://www.antiaction.com/ 192.168.1.2 20120712144000 no-type 1000 checksum location 4321 filename 42\n".getBytes(), new String[] {
                        "http://www.antiaction.com/",
                        "192.168.1.2",
                        "20120712144000",
                        "no-type",
                        "1000",
                        "checksum",
                        "location",
                        "4321",
                        "filename",
                        "42"
                    }, new Object[] {
                        Uri.create("http://www.antiaction.com/"),
                        InetAddress.getByName("192.168.1.2"),
                        ArcDateParser.getDate("20120712144000"),
                        null,
                        new Integer(1000),
                        new Long(4321),
                        new Long(42)
                    }, new Object[][] {
                        {DiagnosisType.INVALID_EXPECTED, "'" + ArcConstants.FN_RESULT_CODE + "' value", 2}
                    }, new Object[][] {}}
            };
            diagnostics.reset();
            for (int i=0; i<test_cases.length; ++i) {
                bytes = (byte[])test_cases[i][2];
                in = new ByteArrayInputStream(bytes);
                reader = ArcReaderFactory.getReader(in);
                reader.fieldParsers.diagnostics = diagnostics;
                header = ArcHeader.initHeader(reader, 42L, diagnostics);
                pbin = ((ArcReaderUncompressed)reader).in;
                success = header.parseHeader(pbin);
                tmpStr = header.toString();
                Assert.assertNotNull(tmpStr);
                Assert.assertEquals(test_cases[i][0], success);
                Assert.assertEquals(test_cases[i][1], header.recordFieldVersion);
                String[] expected_fieldStrings = (String[])test_cases[i][3];
                Object[] expected_fieldObjects = (Object[])test_cases[i][4];
                Object[][] expected_errors = (Object[][])test_cases[i][5];
                Object[][] expected_warnings = (Object[][])test_cases[i][6];
                TestBaseUtils.assert_header(header, expected_fieldStrings, expected_fieldObjects, expected_errors, expected_warnings);
                diagnostics.reset();

                // Save testfile.
                SaveArcTestFiles.saveTestArcHeader(bytes, expected_errors.length == 0 && expected_warnings.length == 0);
            }
        } catch (IOException e) {
            e.printStackTrace();
            Assert.fail("Unexpected exception!");
        }
    }

}
TOP

Related Classes of org.jwat.arc.TestArcHeader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.