Package org.jwat.warc

Source Code of org.jwat.warc.TestWarcFieldParsers

/**
* Java Web Archive Toolkit - Software to read and validate ARC, WARC
* and GZip files. (http://jwat.org/)
* Copyright 2011-2012 Netarkivet.dk (http://netarkivet.dk/)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.jwat.warc;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;

import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.jwat.common.DiagnosisType;
import org.jwat.common.UriProfile;

@RunWith(JUnit4.class)
public class TestWarcFieldParsers {

    public static final String emptyFieldsFile = "invalid-warcfile-fields-empty.warc";

    @Test
    public void test_parsers_empty() {
        boolean bDebugOutput = System.getProperty("jwat.debug.output") != null;

        InputStream in;

        int records = 0;
        int errors = 0;
        int warnings = 0;

        try {
            in = this.getClass().getClassLoader().getResourceAsStream(emptyFieldsFile);

            WarcReader reader = WarcReaderFactory.getReader(in);
            WarcRecord record;

            while ((record = reader.getNextRecord()) != null) {
                record.close();

                if (bDebugOutput) {
                    TestBaseUtils.printRecord(record);
                    TestBaseUtils.printRecordErrors(record);
                }

                errors = 0;
                warnings = 0;
                if (record.diagnostics.hasErrors()) {
                    errors += record.diagnostics.getErrors().size();
                }
                if (record.diagnostics.hasWarnings()) {
                    warnings += record.diagnostics.getWarnings().size();
                }

                ++records;
            }

            reader.close();
            in.close();

            if (bDebugOutput) {
                TestBaseUtils.printStatus(records, errors, warnings);
            }
        } catch (FileNotFoundException e) {
            Assert.fail("Input file missing");
        } catch (IOException e) {
            Assert.fail("Unexpected i/o exception");
        }

        //Assert.assertEquals(expected_records, records);
        //Assert.assertEquals(expected_errors, errors);
        //Assert.assertEquals(expected_warnings, warnings);
    }

    public static final String missingFieldsFile = "invalid-warcfile-fields-missing.warc";

    @Test
    public void test_parsers_missing() {
        boolean bDebugOutput = System.getProperty("jwat.debug.output") != null;

        InputStream in;

        int records = 0;
        int errors = 0;
        int warnings = 0;

        try {
            in = this.getClass().getClassLoader().getResourceAsStream(missingFieldsFile);

            WarcReader reader = WarcReaderFactory.getReader(in);
            WarcRecord record;

            while ((record = reader.getNextRecord()) != null) {
                record.close();

                if (bDebugOutput) {
                    TestBaseUtils.printRecord(record);
                    TestBaseUtils.printRecordErrors(record);
                }

                errors = 0;
                warnings = 0;
                if (record.diagnostics.hasErrors()) {
                    errors += record.diagnostics.getErrors().size();
                }
                if (record.diagnostics.hasWarnings()) {
                    warnings += record.diagnostics.getWarnings().size();
                }

                ++records;
            }

            reader.close();
            in.close();

            if (bDebugOutput) {
                TestBaseUtils.printStatus(records, errors, warnings);
            }
        } catch (FileNotFoundException e) {
            Assert.fail("Input file missing");
        } catch (IOException e) {
            Assert.fail("Unexpected i/o exception");
        }
    }

    public static final String invalidFormatFieldsFile = "invalid-warcfile-fields-invalidformat.warc";

    @Test
    public void test_parsers_invalid_format() {
        boolean bDebugOutput = System.getProperty("jwat.debug.output") != null;

        InputStream in;

        int records = 0;
        int errors = 0;
        int warnings = 0;

        UriProfile uriProfile = UriProfile.RFC3986;

        Object[][] expectedDiagnoses;

        try {
            in = this.getClass().getClassLoader().getResourceAsStream(invalidFormatFieldsFile);

            WarcReader reader = WarcReaderFactory.getReader(in);
            WarcRecord record;

            while ((record = reader.getNextRecord()) != null) {
                record.close();

                // Test diagnostics.
                reader.fieldParsers.diagnostics.reset();
                Assert.assertNull(reader.fieldParsers.parseInteger(null, "1"));
                Assert.assertNull(reader.fieldParsers.parseLong(null, "2"));
                Assert.assertNull(reader.fieldParsers.parseString(null, "3"));
                Assert.assertNull(reader.fieldParsers.parseContentType(null, "4"));
                Assert.assertNull(reader.fieldParsers.parseIpAddress(null, "5"));
                Assert.assertNull(reader.fieldParsers.parseUri(null, false, uriProfile, "6"));
                Assert.assertNull(reader.fieldParsers.parseUri(null, true, uriProfile, "7"));
                Assert.assertNull(reader.fieldParsers.parseDate(null, "8"));
                Assert.assertNull(reader.fieldParsers.parseDigest(null, "9"));
                Assert.assertFalse(reader.fieldParsers.diagnostics.hasErrors());
                Assert.assertTrue(reader.fieldParsers.diagnostics.hasWarnings());
                //TestBaseUtils.printDiagnoses(reader.fieldParsers.diagnostics.getErrors());
                //TestBaseUtils.printDiagnoses(reader.fieldParsers.diagnostics.getWarnings());
                expectedDiagnoses = new Object[][] {
                        {DiagnosisType.EMPTY, "'1' field", 0},
                        {DiagnosisType.EMPTY, "'2' field", 0},
                        {DiagnosisType.EMPTY, "'3' field", 0},
                        {DiagnosisType.EMPTY, "'4' field", 0},
                        {DiagnosisType.EMPTY, "'5' field", 0},
                        {DiagnosisType.EMPTY, "'6' field", 0},
                        {DiagnosisType.EMPTY, "'7' field", 0},
                        {DiagnosisType.EMPTY, "'8' field", 0},
                        {DiagnosisType.EMPTY, "'9' field", 0}
                };
                TestBaseUtils.compareDiagnoses(expectedDiagnoses, reader.fieldParsers.diagnostics.getWarnings());

                reader.fieldParsers.diagnostics.reset();
                Assert.assertNull(reader.fieldParsers.parseInteger("", "1"));
                Assert.assertNull(reader.fieldParsers.parseLong("", "2"));
                Assert.assertEquals(reader.fieldParsers.parseString("", "3"), "");
                Assert.assertNull(reader.fieldParsers.parseContentType("", "4"));
                Assert.assertNull(reader.fieldParsers.parseIpAddress("", "5"));
                Assert.assertNull(reader.fieldParsers.parseUri("", false, uriProfile, "6"));
                Assert.assertNull(reader.fieldParsers.parseUri("", true, uriProfile, "7"));
                Assert.assertNull(reader.fieldParsers.parseDate("", "8"));
                Assert.assertNull(reader.fieldParsers.parseDigest("", "9"));
                Assert.assertFalse(reader.fieldParsers.diagnostics.hasErrors());
                Assert.assertTrue(reader.fieldParsers.diagnostics.hasWarnings());
                //TestBaseUtils.printDiagnoses(reader.fieldParsers.diagnostics.getErrors());
                //TestBaseUtils.printDiagnoses(reader.fieldParsers.diagnostics.getWarnings());
                expectedDiagnoses = new Object[][] {
                        {DiagnosisType.EMPTY, "'1' field", 0},
                        {DiagnosisType.EMPTY, "'2' field", 0},
                        {DiagnosisType.EMPTY, "'3' field", 0},
                        {DiagnosisType.EMPTY, "'4' field", 0},
                        {DiagnosisType.EMPTY, "'5' field", 0},
                        {DiagnosisType.EMPTY, "'6' field", 0},
                        {DiagnosisType.EMPTY, "'7' field", 0},
                        {DiagnosisType.EMPTY, "'8' field", 0},
                        {DiagnosisType.EMPTY, "'9' field", 0}
                };
                TestBaseUtils.compareDiagnoses(expectedDiagnoses, reader.fieldParsers.diagnostics.getWarnings());

                reader.fieldParsers.diagnostics.reset();
                Assert.assertNull(reader.fieldParsers.parseInteger("one", "1"));
                Assert.assertNull(reader.fieldParsers.parseLong("very lengthy", "2"));
                Assert.assertNull(reader.fieldParsers.parseContentType("gif\\image", "3"));
                Assert.assertNull(reader.fieldParsers.parseIpAddress("a.b.c.d", "4"));
                Assert.assertNull(reader.fieldParsers.parseDate("blue monday", "5"));
                Assert.assertNull(reader.fieldParsers.parseDigest("sharif-1; omar", "6"));
                Assert.assertTrue(reader.fieldParsers.diagnostics.hasErrors());
                Assert.assertFalse(reader.fieldParsers.diagnostics.hasWarnings());
                //TestBaseUtils.printDiagnoses(reader.fieldParsers.diagnostics.getErrors());
                //TestBaseUtils.printDiagnoses(reader.fieldParsers.diagnostics.getWarnings());
                expectedDiagnoses = new Object[][] {
                        {DiagnosisType.INVALID_EXPECTED, "'1' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'2' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'3' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'4' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'5' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'6' value", 2}
                };
                TestBaseUtils.compareDiagnoses(expectedDiagnoses, reader.fieldParsers.diagnostics.getErrors());

                reader.fieldParsers.diagnostics.reset();
                Assert.assertNull(reader.fieldParsers.parseUri("bad_uri", false, uriProfile, "1"));
                Assert.assertNull(reader.fieldParsers.parseUri("bad_uri", true, uriProfile, "2"));
                Assert.assertNull(reader.fieldParsers.parseUri("<zaphod>", false, uriProfile, "3"));
                Assert.assertNull(reader.fieldParsers.parseUri("<zaphod>", true, uriProfile, "4"));
                Assert.assertNull(reader.fieldParsers.parseUri("1331:muhid:42", false, uriProfile, "5"));
                Assert.assertNull(reader.fieldParsers.parseUri("1331:muhid:42", true, uriProfile, "6"));
                Assert.assertNull(reader.fieldParsers.parseUri("bad_uri", false, UriProfile.RFC3986_ABS_16BIT_LAX, "7"));
                Assert.assertNull(reader.fieldParsers.parseUri("bad_uri", true, UriProfile.RFC3986_ABS_16BIT_LAX, "8"));
                Assert.assertNull(reader.fieldParsers.parseUri("<zaphod>", false, UriProfile.RFC3986_ABS_16BIT_LAX, "9"));
                Assert.assertNull(reader.fieldParsers.parseUri("<zaphod>", true, UriProfile.RFC3986_ABS_16BIT_LAX, "10"));
                Assert.assertNull(reader.fieldParsers.parseUri("1331:muhid:42", false, UriProfile.RFC3986_ABS_16BIT_LAX, "11"));
                Assert.assertNull(reader.fieldParsers.parseUri("1331:muhid:42", true, UriProfile.RFC3986_ABS_16BIT_LAX, "12"));
                Assert.assertTrue(reader.fieldParsers.diagnostics.hasErrors());
                Assert.assertFalse(reader.fieldParsers.diagnostics.hasWarnings());
                //TestBaseUtils.printDiagnoses(reader.fieldParsers.diagnostics.getErrors());
                //TestBaseUtils.printDiagnoses(reader.fieldParsers.diagnostics.getWarnings());
                expectedDiagnoses = new Object[][] {
                        {DiagnosisType.INVALID_EXPECTED, "'1' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'2' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'2' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'3' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'3' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'4' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'5' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'6' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'6' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'7' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'8' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'8' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'9' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'9' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'10' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'11' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'12' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'12' value", 2}
                };
                TestBaseUtils.compareDiagnoses(expectedDiagnoses, reader.fieldParsers.diagnostics.getErrors());

                reader.fieldParsers.diagnostics.reset();
                Assert.assertNotNull(reader.fieldParsers.parseInteger("42", null));
                Assert.assertNotNull(reader.fieldParsers.parseLong("12345678901234", null));
                Assert.assertNotNull(reader.fieldParsers.parseString("JWAT", null));
                Assert.assertNotNull(reader.fieldParsers.parseContentType("text/plain", null));
                Assert.assertNotNull(reader.fieldParsers.parseIpAddress("127.0.0.1", null));
                Assert.assertNotNull(reader.fieldParsers.parseDate("2012-12-24T20:12:34Z", null));
                Assert.assertNotNull(reader.fieldParsers.parseDigest("sha1:1234567890abcdef", null));
                Assert.assertNotNull(reader.fieldParsers.parseUri("http://jwat.org", false, uriProfile, null));
                Assert.assertNotNull(reader.fieldParsers.parseUri("<http://jwat.org>", true, uriProfile, null));
                Assert.assertNotNull(reader.fieldParsers.parseUri("http://jwat.org", false, UriProfile.RFC3986_ABS_16BIT_LAX, null));
                Assert.assertNotNull(reader.fieldParsers.parseUri("<http://jwat.org>", true, UriProfile.RFC3986_ABS_16BIT_LAX, null));
                Assert.assertFalse(reader.fieldParsers.diagnostics.hasErrors());
                Assert.assertFalse(reader.fieldParsers.diagnostics.hasWarnings());

                reader.fieldParsers.diagnostics.reset();
                Assert.assertNotNull(reader.fieldParsers.parseUri("<http://jwat.org", true, uriProfile, "1"));
                Assert.assertNotNull(reader.fieldParsers.parseUri("http://jwat.org>", true, uriProfile, "2"));
                Assert.assertNotNull(reader.fieldParsers.parseUri("http://jwat.org", true, uriProfile, "3"));
                Assert.assertNotNull(reader.fieldParsers.parseUri("<http://jwat.org", false, uriProfile, "4"));
                Assert.assertNotNull(reader.fieldParsers.parseUri("http://jwat.org>", false, uriProfile, "5"));
                Assert.assertNotNull(reader.fieldParsers.parseUri("<http://jwat.org>", false, uriProfile, "6"));
                Assert.assertNotNull(reader.fieldParsers.parseUri("<http://jwat.org", true, UriProfile.RFC3986_ABS_16BIT_LAX, "7"));
                Assert.assertNotNull(reader.fieldParsers.parseUri("http://jwat.org>", true, UriProfile.RFC3986_ABS_16BIT_LAX, "8"));
                Assert.assertNotNull(reader.fieldParsers.parseUri("http://jwat.org", true, UriProfile.RFC3986_ABS_16BIT_LAX, "9"));
                Assert.assertNotNull(reader.fieldParsers.parseUri("<http://jwat.org", false, UriProfile.RFC3986_ABS_16BIT_LAX, "10"));
                Assert.assertNotNull(reader.fieldParsers.parseUri("http://jwat.org>", false, UriProfile.RFC3986_ABS_16BIT_LAX, "11"));
                Assert.assertNotNull(reader.fieldParsers.parseUri("<http://jwat.org>", false, UriProfile.RFC3986_ABS_16BIT_LAX, "12"));
                Assert.assertTrue(reader.fieldParsers.diagnostics.hasErrors());
                Assert.assertFalse(reader.fieldParsers.diagnostics.hasWarnings());
                //TestBaseUtils.printDiagnoses(reader.fieldParsers.diagnostics.getErrors());
                //TestBaseUtils.printDiagnoses(reader.fieldParsers.diagnostics.getWarnings());
                expectedDiagnoses = new Object[][] {
                        {DiagnosisType.INVALID_EXPECTED, "'1' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'2' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'3' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'4' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'5' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'6' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'7' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'8' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'9' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'10' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'11' value", 2},
                        {DiagnosisType.INVALID_EXPECTED, "'12' value", 2}
                };
                TestBaseUtils.compareDiagnoses(expectedDiagnoses, reader.fieldParsers.diagnostics.getErrors());

                if (bDebugOutput) {
                    TestBaseUtils.printRecord(record);
                    TestBaseUtils.printRecordErrors(record);
                }

                errors = 0;
                warnings = 0;
                if (record.diagnostics.hasErrors()) {
                    errors += record.diagnostics.getErrors().size();
                }
                if (record.diagnostics.hasWarnings()) {
                    warnings += record.diagnostics.getWarnings().size();
                }

                ++records;
            }

            reader.close();
            in.close();

            if (bDebugOutput) {
                TestBaseUtils.printStatus(records, errors, warnings);
            }
        } catch (FileNotFoundException e) {
            Assert.fail("Input file missing");
        } catch (IOException e) {
            Assert.fail("Unexpected i/o exception");
        }

        //Assert.assertEquals(expected_records, records);
        //Assert.assertEquals(expected_errors, errors);
        //Assert.assertEquals(expected_warnings, warnings);
    }

}
TOP

Related Classes of org.jwat.warc.TestWarcFieldParsers

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.