Package org.jwat.common

Examples of org.jwat.common.Diagnosis


@RunWith(JUnit4.class)
public class TestWarcReader_Diagnosis {

    @Test
    public void test_warcreader_diagnosis() {
        Diagnosis d;
        try {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            out.write("\r\n".getBytes());
            out.write("The monkeys are a coming!\r\n".getBytes());
            out.write("\r\n".getBytes());
View Full Code Here


         * Check for version and parse if present.
         */
        if (versionLine != null && versionLine.length() > 0) {
            String[] versionArr = versionLine.split(" ", -1);
            if (versionArr.length != ArcConstants.VERSION_DESC_FIELDS.length) {
                diagnostics.addError(new Diagnosis(DiagnosisType.INVALID,
                        ArcConstants.ARC_VERSION_BLOCK,
                        "Invalid version description"));
            }
            /*
             * Get version and origin.
             */
            switch (versionArr.length) {
            default:
            case 3:
                originCode = versionArr[ArcConstants.FN_IDX_ORIGIN_CODE];
                originCode = fieldParsers.parseString(
                            originCode, ArcConstants.FN_ORIGIN_CODE, false);
            case 2:
                reservedStr = versionArr[ArcConstants.FN_IDX_RESERVED];
                reserved = fieldParsers.parseInteger(
                            reservedStr, ArcConstants.FN_RESERVED, false);
            case 1:
                versionNumberStr = versionArr[ArcConstants.FN_IDX_VERSION_NUMBER];
                versionNumber = fieldParsers.parseInteger(
                            versionNumberStr, ArcConstants.FN_VERSION_NUMBER, false);
            case 0:
                break;
            }
            /*
             *  Check version.
             */
            version = null;
            if (versionNumber != null && reserved != null) {
                bValidVersionFormat = true;
                versionStr = Integer.toString(versionNumber) + "." + Integer.toString(reserved);
                // Check ARC version number
                version = ArcVersion.fromValues(versionNumber.intValue(),
                        reserved.intValue());
            }
            isVersionValid = (version != null);
            if (!isVersionValid) {
                // Add validation error
                diagnostics.addError(new Diagnosis(DiagnosisType.INVALID,
                        ArcConstants.ARC_VERSION_BLOCK,
                        "Invalid version: [version number: " + versionNumber
                        + ", reserved: " + reserved +']'));
            }
        } else {
            diagnostics.addError(new Diagnosis(DiagnosisType.ERROR,
                    ArcConstants.ARC_VERSION_BLOCK,
                    "Version line empty"));
        }
        /*
         * Identify block description.
         */
        if (blockDescLine != null && blockDescLine.length() > 0) {
            if (ArcConstants.VERSION_1_BLOCK_DEF.equals(blockDescLine)) {
                isValidBlockdDesc = true;
                blockDescVersion = 1;
            } else if (ArcConstants.VERSION_2_BLOCK_DEF.equals(blockDescLine)) {
                isValidBlockdDesc = true;
                blockDescVersion = 2;
            } else {
                diagnostics.addError(new Diagnosis(DiagnosisType.INVALID,
                        ArcConstants.ARC_VERSION_BLOCK,
                        "Unsupported version block definition"));
            }
        } else {
            diagnostics.addError(new Diagnosis(DiagnosisType.ERROR,
                    ArcConstants.ARC_VERSION_BLOCK,
                    "Block definition empty"));
        }
        boolean bIsValidVersionBlock = (version != null) && (blockDescVersion > 0);
        if (bIsValidVersionBlock) {
            switch (blockDescVersion) {
            case 1:
                if (version != ArcVersion.VERSION_1 && version != ArcVersion.VERSION_1_1) {
                    bIsValidVersionBlock = false;
                }
                break;
            case 2:
                if (version != ArcVersion.VERSION_2) {
                    bIsValidVersionBlock = false;
                }
                break;
            }
            if (!bIsValidVersionBlock) {
                diagnostics.addError(new Diagnosis(DiagnosisType.INVALID,
                        ArcConstants.ARC_VERSION_BLOCK,
                        "Version number does not match the block definition"));
            }
        }
        return bIsValidVersionBlock;
View Full Code Here

            record.startOffset = startOffset;
            // Check read and computed offset value only if we're reading
            // a plain ARC file, not a GZipped ARC.
            if ((header.offset != null) && (header.startOffset > 0L)
                                && (header.offset.longValue() != header.startOffset)) {
                diagnostics.addError(new Diagnosis(DiagnosisType.INVALID_EXPECTED,
                        "'" + ArcConstants.FN_OFFSET + "' value",
                        header.offset.toString(),
                        Long.toString(header.startOffset)));
            }
            if (reader.records == 1) {
                if (record.recordType == ArcRecordBase.RT_ARC_RECORD) {
                    diagnostics.addError(new Diagnosis(DiagnosisType.ERROR_EXPECTED,
                            ArcConstants.ARC_FILE,
                            "Expected a version block as the first record."));
                }
            } else {
                if (record.recordType == ArcRecordBase.RT_VERSION_BLOCK) {
                    diagnostics.addError(new Diagnosis(DiagnosisType.ERROR_EXPECTED,
                            ArcConstants.ARC_FILE,
                            "Expected an ARC record not version block."));
                }
            }
            if (reader.versionHeader != null && reader.versionHeader.blockDescVersion > 0
                    && record.header.recordFieldVersion != reader.versionHeader.blockDescVersion) {
                diagnostics.addError(new Diagnosis(DiagnosisType.INVALID_EXPECTED,
                        "ARC record does not match the version block definition",
                        Integer.toString(record.header.recordFieldVersion),
                        Integer.toString(reader.versionHeader.blockDescVersion)));
            }
            // Preliminary compliance status, will be updated when the
            // payload/record is closed.
            if (diagnostics.hasErrors() || diagnostics.hasWarnings()) {
                record.bIsCompliant = false;
            } else {
                record.bIsCompliant = true;
            }
            reader.bIsCompliant &= record.bIsCompliant;
        } else {
            // Transfer errors/warnings identified in the header parser
            // to the reader since we are not returning a record.
            reader.consumed += in.getConsumed() - startOffset;
            reader.diagnostics.addAll(diagnostics);
            if (diagnostics.hasErrors() || diagnostics.hasWarnings()) {
                reader.errors += diagnostics.getErrors().size();
                reader.warnings += diagnostics.getWarnings().size();
                reader.bIsCompliant = false;
            }
            // Require one or more records to be present.
            if (reader.records == 0) {
                reader.diagnostics.addError(new Diagnosis(DiagnosisType.ERROR_EXPECTED, "ARC file", "One or more records"));
                ++reader.errors;
                reader.bIsCompliant = false;
            }
        }
        return record;
View Full Code Here

     * @param type diagnosis type
     * @param entity entity examined
     * @param information optional extra information
     */
    protected void addErrorDiagnosis(DiagnosisType type, String entity, String... information) {
        diagnostics.addError(new Diagnosis(type, entity, information));
    }
View Full Code Here

                if (httpHeader != null) {
                    if (httpHeader.isValid()) {
                        payload.setPayloadHeaderWrapped(httpHeader);
                    } else {
                        diagnostics.addError(
                                new Diagnosis(DiagnosisType.ERROR,
                                        "http header",
                                        "Unable to parse http header!"));
                    }
                }
            }
        } else if (HttpHeader.isSupported(header.urlScheme)) {
            // Never! -> && !ArcConstants.CONTENT_TYPE_NO_TYPE.equals(header.contentTypeStr)
            diagnostics.addError(new Diagnosis(DiagnosisType.ERROR_EXPECTED,
                    ArcConstants.ARC_FILE,
                    "Expected payload not found in the record block"));
        }
        return;
    }
View Full Code Here

     * Errors and/or warnings are reported on the diagnostics object.
     */
    protected void validateContentType() {
        if (header.contentType == null) {
            // Version block content-type is required.
            diagnostics.addError(new Diagnosis(DiagnosisType.ERROR_EXPECTED,
                    "'" + ArcConstants.FN_CONTENT_TYPE + "' value",
                    ArcConstants.CONTENT_TYPE_FORMAT));
        } else if (!ArcConstants.VERSION_BLOCK_CONTENT_TYPE.equals(
                header.contentType.contentType) ||
                !ArcConstants.VERSION_BLOCK_MEDIA_TYPE.equals(header.contentType.mediaType)) {
            // Version block content-type should be equal to "text/plain"
            diagnostics.addWarning(new Diagnosis(DiagnosisType.INVALID_EXPECTED,
                    "'" + ArcConstants.FN_CONTENT_TYPE + "' value",
                    header.contentTypeStr,
                    ArcConstants.CONTENT_TYPE_TEXT_PLAIN));
        }
    }
View Full Code Here

                version = versionHeader.version;
                if (versionHeader.isValid()) {
                    payload.setPayloadHeaderWrapped(versionHeader);
                } else {
                    diagnostics.addError(
                            new Diagnosis(DiagnosisType.ERROR,
                                    ArcConstants.ARC_VERSION_BLOCK,
                                    "Version block is not valid!"));
                }
            }
        } else {
            diagnostics.addError(
                    new Diagnosis(DiagnosisType.INVALID,
                            ArcConstants.ARC_FILE,
                            "VersionBlock length missing!"));
        }
        if (versionHeader != null && versionHeader.isValid()) {
            if (ArcVersion.VERSION_1_1.equals(version)) {
                if ((versionHeader.getRemaining() == 0)) {
                    bHasPseudoEmptyPayload = true;
                    diagnostics.addError(new Diagnosis(DiagnosisType.ERROR_EXPECTED,
                            ArcConstants.ARC_FILE,
                            "Expected metadata payload not found in the version block"));
                }
            } else {
                if (versionHeader.getRemaining() == 0) {
                    bHasPseudoEmptyPayload = true;
                } else {
                    if (!reader.bStrict) {
                        // I'm going on a limb here that IA's ARC writer will
                        // not write in excess of 4GB useless newlines.
                        if (versionHeader.getRemaining() > ArcConstants.ARC_VB_MAX_TRAILING_NEWLINES) {
                            throw new IOException(
                                    "This amount of useless data in the version block is unacceptable!");
                        }
                        ByteArrayOutputStream out_payload = new ByteArrayOutputStream(
                                (int)versionHeader.getRemaining());
                        InputStream in_payload = versionHeader.getPayloadInputStream();
                        int read;
                        byte[] tmpBuf = new byte[1024];
                        while ((read = in_payload.read(tmpBuf)) != -1) {
                            out_payload.write(tmpBuf, 0, read);
                        }
                        in_payload.close();
                        out_payload.close();
                        excessiveMetadata = out_payload.toByteArray();
                        ByteArrayInputStream in_newlines = new ByteArrayInputStream(excessiveMetadata);
                        if (!isValidStreamOfCRLF(in_newlines)) {
                            diagnostics.addError(new Diagnosis(DiagnosisType.UNDESIRED_DATA,
                                    "version block metadata payload",
                                    "Metadata payload must not be present in this version"));
                        } else {
                            bHasPseudoEmptyPayload = true;
                        }
                        in_newlines.close();
                    } else {
                        diagnostics.addError(new Diagnosis(DiagnosisType.UNDESIRED_DATA,
                                "version block metadata payload",
                                "Metadata payload must not be present in this version"));
                    }
                }
            }
View Full Code Here

                gzipEntry.xfl = (short)(headerBytes[8] & 255);
                gzipEntry.os = (short)(headerBytes[9] & 255);
                crc.update(headerBytes);
                if (gzipEntry.magic != GzipConstants.GZIP_MAGIC) {
                    gzipEntry.diagnostics.addError(
                            new Diagnosis(
                                    DiagnosisType.INVALID_EXPECTED,
                                    "Magic Value",
                                    Integer.toHexString(gzipEntry.magic),
                                    Integer.toHexString(GzipConstants.GZIP_MAGIC)
                                )
                            );
                }
                if (gzipEntry.cm != GzipConstants.CM_DEFLATE) {
                    // Currently only the deflate compression method is supported in GZip.
                    gzipEntry.diagnostics.addError(
                            new Diagnosis(
                                    DiagnosisType.INVALID_EXPECTED,
                                    "Compression Method",
                                    Integer.toHexString(gzipEntry.cm),
                                    Integer.toHexString(GzipConstants.CM_DEFLATE)
                                )
                            );
                } else {
                    // Currently only the deflate compression method is supported in GZip.
                    // Check to see whether some xfl reserved bits have been used.
                    if ((gzipEntry.xfl & GzipConstants.DEFLATE_XLF_RESERVED) != 0) {
                        gzipEntry.diagnostics.addWarning(
                                new Diagnosis(
                                        DiagnosisType.RESERVED,
                                        "eXtra FLags",
                                        Integer.toHexString(gzipEntry.xfl & GzipConstants.DEFLATE_XLF_RESERVED)
                                    )
                                );
                    }
                    if ((gzipEntry.xfl & GzipConstants.DEFLATE_XFL_COMPRESSION_MASK) == GzipConstants.DEFLATE_XFL_COMPRESSION_MASK) {
                        gzipEntry.diagnostics.addError(
                                new Diagnosis(
                                        DiagnosisType.INVALID_DATA,
                                        "eXtra FLags",
                                        Integer.toHexString(gzipEntry.xfl & GzipConstants.DEFLATE_XFL_COMPRESSION_MASK)
                                    )
                                );
                    }
                }
                if ((gzipEntry.flg & GzipConstants.FLG_FRESERVED) != 0) {
                    gzipEntry.diagnostics.addWarning(
                            new Diagnosis(
                                    DiagnosisType.RESERVED,
                                    "FLaGs",
                                    Integer.toHexString(gzipEntry.flg & GzipConstants.FLG_FRESERVED)
                                )
                            );
                }
                if (!GzipConstants.osIdxStr.containsKey((int)gzipEntry.os)) {
                    gzipEntry.diagnostics.addWarning(
                            new Diagnosis(
                                    DiagnosisType.UNKNOWN,
                                    "Operating System",
                                    Integer.toString(gzipEntry.os)
                        )
                    );
                }
                /*
                 * FTEXT.
                 */
                if ((gzipEntry.flg & GzipConstants.FLG_FTEXT) == GzipConstants.FLG_FTEXT) {
                    gzipEntry.bFText = true;
                }
                /*
                 * FEXTRA.
                 */
                if ((gzipEntry.flg & GzipConstants.FLG_FEXTRA) == GzipConstants.FLG_FEXTRA) {
                    read = pbin.read(xlenBytes);
                    if (read == 2) {
                        gzipEntry.xlen = ((xlenBytes[1] & 255) << 8) | (xlenBytes[0] & 255);
                        if (gzipEntry.xlen > 0) {
                            gzipEntry.extraBytes = new byte[gzipEntry.xlen];
                            read = pbin.readFully(gzipEntry.extraBytes);
                            if (read != gzipEntry.xlen) {
                                throw new EOFException("Unexpected EOF!");
                            }
                            int idx = 0;
                            boolean b = true;
                            GzipExtraData extraData;
                            int len;
                            while (b) {
                                if (idx <= gzipEntry.extraBytes.length - 4) {
                                    extraData = new GzipExtraData();
                                    extraData.si1 = (byte)(gzipEntry.extraBytes[idx++] & 255);
                                    extraData.si2 = (byte)(gzipEntry.extraBytes[idx++] & 255);
                                    len = ((gzipEntry.extraBytes[idx+1] & 255) << 8) | (gzipEntry.extraBytes[idx] & 255);
                                    idx += 2;
                                    if (idx + len <= gzipEntry.extraBytes.length) {
                                        extraData.data = new byte[len];
                                        System.arraycopy(gzipEntry.extraBytes, idx, extraData.data, 0, len);
                                        idx += len;
                                        gzipEntry.extraData.add(extraData);
                                    } else {
                                        b = false;
                                    }
                                } else {
                                    b = false;
                                }
                            }
                            if (idx != gzipEntry.extraBytes.length) {
                                gzipEntry.diagnostics.addError(
                                        new Diagnosis(
                                                DiagnosisType.INVALID_DATA,
                                                "FEXTRA",
                                                "Invalid structure",
                                                "Data truncated"
                                            )
                                        );
                            }
                        } else {
                            gzipEntry.extraBytes = new byte[0];
                        }
                    } else {
                        throw new EOFException("Unexpected EOF!");
                    }
                    crc.update(xlenBytes);
                    crc.update(gzipEntry.extraBytes);
                }
                /*
                 * FNAME.
                 */
                if ((gzipEntry.flg & GzipConstants.FLG_FNAME) == GzipConstants.FLG_FNAME) {
                    fnameBytes = readZeroTerminated();
                    if (fnameBytes == null) {
                        throw new EOFException("Unexpected EOF!");
                    }
                    if (!iso8859_1.decode(fnameBytes, "")) {
                        gzipEntry.diagnostics.addWarning(
                                new Diagnosis(
                                        DiagnosisType.INVALID_ENCODING,
                                        "FName",
                                        iso8859_1.decoded,
                                        "ISO-8859-1"
                                    )
                                );
                    }
                    gzipEntry.fname = iso8859_1.decoded;
                    crc.update(fnameBytes);
                    crc.update(0);
                }
                /*
                 * FCOMMENT.
                 */
                if ((gzipEntry.flg & GzipConstants.FLG_FCOMMENT) == GzipConstants.FLG_FCOMMENT) {
                    fcommentBytes = readZeroTerminated();
                    if (fcommentBytes == null) {
                        throw new EOFException("Unexpected EOF!");
                    }
                    if (!iso8859_1.decode(fcommentBytes, "\n")) {
                        gzipEntry.diagnostics.addWarning(
                                new Diagnosis(
                                        DiagnosisType.INVALID_ENCODING,
                                        "FComment",
                                        iso8859_1.decoded,
                                        "ISO-8859-1"
                                    )
                                );
                    }
                    gzipEntry.fcomment = iso8859_1.decoded;
                    crc.update(fcommentBytes);
                    crc.update(0);
                }
                /*
                 * FHCRC.
                 */
                if ((gzipEntry.flg & GzipConstants.FLG_FHCRC) == GzipConstants.FLG_FHCRC) {
                    read = pbin.read(crc16Bytes);
                    if (read == 2) {
                        gzipEntry.bFhCrc = true;
                        gzipEntry.crc16 = ((crc16Bytes[1] & 255) << 8) | (crc16Bytes[0] & 255);
                    } else {
                        throw new EOFException("Unexpected EOF!");
                    }
                }
                /*
                 * Computed crc16.
                 */
                gzipEntry.comp_crc16 = ((int)crc.getValue()) & 0x0000ffff;
                crc.reset();
                if (gzipEntry.crc16 != null && gzipEntry.crc16 != gzipEntry.comp_crc16) {
                    gzipEntry.diagnostics.addError(
                            new Diagnosis(
                                    DiagnosisType.INVALID_EXPECTED,
                                    "CRC16",
                                    Integer.toHexString(gzipEntry.crc16),
                                    Integer.toHexString(gzipEntry.comp_crc16)
                                )
                            );
                }
                /*
                 * Prepare Entry InputStream.
                 */
                lastInput = 0;
                gzipEntry.in = new GzipEntryInputStream(this, gzipEntry);
                // Compliance
                if (gzipEntry.diagnostics.hasErrors() || gzipEntry.diagnostics.hasWarnings()) {
                    gzipEntry.bIsCompliant = false;
                } else {
                    gzipEntry.bIsCompliant = true;
                }
                bIsCompliant &= gzipEntry.bIsCompliant;
                ++entries;
            } catch (EOFException e) {
                partialEntry = gzipEntry;
                diagnostics.addError(new Diagnosis(DiagnosisType.INVALID_DATA, "GZip file", "Unexpected EOF!"));
                bIsCompliant = false;
                gzipEntry = null;
            }
        } else {
            // Require one or more entries to be present.
            if (entries == 0) {
                diagnostics.addError(new Diagnosis(DiagnosisType.ERROR_EXPECTED, "GZip file", "One or more records"));
                bIsCompliant = false;
            }
            if (pbin.read() != -1) {
                diagnostics.addError(new Diagnosis(DiagnosisType.INVALID_DATA, "GZip file", "Unexpected trailing data!"));
                bIsCompliant = false;
            }
        }
        return gzipEntry;
    }
View Full Code Here

            entry.isize = ((trailerBytes[7] & 255) << 24) | ((trailerBytes[6] & 255) << 16) | ((trailerBytes[5] & 255) << 8) | (trailerBytes[4] & 255);
            entry.comp_crc32 = (int)(crc.getValue() & 0xffffffff);
            entry.comp_isize = (int)(inf.getBytesWritten() & 0xffffffff);
            if (entry.comp_crc32 != entry.crc32) {
                entry.diagnostics.addError(
                        new Diagnosis(
                                DiagnosisType.INVALID_EXPECTED,
                                "CRC32",
                                Integer.toHexString(entry.crc32),
                                Integer.toHexString(entry.comp_crc32)
                            )
                        );
            }
            if (entry.comp_isize != entry.isize) {
                entry.diagnostics.addError(
                        new Diagnosis(
                                DiagnosisType.INVALID_EXPECTED,
                                "ISize",
                                Long.toString(entry.isize),
                                Long.toString(entry.comp_isize)
                            )
                        );
            }
        } else {
            gzipEntry.diagnostics.addError(new Diagnosis(DiagnosisType.INVALID_DATA, "GZip file", "Unexpected EOF!"));
            bIsCompliant = false;
        }
        // Compliance
        if (gzipEntry.diagnostics.hasErrors() || gzipEntry.diagnostics.hasWarnings()) {
            gzipEntry.bIsCompliant = false;
View Full Code Here

        int inflated = 0;
        while((inflated = inf.inflate(b, off, len)) == 0) {
            if (inf.finished()) {
                return -1;
            } else if (inf.needsDictionary()) {
                gzipEntry.diagnostics.addError(new Diagnosis(DiagnosisType.INVALID_DATA, "GZip file", "Unexpected EOF!"));
                bIsCompliant = false;
                throw new DataFormatException("Dictionary needed!");
            } else if (inf.needsInput()) {
                lastInput = pbin.read(inputBytes, 0, inputBytes.length);
                if (lastInput == -1) {
                    gzipEntry.diagnostics.addError(new Diagnosis(DiagnosisType.INVALID_DATA, "GZip file", "Unexpected EOF!"));
                    bIsCompliant = false;
                    throw new DataFormatException("Data missing!");
                }
                inf.setInput(inputBytes, 0, lastInput);
            } else {
                gzipEntry.diagnostics.addError(new Diagnosis(DiagnosisType.INVALID_DATA, "GZip file", "Unexpected EOF!"));
                bIsCompliant = false;
                throw new DataFormatException("Inflater malfunction!");
            }
        }
        return inflated;
View Full Code Here

TOP

Related Classes of org.jwat.common.Diagnosis

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.