Package org.apache.pdfbox.preflight.process

Source Code of org.apache.pdfbox.preflight.process.MetadataValidationProcess

/*****************************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*
****************************************************************************/

package org.apache.pdfbox.preflight.process;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.preflight.PreflightConstants;
import org.apache.pdfbox.preflight.PreflightContext;
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
import org.apache.pdfbox.preflight.exception.ValidationException;
import org.apache.pdfbox.preflight.metadata.PDFAIdentificationValidation;
import org.apache.pdfbox.preflight.metadata.RDFAboutAttributeConcordanceValidation;
import org.apache.pdfbox.preflight.metadata.RDFAboutAttributeConcordanceValidation.DifferentRDFAboutException;
import org.apache.pdfbox.preflight.metadata.SynchronizedMetaDataValidation;
import org.apache.pdfbox.preflight.metadata.XpacketParsingException;
import org.apache.pdfbox.preflight.utils.COSUtils;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.xml.DomXmpParser;
import org.apache.xmpbox.xml.XmpParsingException;
import org.apache.xmpbox.xml.XmpParsingException.ErrorType;

public class MetadataValidationProcess extends AbstractProcess
{

    @Override
    public void validate(PreflightContext ctx) throws ValidationException
    {
        try
        {
            PDDocument document = ctx.getDocument();

            byte[] tmp = getXpacket(document.getDocument());
            DomXmpParser builder;
            builder = new DomXmpParser();
            XMPMetadata metadata;
            metadata = builder.parse(tmp);
            ctx.setMetadata(metadata);

            // 6.7.5 no deprecated attribute in xpacket processing instruction
            if (metadata.getXpacketBytes() != null)
            {
                addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_METADATA_XPACKET_DEPRECATED,
                        "bytes attribute is forbidden"));
            }
            if (metadata.getXpacketEncoding() != null)
            {
                addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_METADATA_XPACKET_DEPRECATED,
                        "encoding attribute is forbidden"));
            }

            // Call metadata synchronization checking
            addValidationErrors(ctx,
                    new SynchronizedMetaDataValidation().validateMetadataSynchronization(document, metadata));

            // Call PDF/A Identifier checking
            addValidationErrors(ctx, new PDFAIdentificationValidation().validatePDFAIdentifer(metadata));

            // Call rdf:about checking
            try
            {
                new RDFAboutAttributeConcordanceValidation().validateRDFAboutAttributes(metadata);
            }
            catch (DifferentRDFAboutException e)
            {
                addValidationError(ctx, new ValidationError(
                        PreflightConstants.ERROR_METADATA_RDF_ABOUT_ATTRIBUTE_INEQUAL_VALUE, e.getMessage(), e));
            }

        }
        catch (XpacketParsingException e)
        {
            if (e.getError() != null)
            {
                addValidationError(ctx, e.getError());
            }
            else
            {
                addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_METADATA_MAIN, "Unexpected error", e));
            }
        }
        catch (XmpParsingException e)
        {
            if (e.getErrorType() == ErrorType.NoValueType)
            {
                addValidationError(ctx,
                        new ValidationError(PreflightConstants.ERROR_METADATA_UNKNOWN_VALUETYPE, e.getMessage(), e));
            }
            else if (e.getErrorType() == ErrorType.RequiredProperty)
            {
                addValidationError(ctx,
                        new ValidationError(PreflightConstants.ERROR_METADATA_PROPERTY_MISSING, e.getMessage(), e));
            }
            else if (e.getErrorType() == ErrorType.InvalidPrefix)
            {
                addValidationError(ctx, new ValidationError(
                        PreflightConstants.ERROR_METADATA_ABSENT_DESCRIPTION_SCHEMA, e.getMessage(), e));
            }
            else if (e.getErrorType() == ErrorType.InvalidType)
            {
                addValidationError(ctx,
                        new ValidationError(PreflightConstants.ERROR_METADATA_PROPERTY_UNKNOWN, e.getMessage(), e));
            }
            else if (e.getErrorType() == ErrorType.XpacketBadEnd)
            {
                throw new ValidationException("Unable to parse font metadata due to : " + e.getMessage(), e);
            }
            else if (e.getErrorType() == ErrorType.NoSchema)
            {
                addValidationError(ctx, new ValidationError(
                        PreflightConstants.ERROR_METADATA_ABSENT_DESCRIPTION_SCHEMA, e.getMessage(), e));
            }
            else if (e.getErrorType() == ErrorType.InvalidPdfaSchema)
            {
                addValidationError(ctx,
                        new ValidationError(PreflightConstants.ERROR_METADATA_WRONG_NS_URI, e.getMessage(), e));
            }
            else
            {
                addValidationError(ctx, new ValidationError(PreflightConstants.ERROR_METADATA_FORMAT, e.getMessage(), e));
            }
        }
        catch (IOException e)
        {
            throw new ValidationException("Failed while validating", e);
        }
    }

    /**
     * Return the xpacket from the dictionary's stream
     */
    public static byte[] getXpacket(COSDocument cdocument) throws IOException, XpacketParsingException
    {
        COSObject catalog = cdocument.getCatalog();
        COSBase cb = catalog.getDictionaryObject(COSName.METADATA);
        if (cb == null)
        {
            // missing Metadata Key in catalog
            ValidationError error = new ValidationError(PreflightConstants.ERROR_METADATA_FORMAT,
                    "Missing Metadata Key in catalog");
            throw new XpacketParsingException("Failed while retrieving xpacket", error);
        }
        // no filter key
        COSDictionary metadataDictionnary = COSUtils.getAsDictionary(cb, cdocument);
        if (metadataDictionnary.getItem(COSName.FILTER) != null)
        {
            // should not be defined
            ValidationError error = new ValidationError(PreflightConstants.ERROR_SYNTAX_STREAM_INVALID_FILTER,
                    "Filter specified in metadata dictionnary");
            throw new XpacketParsingException("Failed while retrieving xpacket", error);
        }

        PDStream stream = PDStream.createFromCOS(metadataDictionnary);
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        InputStream is = stream.createInputStream();
        IOUtils.copy(is, bos);
        is.close();
        bos.close();
        return bos.toByteArray();
    }

    /**
     * Check if metadata dictionary has no stream filter
     *
     * @param doc the document to check.
     * @return the list of validation errors.
     */
    protected List<ValidationError> checkStreamFilterUsage(PDDocument doc)
    {
        List<ValidationError> ve = new ArrayList<ValidationError>();
        List<?> filters = doc.getDocumentCatalog().getMetadata().getFilters();
        if (filters != null && !filters.isEmpty())
        {
            ve.add(new ValidationError(PreflightConstants.ERROR_METADATA_MAIN,
                    "Using stream filter on metadata dictionary is forbidden"));
        }
        return ve;
    }
}
TOP

Related Classes of org.apache.pdfbox.preflight.process.MetadataValidationProcess

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.