/*****************************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
package org.apache.padaf.preflight.contentstream;
import static org.apache.padaf.preflight.ValidationConstants.DICTIONARY_KEY_RESOURCES;
import static org.apache.padaf.preflight.ValidationConstants.ERROR_FONTS_ENCODING_ERROR;
import static org.apache.padaf.preflight.ValidationConstants.ERROR_FONTS_FONT_FILEX_INVALID;
import static org.apache.padaf.preflight.ValidationConstants.ERROR_FONTS_UNKNOWN_FONT_REF;
import static org.apache.padaf.preflight.ValidationConstants.ERROR_SYNTAX_CONTENT_STREAM_INVALID_ARGUMENT;
import static org.apache.padaf.preflight.ValidationConstants.ERROR_SYNTAX_CONTENT_STREAM_UNSUPPORTED_OP;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.padaf.preflight.DocumentHandler;
import org.apache.padaf.preflight.ValidationException;
import org.apache.padaf.preflight.ValidationResult.ValidationError;
import org.apache.padaf.preflight.font.AbstractFontContainer;
import org.apache.padaf.preflight.font.AbstractFontContainer.State;
import org.apache.padaf.preflight.font.GlyphException;
import org.apache.padaf.preflight.utils.ContentStreamEngine;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSFloat;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.graphics.PDGraphicsState;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectForm;
import org.apache.pdfbox.pdmodel.text.PDTextState;
import org.apache.pdfbox.util.PDFOperator;
import org.apache.pdfbox.util.operator.OperatorProcessor;
public class ContentStreamWrapper extends ContentStreamEngine {
/**
 * Creates a content stream wrapper.
 *
 * @param handler
 *          the document handler, forwarded unchanged to the
 *          {@link ContentStreamEngine} constructor.
 */
public ContentStreamWrapper(DocumentHandler handler) {
super(handler);
}
/**
 * Validates the content stream of the given page.
 *
 * A page without content stream is accepted and yields an empty list. A
 * ContentStreamException raised while the stream is processed is converted
 * into a single ValidationError built from its validation error code and
 * message.
 *
 * @param page
 *          the page whose content stream is checked.
 * @return the list of validation errors; empty if the validation succeeds.
 * @throws ValidationException
 *           if an IOException that is not handled as a
 *           ContentStreamException is raised during the processing.
 */
public List<ValidationError> validPageContentStream(PDPage page)
    throws ValidationException {
  final List<ValidationError> result = new ArrayList<ValidationError>();
  try {
    final PDStream contents = page.getContents();
    if (contents == null) {
      // ---- nothing to validate
      return result;
    }
    processStream(page, page.findResources(), contents.getStream());
  } catch (ContentStreamException cse) {
    result.add(new ValidationError(cse.getValidationError(), cse.getMessage()));
  } catch (IOException ioe) {
    throw new ValidationException("Unable to check the ContentStream : "
        + ioe.getMessage(), ioe);
  }
  return result;
}
/**
 * Validates the content stream of a Form XObject.
 *
 * The engine context is reset before the stream is processed. A
 * ContentStreamException raised while the stream is processed is converted
 * into a single ValidationError.
 *
 * @param xobj
 *          the Form XObject to check; its own resources and COS stream are
 *          handed to the engine.
 * @return the list of validation errors; empty if the validation succeeds.
 * @throws ValidationException
 *           if an IOException that is not handled as a
 *           ContentStreamException is raised during the processing.
 */
public List<ValidationError> validXObjContentStream(PDXObjectForm xobj)
    throws ValidationException {
  final List<ValidationError> result = new ArrayList<ValidationError>();
  try {
    resetEnginContext();
    final PDResources formResources = xobj.getResources();
    final COSStream formStream = xobj.getCOSStream();
    // ---- no page is associated with the sub stream
    processSubStream(null, formResources, formStream);
  } catch (ContentStreamException cse) {
    result.add(new ValidationError(cse.getValidationError(), cse.getMessage()));
  } catch (IOException ioe) {
    throw new ValidationException("Unable to check the ContentStream : "
        + ioe.getMessage(), ioe);
  }
  return result;
}
/**
 * Validates the content stream of a Tiling Pattern.
 *
 * The resources used to process the stream are read from the "Resources"
 * entry of the pattern stream. If this entry is missing, an empty resources
 * object is used so that the stream is still processed and any reference to
 * a missing resource is handled by the regular operator checks.
 *
 * @param pattern
 *          the COS stream of the tiling pattern.
 * @return the list of validation errors; empty if the validation succeeds.
 * @throws ValidationException
 *           if an IOException that is not handled as a
 *           ContentStreamException is raised during the processing.
 */
public List<ValidationError> validPatternContentStream(COSStream pattern)
    throws ValidationException {
  List<ValidationError> errors = new ArrayList<ValidationError>();
  try {
    COSDictionary res = (COSDictionary) pattern
        .getDictionaryObject(DICTIONARY_KEY_RESOURCES);
    // ---- Never wrap a null dictionary: a PDResources built on null is
    // expected to fail with an unchecked exception as soon as the engine
    // reads fonts/xobjects from it (NOTE(review): confirm against the PDFBox
    // version in use).
    PDResources resources = (res != null) ? new PDResources(res)
        : new PDResources();
    resetEnginContext();
    processSubStream(null, resources, pattern);
  } catch (ContentStreamException e) {
    errors.add(new ValidationError(e.getValidationError(), e.getMessage()));
  } catch (IOException e) {
    throw new ValidationException("Unable to check the ContentStream : "
        + e.getMessage(), e);
  }
  return errors;
}
/**
 * Puts the engine back into a fresh state: installs a new graphics state,
 * clears the text matrix and the text line matrix, and empties the graphics
 * stack.
 */
public final void resetEnginContext() {
  setGraphicsState(new PDGraphicsState());
  setTextMatrix(null);
  setTextLineMatrix(null);
  getGraphicsStack().clear();
  // NOTE: the stream resources stack is not cleared here; the call
  // "streamResourcesStack.clear()" is disabled.
}
/**
 * Dispatches an operator to its registered processor, then runs the
 * validation checks on it.
 *
 * This mirrors the super method except for the handling of an unknown
 * operator: instead of being ignored, it is reported through
 * throwContentStreamException with the "unsupported operator" error code.
 *
 * @see org.apache.pdfbox.util.PDFStreamEngine#processOperator(org.apache.pdfbox
 *      .util.PDFOperator, java.util.List)
 * @param operator
 *          the operator read from the content stream.
 * @param arguments
 *          the operands of the operator.
 * @throws IOException
 *           if the processing or one of the validation checks fails.
 */
protected void processOperator(PDFOperator operator, List arguments)
    throws IOException {
  final String opName = operator.getOperation();
  final OperatorProcessor handler =
      (OperatorProcessor) contentStreamEngineOperators.get(opName);
  if (handler == null) {
    // ---- unknown operators are reported, not skipped
    throwContentStreamException("The operator \"" + opName
        + "\" isn't supported.", ERROR_SYNTAX_CONTENT_STREAM_UNSUPPORTED_OP);
  } else {
    handler.setContext(this);
    handler.process(operator, arguments);
  }

  // ---- Validation specific to inline images
  if ("BI".equals(opName)) {
    validImageFilter(operator);
    validImageColorSpace(operator);
  }

  // ---- Validation applied to every operator
  checkShowTextOperators(operator, arguments);
  checkColorOperators(opName);
  validRenderingIntent(operator, arguments);
  checkSetColorSpaceOperators(operator, arguments);
  validNumberOfGraphicStates(operator);
}
/**
 * Runs the text validation for the text-showing operators.
 *
 * The operators Tj, ' and " are checked by
 * validStringDefinition(PDFOperator, List), the operator TJ is checked by
 * validStringArray(PDFOperator, List). Any other operator is ignored.
 *
 * @param operator
 *          the operator read from the content stream.
 * @param arguments
 *          the operands of the operator.
 * @throws ContentStreamException
 *           if the operands or the text they carry are invalid.
 * @throws IOException
 *           if the validation fails for an unexpected reason.
 */
protected void checkShowTextOperators(PDFOperator operator, List<?> arguments)
    throws ContentStreamException, IOException {
  final String opName = operator.getOperation();
  final boolean showsSingleString = "Tj".equals(opName)
      || "'".equals(opName)
      || "\"".equals(opName);
  if (showsSingleString) {
    validStringDefinition(operator, arguments);
  } else if ("TJ".equals(opName)) {
    validStringArray(operator, arguments);
  }
}
/**
 * Process Text Validation for the operands of a Tj, "'" and "\"" operator.
 *
 * Expected operands:
 * <UL>
 * <li>"\"" : exactly three operands, two numbers (COSInteger or COSFloat)
 * followed by a COSString
 * <li>Tj and "'" : at least one operand; the first one is validated if it is
 * a COSString, tolerated if it is a COSInteger and rejected otherwise
 * </UL>
 *
 * If the validation fails for an unexpected reason, an IOException is thrown.
 * If the validation fails due to a validation error, a ContentStreamException
 * is thrown. (Use the ValidationError attribute to know the cause)
 *
 * @param operator
 *          the text-showing operator.
 * @param arguments
 *          the operands of the operator.
 * @throws ContentStreamException
 *           if the operands are missing or have an unexpected type, or if
 *           the text is invalid.
 * @throws IOException
 *           if the validation fails for an unexpected reason.
 */
private void validStringDefinition(PDFOperator operator, List<?> arguments)
    throws ContentStreamException, IOException {
  final String invalidArgumentMessage = "Invalid argument for the operator : "
      + operator.getOperation();

  if ("\"".equals(operator.getOperation())) {
    // ---- The operator " expects : wordSpacing charSpacing string
    if (arguments.size() != 3) {
      throwContentStreamException(invalidArgumentMessage,
          ERROR_SYNTAX_CONTENT_STREAM_INVALID_ARGUMENT);
    }
    Object arg0 = arguments.get(0);
    Object arg1 = arguments.get(1);
    Object arg2 = arguments.get(2);
    if (!(arg0 instanceof COSInteger || arg0 instanceof COSFloat)
        || !(arg1 instanceof COSInteger || arg1 instanceof COSFloat)) {
      throwContentStreamException(invalidArgumentMessage,
          ERROR_SYNTAX_CONTENT_STREAM_INVALID_ARGUMENT);
    }
    if (arg2 instanceof COSString) {
      validText(((COSString) arg2).getBytes());
    } else {
      throwContentStreamException(invalidArgumentMessage,
          ERROR_SYNTAX_CONTENT_STREAM_INVALID_ARGUMENT);
    }
  } else {
    // ---- An operator without operand must be reported as a validation
    // error instead of failing with an IndexOutOfBoundsException
    if (arguments.isEmpty()) {
      throwContentStreamException(invalidArgumentMessage,
          ERROR_SYNTAX_CONTENT_STREAM_INVALID_ARGUMENT);
    }
    Object objStr = arguments.get(0);
    if (objStr instanceof COSString) {
      validText(((COSString) objStr).getBytes());
    } else if (!(objStr instanceof COSInteger)) {
      throwContentStreamException(invalidArgumentMessage,
          ERROR_SYNTAX_CONTENT_STREAM_INVALID_ARGUMENT);
    }
  }
}
/**
 * Process Text Validation for the operands of a TJ operator.
 *
 * Each operand is handled according to its type: a COSString is validated as
 * text, a COSArray is walked recursively, a number (COSInteger or COSFloat)
 * is accepted without further check and anything else is reported as an
 * invalid argument.
 *
 * If the validation fails for an unexpected reason, an IOException is thrown.
 * If the validation fails due to a validation error, a ContentStreamException
 * is thrown. (Use the ValidationError attribute to know the cause)
 *
 * @param operator
 *          the TJ operator (used to build the error message).
 * @param arguments
 *          the operands to check.
 * @throws ContentStreamException
 *           if an operand has an unexpected type or carries invalid text.
 * @throws IOException
 *           if the validation fails for an unexpected reason.
 */
private void validStringArray(PDFOperator operator, List<?> arguments)
    throws ContentStreamException, IOException {
  for (final Object operand : arguments) {
    if (operand instanceof COSString) {
      validText(((COSString) operand).getBytes());
      continue;
    }
    if (operand instanceof COSArray) {
      validStringArray(operator, ((COSArray) operand).toList());
      continue;
    }
    final boolean numeric = operand instanceof COSInteger
        || operand instanceof COSFloat;
    if (!numeric) {
      throwContentStreamException("Invalid argument for the operator : "
          + operator.getOperation(),
          ERROR_SYNTAX_CONTENT_STREAM_INVALID_ARGUMENT);
    }
  }
}
/**
 * Validates a text operand found in a content stream, using the font and the
 * rendering mode of the current text state. This validation checks that :
 * <UL>
 * <li>A font is set in the text state
 * <li>The font is known by the document handler and its font program is
 * embedded, unless the rendering mode is 3
 * <li>Every character code of the text is accepted by the font container
 * (checkCID); a failure of this check is ignored if the rendering mode is 3
 * </UL>
 * Nothing is checked if the font container is already flagged as invalid.
 *
 * @param string
 *          the bytes of the text operand.
 * @throws IOException
 *           if one of the checks fails (reported through
 *           throwContentStreamException).
 */
public void validText(byte[] string) throws IOException {
  // --- The font and the rendering mode are accessible through the TextState
  final PDTextState state = getGraphicsState().getTextState();
  final int renderingMode = state.getRenderingMode();
  final PDFont font = state.getFont();
  if (font == null) {
    // ---- Unable to decode the Text without Font
    throwContentStreamException("Text operator can't be process without Font", ERROR_FONTS_UNKNOWN_FONT_REF);
  }

  final AbstractFontContainer container = documentHandler.getFont(font.getCOSObject());
  if (container == null) {
    if (renderingMode == 3) {
      // ---- unknown font with rendering mode 3 : valid case, nothing to check
      return;
    }
    // ---- Font Must be embedded if the RenderingMode isn't 3
    throwContentStreamException(font.getBaseFont() + " is unknown wasn't found by the FontHelperValdiator", ERROR_FONTS_UNKNOWN_FONT_REF);
  } else if (container.isValid() == State.INVALID) {
    // ---- the font is already flagged as invalid
    return;
  } else if (renderingMode == 3 && !container.isFontProgramEmbedded()) {
    // ---- font not embedded and rendering mode is 3 : valid case
    return;
  }

  if (!container.isFontProgramEmbedded()) {
    throwContentStreamException(font.getBaseFont() + " isn't embedded and the rendering mode isn't 3", ERROR_FONTS_FONT_FILEX_INVALID);
  }

  int offset = 0;
  while (offset < string.length) {
    int length = 1;
    int cid = -1;
    try {
      // ---- try a single byte code first
      cid = font.encodeToCID(string, offset, length);
      if (cid == -1 && offset + 1 < string.length) {
        // ---- maybe a multibyte encoding : retry with two bytes
        length++;
        cid = font.encodeToCID(string, offset, length);
      }
    } catch (IOException encodingFailure) {
      throwContentStreamException("Encoding can't interpret the character code", ERROR_FONTS_ENCODING_ERROR);
    }

    try {
      container.checkCID(cid);
    } catch (GlyphException glyphFailure) {
      // ---- glyph errors are ignored when the rendering mode is 3
      if (renderingMode != 3) {
        throwContentStreamException(glyphFailure.getMessage(), glyphFailure.getErrorCode());
      }
    }

    // ---- move to the next character code
    offset += length;
  }
}
}