try {
PRTokeniser tokeniser = new PRTokeniser(contentBytes);
PdfContentParser ps = new PdfContentParser(tokeniser);
ArrayList<PdfObject> operands = new ArrayList<PdfObject>();
while (ps.parse(operands).size() > 0){
PdfLiteral operator = (PdfLiteral)operands.get(operands.size()-1);
// Special handling for embedded images: once we hit an ID operator, we need
// to skip all content until we reach an EI operator surrounded by whitespace.
// Known limitation of the scan below: the image stream may itself contain
// <ws>EI<ws>, in which case the scan stops at that first occurrence.
// Handling this correctly would apparently require actually decoding the
// content stream, which is deliberately avoided for now.
if ("ID".equals(operator.toString())){
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ByteArrayOutputStream accumulated = new ByteArrayOutputStream();
int ch;
int found = 0;
while ((ch = tokeniser.read()) != -1){
if (found == 0 && PRTokeniser.isWhitespace(ch)){
found++;
accumulated.write(ch);
} else if (found == 1 && ch == 'E'){
found++;
accumulated.write(ch);
} else if (found == 2 && ch == 'I'){
found++;
accumulated.write(ch);
} else if (found == 3 && PRTokeniser.isWhitespace(ch)){
operands = new ArrayList<PdfObject>();
operands.add(new PdfLiteral("ID"));
invokeOperator((PdfLiteral)operands.get(operands.size()-1), operands);
// TODO: eventually make the accumulated image content stream available
operands = new ArrayList<PdfObject>();
operands.add(new PdfLiteral("EI"));
invokeOperator((PdfLiteral)operands.get(operands.size()-1), operands);
break;
} else {
baos.write(accumulated.toByteArray());