*
* @throws IOException If there is an error reading the stream.
*/
protected COSStream parseCOSStream( COSDictionary dic, RandomAccess file ) throws IOException
{
COSStream stream = new COSStream( dic, file );
OutputStream out = null;
try
{
String streamString = readString();
//long streamLength;
if (!streamString.equals(STREAM_STRING))
{
throw new IOException("expected='stream' actual='" + streamString + "'");
}
//PDF Ref 3.2.7: the 'stream' keyword must be followed by either
//a CRLF or a lone LF, and nothing else (in particular not a lone CR).
int whitespace = pdfSource.read();
//see brother_scan_cover.pdf: it adds space characters (0x20)
//after the 'stream' keyword but before the start of the
//data, so skip over those first
while (whitespace == 0x20)
{
whitespace = pdfSource.read();
}
if( whitespace == 0x0D )
{
whitespace = pdfSource.read();
if( whitespace != 0x0A )
{
pdfSource.unread( whitespace );
//The spec says this is invalid but it happens in the real
//world so we must support it.
}
}
else if (whitespace == 0x0A)
{
//that is fine
}
else
{
//Strictly speaking this is an error,
//but again we parse leniently: push the byte back and assume
//that everything is fine.
pdfSource.unread( whitespace );
}
/*This needs to be dic.getItem because when we are parsing, the underlying object
* might still be null.
*/
COSBase streamLength = dic.getItem(COSName.LENGTH);
//The stream bytes read below are written to this output stream
out = stream.createFilteredStream( streamLength );
// try to read stream length - only a direct number is used; anything else (e.g. an indirect object) leaves it at -1, see below
int length = -1;
if ( streamLength instanceof COSNumber )
{
length = ( (COSNumber) streamLength).intValue();
}
// commented out next chunk since for the sequentially working PDFParser
// we do not know if length object is redefined later on and the currently
// read indirect object might be obsolete (e.g. not referenced in xref table);
// this would result in reading wrong number of bytes;
// Thus the only reliable information is a direct length.
// This exclusion shouldn't harm much since in case of indirect objects they will
// typically be defined after the stream object, thus keeping the directly
// provided length will fix most cases
// else if ( ( streamLength instanceof COSObject ) &&
// ( ( (COSObject) streamLength ).getObject() instanceof COSNumber ) )
// {
// length = ( (COSNumber) ( (COSObject) streamLength ).getObject() ).intValue();
// }
if ( length == -1 )
{
// Couldn't determine length from dict: just
// scan until we find endstream:
readUntilEndStream( out );
}
else
{
// Copy length bytes over:
int left = length;
while ( left > 0 )
{
final int chunk = Math.min( left, strmBufLen );
final int readCount = pdfSource.read( strmBuf, 0, chunk );
if ( readCount == -1 )
{
break;
}
out.write( strmBuf, 0, readCount );
left -= readCount;
}
// in order to handle broken documents we test if 'endstream' is reached
// if not, length value possibly was wrong, fall back to scanning for endstream
// fill buffer with next bytes and test for 'endstream' (with leading whitespaces)
int readCount = pdfSource.read( strmBuf, 0, 20 );
if ( readCount > 0 )
{
boolean foundEndstream = false;
int nextEndstreamCIdx = 0;
for ( int cIdx = 0; cIdx < readCount; cIdx++ )
{
final int ch = strmBuf[ cIdx ] & 0xff;
if ( ch == ENDSTREAM[ nextEndstreamCIdx ] )
{
if ( ++nextEndstreamCIdx >= ENDSTREAM.length )
{
foundEndstream = true;
break;
}
}
else if ( ( nextEndstreamCIdx > 0 ) || ( ! isWhitespace( ch ) ) )
{
// not found
break;
}
}
// push back test bytes
pdfSource.unread( strmBuf, 0, readCount );
// if 'endstream' was not found fall back to scanning
if ( ! foundEndstream )
{
LOG.warn("Specified stream length " + length
+ " is wrong. Fall back to reading stream until 'endstream'.");
// push back all read stream bytes
// we got a buffered stream wrapper around filteredStream thus first flush to underlying stream
out.flush();
InputStream writtenStreamBytes = stream.getFilteredStream();
ByteArrayOutputStream bout = new ByteArrayOutputStream( length );
while ( ( readCount = writtenStreamBytes.read( strmBuf ) ) >= 0 )
{
bout.write( strmBuf, 0, readCount );
}
try
{
pdfSource.unread( bout.toByteArray() );
}
catch ( IOException ioe )
{
throw new WrappedIOException( "Could not push back " + bout.size() +
" bytes in order to reparse stream. " +
"Try increasing push back buffer using system property " +
PROP_PUSHBACK_SIZE, ioe );
}
// create new filtered stream
out = stream.createFilteredStream( streamLength );
// scan until we find endstream:
readUntilEndStream( out );
}
}
}