Examples of org.apache.pdfbox.cos.COSStream

org.apache.pdfbox.cos.COSStream
This class represents a stream object in a PDF document. @author Ben Litchfield

    
    private COSStream createContentStream(COSBase contents) throws IOException 
    {
        List<COSStream> contentStreams = createContentStreamList(contents);
        // concatenate streams
        COSStream concatStream = new COSStream(new RandomAccessBuffer());
        OutputStream out = concatStream.createUnfilteredStream();
        for (COSStream contentStream : contentStreams) 
        {
            InputStream in = contentStream.getUnfilteredStream();
            byte[] buf = new byte[2048];
            int n;
            while ((n = in.read(buf)) > 0) 
            {
                out.write(buf, 0, n);
            }
            out.flush();
        }
        out.close();
        concatStream.setFilters(COSName.FLATE_DECODE);
        return concatStream;
    }

View Full Code Here

            resources.getCOSDictionary().setItem(COSName.XOBJECT, dict);
        }
        String xObjectId = MapUtil.getNextUniqueKey( resources.getXObjects(), XOBJECT_PREFIX );


        // wrap the layout content in a BBox and add it to page
        COSStream xobj = contentStream;
        xobj.setItem(COSName.RESOURCES, layoutPage.overlayResources);
        xobj.setItem(COSName.TYPE, COSName.XOBJECT);
        xobj.setItem(COSName.SUBTYPE, COSName.FORM);
        xobj.setInt(COSName.FORMTYPE, 1);
        COSArray matrix = new COSArray();
        matrix.add(COSInteger.get(1));
        matrix.add(COSInteger.get(0));
        matrix.add(COSInteger.get(0));
        matrix.add(COSInteger.get(1));
        matrix.add(COSInteger.get(0));
        matrix.add(COSInteger.get(0));
        xobj.setItem(COSName.MATRIX, matrix);
        COSArray bbox = new COSArray();
        bbox.add(COSInteger.get(0));
        bbox.add(COSInteger.get(0));
        bbox.add(COSInteger.get((int) layoutPage.overlayMediaBox.getWidth()));
        bbox.add(COSInteger.get((int) layoutPage.overlayMediaBox.getHeight()));
        xobj.setItem(COSName.BBOX, bbox);
        dict.setItem(xObjectId, xobj);
        
        return xObjectId;
    }

View Full Code Here

                + xObjectId + " Do Q\nQ\n");
    }


    private COSStream createStream(String content) throws IOException 
    {
        COSStream stream = new COSStream(new RandomAccessBuffer());
        OutputStream out = stream.createUnfilteredStream();
        out.write(content.getBytes("ISO-8859-1"));
        out.close();
        stream.setFilters(COSName.FLATE_DECODE);
        return stream;
    }

View Full Code Here

     *
     * @throws IOException If there is an error reading the stream.
     */
    protected COSStream parseCOSStream( COSDictionary dic, RandomAccess file ) throws IOException
    {
        COSStream stream = new COSStream( dic, file );
        OutputStream out = null;
        try
        {
            String streamString = readString();
            //long streamLength;


            if (!streamString.equals(STREAM_STRING))
            {
                throw new IOException("expected='stream' actual='" + streamString + "'");
            }


            //PDF Ref 3.2.7 A stream must be followed by either
            //a CRLF or LF but nothing else.


            int whitespace = pdfSource.read();


            //see brother_scan_cover.pdf, it adds whitespaces
            //after the stream but before the start of the
            //data, so just read those first
            while (whitespace == 0x20)
            {
                whitespace = pdfSource.read();
            }


            if( whitespace == 0x0D )
            {
                whitespace = pdfSource.read();
                if( whitespace != 0x0A )
                {
                    pdfSource.unread( whitespace );
                    //The spec says this is invalid but it happens in the real
                    //world so we must support it.
                }
            }
            else if (whitespace == 0x0A)
            {
                //that is fine
            }
            else
            {
                //we are in an error.
                //but again we will do a lenient parsing and just assume that everything
                //is fine
                pdfSource.unread( whitespace );
            }


            /*This needs to be dic.getItem because when we are parsing, the underlying object
             * might still be null.
             */
            COSBase streamLength = dic.getItem(COSName.LENGTH);


            //Need to keep track of the
            out = stream.createFilteredStream( streamLength );


            // try to read stream length - even if it is an indirect object
            int length = -1;
            if ( streamLength instanceof COSNumber )
            {
                length = ( (COSNumber) streamLength).intValue();
            }
// commented out next chunk since for the sequentially working PDFParser
// we do not know if length object is redefined later on and the currently
// read indirect object might be obsolete (e.g. not referenced in xref table);
// this would result in reading wrong number of bytes;
// Thus the only reliable information is a direct length. 
// This exclusion shouldn't harm much since in case of indirect objects they will
// typically be defined after the stream object, thus keeping the directly
// provided length will fix most cases
//            else if ( ( streamLength instanceof COSObject ) &&
//                      ( ( (COSObject) streamLength ).getObject() instanceof COSNumber ) )
//            {
//                length = ( (COSNumber) ( (COSObject) streamLength ).getObject() ).intValue();
//            } 
            
            if ( length == -1 )
            {
                // Couldn't determine length from dict: just
                // scan until we find endstream:
                readUntilEndStream( out );
            }
            else
            {
                // Copy length bytes over:
                int left = length;
                while ( left > 0 )
                {
                    final int chunk = Math.min( left, strmBufLen );
                    final int readCount = pdfSource.read( strmBuf, 0, chunk );
                    if ( readCount == -1 )
                    {
                        break;
                    }
                    out.write( strmBuf, 0, readCount );
                    left -= readCount;
                }
                
                // in order to handle broken documents we test if 'endstream' is reached
                // if not, length value possibly was wrong, fall back to scanning for endstream
                
                // fill buffer with next bytes and test for 'endstream' (with leading whitespaces)
                int readCount = pdfSource.read( strmBuf, 0, 20 );
                if ( readCount > 0 )
                {
                    boolean foundEndstream    = false;
                    int     nextEndstreamCIdx = 0;
                    for ( int cIdx = 0; cIdx < readCount; cIdx++ )
                    {
                        final int ch = strmBuf[ cIdx ] & 0xff; 
                        if ( ch == ENDSTREAM[ nextEndstreamCIdx ] )
                        {
                            if ( ++nextEndstreamCIdx >= ENDSTREAM.length )
                            {
                                foundEndstream = true;
                                break;
                            }
                        }
                        else if ( ( nextEndstreamCIdx > 0 ) || ( ! isWhitespace( ch ) ) )
                        {
                            // not found
                            break;
                        }
                    }
                    
                    // push back test bytes
                    pdfSource.unread( strmBuf, 0, readCount );
                    
                    // if 'endstream' was not found fall back to scanning
                    if ( ! foundEndstream )
                    {
                        LOG.warn("Specified stream length " + length 
                                + " is wrong. Fall back to reading stream until 'endstream'.");
                        
                        // push back all read stream bytes
                        // we got a buffered stream wrapper around filteredStream thus first flush to underlying stream
                        out.flush();
                        InputStream writtenStreamBytes = stream.getFilteredStream();
                        ByteArrayOutputStream     bout = new ByteArrayOutputStream( length );
                        
                        while ( ( readCount = writtenStreamBytes.read( strmBuf ) ) >= 0 )
                        {
                            bout.write( strmBuf, 0, readCount );
                        }
                        try
                        {
                            pdfSource.unread( bout.toByteArray() );
                        }
                        catch ( IOException ioe )
                        {
                            throw new WrappedIOException( "Could not push back " + bout.size() + 
                                                          " bytes in order to reparse stream. " +
                                                          "Try increasing push back buffer using system property " +
                                                          PROP_PUSHBACK_SIZE, ioe );
                        }
                        // create new filtered stream
                        out = stream.createFilteredStream( streamLength );
                        // scan until we find endstream:
                        readUntilEndStream( out );
                    }
                }
            }

View Full Code Here

        readObjectNumber();
        readGenerationNumber();
        readPattern(OBJ_MARKER);


        COSDictionary dict = parseCOSDictionary();
        COSStream xrefStream = parseCOSStream(dict, getDocument().getScratchFile());
        parseXrefStream(xrefStream, (int) objByteOffset);


        return dict.getLong(COSName.PREV);
    }

View Full Code Here

                {
                    pdfSource.unread(endObjectKey.getBytes("ISO-8859-1"));
                    pdfSource.unread(' ');
                    if (pb instanceof COSDictionary)
                    {
                        COSStream stream = parseCOSStream((COSDictionary) pb, getDocument().getScratchFile());


                        if (securityHandler != null)
                        {
                            try
                            {

View Full Code Here

     *             with 'endstream' after data read, stream too short etc.
     */
    @Override
    protected COSStream parseCOSStream(COSDictionary dic, RandomAccess file) throws IOException
    {
        final COSStream stream = new COSStream(dic, file);
        OutputStream out = null;
        try
        {
            readString(); // read 'stream'; this was already tested in
                          // parseObjectsDynamically()


            // ---- skip whitespaces before start of data
            // PDF Ref 1.7, chap. 3.2.7:
            // 'stream' should be followed by either a CRLF (0x0d 0x0a) or LF
            // but nothing else.
            {
                int whitespace = pdfSource.read();


                // see brother_scan_cover.pdf, it adds whitespaces
                // after the stream but before the start of the
                // data, so just read those first
                while (whitespace == 0x20)
                {
                    whitespace = pdfSource.read();
                }


                if (whitespace == 0x0D)
                {
                    whitespace = pdfSource.read();
                    if (whitespace != 0x0A)
                    {
                        // the spec says this is invalid but it happens in the
                        // real
                        // world so we must support it
                        pdfSource.unread(whitespace);
                    }
                }
                else if (whitespace != 0x0A)
                {
                    // no whitespace after 'stream'; PDF ref. says 'should' so
                    // that is ok
                    pdfSource.unread(whitespace);
                }
            }


            /*
             * This needs to be dic.getItem because when we are parsing, the underlying object might still be null.
             */
            COSNumber streamLengthObj = getLength(dic.getItem(COSName.LENGTH));
            if (streamLengthObj == null)
            {
                throw new IOException("Missing length for stream.");
            }


            // ---- get output stream to copy data to
            out = stream.createFilteredStream(streamLengthObj);


            long remainBytes = streamLengthObj.longValue();
            int bytesRead = 0;
            boolean unexpectedEndOfStream = false;
            if (remainBytes == 35090)
            {
                // TODO debug system out, to be removed??
                System.out.println();
            }
            while (remainBytes > 0)
            {
                final int readBytes = pdfSource.read(streamCopyBuf, 0,
                        (remainBytes > streamCopyBufLen) ? streamCopyBufLen : (int) remainBytes);
                if (readBytes <= 0)
                {
                    // throw new IOException(
                    // "No more bytes from stream but expected: " + remainBytes
                    // );
                    unexpectedEndOfStream = true;
                    break;
                }
                out.write(streamCopyBuf, 0, readBytes);
                remainBytes -= readBytes;
                bytesRead += readBytes;
            }
            if (unexpectedEndOfStream)
            {
                pdfSource.unread(bytesRead);
                out = stream.createFilteredStream(streamLengthObj);
                readUntilEndStream(out);
            }
            String endStream = readString();
            if (!endStream.equals("endstream"))
            {

View Full Code Here

        }


        if(xobject instanceof PDXObjectForm)
        {
            PDXObjectForm form = (PDXObjectForm)xobject;
            COSStream formContentstream = form.getCOSStream();
            // if there is an optional form matrix, we have to map the form space to the user space
            Matrix matrix = form.getMatrix();
            if (matrix != null) 
            {
                Matrix xobjectCTM = matrix.multiply( context.getGraphicsState().getCurrentTransformationMatrix());

View Full Code Here

            PDPage nextPage = (PDPage)pageIter.next();
            PDStream contentStream = nextPage.getContents();
            currentPageNo++;
            if( contentStream != null )
            {
                COSStream contents = contentStream.getStream();
                processPage( nextPage, contents );
            }
        }
    }

View Full Code Here

        }
    }


    public void validateStreamObject(PreflightContext context, COSObject cObj) throws ValidationException
    {
        COSStream streamObj = (COSStream) cObj.getObject();


        // ---- Check dictionary entries
        // ---- Only the Length entry is mandatory
        // ---- In a PDF/A file, F, FFilter and FDecodeParms are forbidden
        checkDictionaryEntries(context, streamObj);

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.pdfbox.cos.COSStream

de.pdf_scrutinizer.exposures.CVE_2009_0658

net.timendum.pdf.Images2HTML

org.apache.padaf.preflight.ExtractStream

org.apache.padaf.preflight.font.CompositeFontValidator

org.apache.padaf.preflight.font.TrueTypeFontValidator

org.apache.padaf.preflight.font.Type1FontValidator

org.apache.padaf.preflight.font.Type3FontValidator

org.apache.padaf.preflight.helpers.GraphicsValidationHelper

org.apache.padaf.preflight.helpers.StreamValidationHelper

org.apache.padaf.preflight.utils.TestCOSUtils

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.