Package org.apache.pdfbox.cos

Source Code of org.apache.pdfbox.cos.COSStream

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.cos;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.InputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.filter.DecodeResult;
import org.apache.pdfbox.filter.Filter;
import org.apache.pdfbox.filter.FilterFactory;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.RandomAccess;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.io.RandomAccessFileInputStream;
import org.apache.pdfbox.io.RandomAccessFileOutputStream;
import org.apache.pdfbox.pdfparser.PDFStreamParser;

/**
* This class represents a stream object in a PDF document.
*
* @author Ben Litchfield
*/
public class COSStream extends COSDictionary implements Closeable
{
    /**
     * Log instance.
     */
    private static final Log LOG = LogFactory.getLog(COSStream.class);

    // Size, in bytes, of the java.io buffered wrappers this class puts around
    // its random access input and output streams.
    private static final int BUFFER_SIZE=16384;

    /**
     * internal buffer, either held in memory or within a scratch file.
     * Both the filtered and the unfiltered stream are written to and read
     * back from this single buffer.
     */
    private RandomAccess buffer;
    /**
     * The stream with all of the filters applied.
     * Stays null until it is created explicitly or produced by encoding.
     */
    private RandomAccessFileOutputStream filteredStream;

    /**
     * The stream with no filters, this contains the useful data.
     * Stays null until it is created explicitly or produced by decoding.
     */
    private RandomAccessFileOutputStream unFilteredStream;
    // Result of the most recent decode pass; handed out by getDecodeResult().
    private DecodeResult decodeResult;

    /**
     * Constructor.  Creates a new stream with an empty dictionary.
     *
     */
    public COSStream( )
    {
        this(false, null);
    }

    /**
     * Constructor.
     *
     * @param dictionary The dictionary that is associated with this stream.
     *
     */
    public COSStream( COSDictionary dictionary )
    {
        this(dictionary, false, null);
    }

    /**
     * Constructor.  Creates a new stream with an empty dictionary.
     *
     * @param useScratchFiles enables the usage of a scratch file if set to true
     * @param scratchDirectory directory to be used to create the scratch file. If null java.io.temp is used instead.
     *    
     */
    public COSStream( boolean useScratchFiles, File scratchDirectory )
    {
        super();
        if (useScratchFiles)
        {
            createScratchFile(scratchDirectory);
        }
        if (buffer == null)
        {
            buffer = new RandomAccessBuffer();
        }
    }

    /**
     * Constructor.
     *
     * @param dictionary The dictionary that is associated with this stream.
     * @param useScratchFiles enables the usage of a scratch file if set to true
     * @param scratchDirectory directory to be used to create the scratch file. If null java.io.temp is used instead.
     *
     */
    public COSStream( COSDictionary dictionary, boolean useScratchFiles, File scratchDirectory  )
    {
        super( dictionary );
        if (useScratchFiles)
        {
            createScratchFile(scratchDirectory);
        }
        if (buffer == null)
        {
            buffer = new RandomAccessBuffer();
        }
    }

    /**
     * Create a scratch file to be used as buffer to decrease memory foot print.
     *
     * @param scratchDirectory directory to be used to create the scratch file. If null java.io.temp is used instead.
     *
     */
    private void createScratchFile(File scratchDirectory)
    {
        try
        {
            File scratchFile = File.createTempFile("PDFBox", null, scratchDirectory);
            // mark scratch file to deleted automatically after usage
            scratchFile.deleteOnExit();
            buffer = new RandomAccessFile(scratchFile, "rw");
        }
        catch (IOException exception)
        {
            LOG.error("Can't create temp file, using memory buffer instead", exception);
        }
    }

    /**
     * This will get all the tokens in the stream.
     *
     * @return All of the tokens in the stream.
     *
     * @throws IOException If there is an error parsing the stream.
     */
    public List<Object> getStreamTokens() throws IOException
    {
        PDFStreamParser parser = new PDFStreamParser( this );
        parser.parse();
        return parser.getTokens();
    }

    /**
     * This will get the stream with all of the filters applied.
     *
     * @return the bytes of the physical (encoded) stream
     *
     * @throws IOException when encoding/decoding causes an exception
     */
    public InputStream getFilteredStream() throws IOException
    {
        if( filteredStream == null )
        {
            doEncode();
        }
        long position = filteredStream.getPosition();
        long length = filteredStream.getLengthWritten();

        RandomAccessFileInputStream input =
            new RandomAccessFileInputStream( buffer, position, length );
        return new BufferedInputStream( input, BUFFER_SIZE );
    }

    /**
     * This will get the length of the encoded stream.
     *
     * @return the length of the encoded stream as long
     *
     * @throws IOException
     */
    public long getFilteredLength() throws IOException
    {
        if (filteredStream == null)
        {
            doEncode();
        }
        return filteredStream.getLength();
    }
   
    /**
     * This will set the expected length of the encoded stream. Call this method
     * if the previously set expected length is wrong, to avoid further trouble.
     *
     * @param length the expected length of the encoded stream.
     */
    public void setFilteredLength(long length)
    {
        filteredStream.setExpectedLength(COSInteger.get(length));
    }

    /**
     * This will get the length of the data written in the encoded stream.
     *
     * @return the length of the data written in the encoded stream as long
     *
     * @throws IOException
     */
    public long getFilteredLengthWritten() throws IOException
    {
        if (filteredStream == null)
        {
            doEncode();
        }
        return filteredStream.getLengthWritten();
    }
   

    /**
     * This will get the logical content stream with none of the filters.
     *
     * @return the bytes of the logical (decoded) stream
     *
     * @throws IOException when encoding/decoding causes an exception
     */
    public InputStream getUnfilteredStream() throws IOException
    {
        InputStream retval;
        if( unFilteredStream == null )
        {
            doDecode();
        }

        //if unFilteredStream is still null then this stream has not been
        //created yet, so we should return null.
        if( unFilteredStream != null )
        {
            long position = unFilteredStream.getPosition();
            long length = unFilteredStream.getLengthWritten();
            RandomAccessFileInputStream input =
                new RandomAccessFileInputStream( buffer, position, length );
            retval = new BufferedInputStream( input, BUFFER_SIZE );
        }
        else
        {
            // We should check if the COSStream contains data, maybe it
            // has been created with a RandomAccessFile - which is not
            // necessary empty.
            // In this case, the creation was been done as an input, this should
            // be the unfiltered file, since no filter has been applied yet.
//            if ( (file != null) &&
//                    (file.length() > 0) )
//            {
//                retval = new RandomAccessFileInputStream( file,
//                                                          0,
//                                                          file.length() );
//            }
//            else
//            {
                //if there is no stream data then simply return an empty stream.
                retval = new ByteArrayInputStream( new byte[0] );
//            }
        }
        return retval;
    }

    /**
     * Returns the repaired stream parameters dictionary.
     *
     * @return the repaired stream parameters dictionary
     * @throws IOException when encoding/decoding causes an exception
     */
    public DecodeResult getDecodeResult() throws IOException
    {
        if (unFilteredStream == null)
        {
            doDecode();
        }

        if (unFilteredStream == null || decodeResult == null)
        {
            throw new IOException("Stream was not read");
        }
        else
        {
            return decodeResult;
        }
    }

    @Override
    public Object accept(ICOSVisitor visitor) throws IOException
    {
        return visitor.visitFromStream(this);
    }

    /**
     * This will decode the physical byte stream applying all of the filters to the stream.
     *
     * @throws IOException If there is an error applying a filter to the stream.
     */
    private void doDecode() throws IOException
    {
// FIXME: We shouldn't keep the same reference?
        unFilteredStream = filteredStream;

        COSBase filters = getFilters();
        if( filters == null )
        {
            //then do nothing
            decodeResult = DecodeResult.DEFAULT;
        }
        else if( filters instanceof COSName )
        {
            doDecode( (COSName)filters, 0 );
        }
        else if( filters instanceof COSArray )
        {
            COSArray filterArray = (COSArray)filters;
            for( int i=0; i<filterArray.size(); i++ )
            {
                COSName filterName = (COSName)filterArray.get( i );
                doDecode( filterName, i );
            }
        }
        else
        {
            throw new IOException( "Error: Unknown filter type:" + filters );
        }
    }

    /**
     * This will decode applying a single filter on the stream.
     *
     * @param filterName The name of the filter.
     * @param filterIndex The index of the current filter.
     *
     * @throws IOException If there is an error parsing the stream.
     */
    private void doDecode( COSName filterName, int filterIndex ) throws IOException
    {
        Filter filter = FilterFactory.INSTANCE.getFilter( filterName );

        boolean done = false;
        IOException exception = null;
        long position = unFilteredStream.getPosition();
        long length = unFilteredStream.getLength();
        // in case we need it later
        long writtenLength = unFilteredStream.getLengthWritten()

        if (length == 0 && writtenLength == 0)
        {
            //if the length is zero then don't bother trying to decode
            //some filters don't work when attempting to decode
            //with a zero length stream.  See zlib_error_01.pdf
            IOUtils.closeQuietly(unFilteredStream);
            unFilteredStream = new RandomAccessFileOutputStream( buffer );
            done = true;
        }
        else
        {
            //ok this is a simple hack, sometimes we read a couple extra
            //bytes that shouldn't be there, so we encounter an error we will just
            //try again with one less byte.
            for (int tryCount = 0; length > 0 && !done && tryCount < 5; tryCount++)
            {
                InputStream input = null;
                try
                {
                    input = new BufferedInputStream(
                        new RandomAccessFileInputStream( buffer, position, length ), BUFFER_SIZE );
                    IOUtils.closeQuietly(unFilteredStream);
                    unFilteredStream = new RandomAccessFileOutputStream( buffer );
                    decodeResult = filter.decode( input, unFilteredStream, this, filterIndex );
                    done = true;
                }
                catch( IOException io )
                {
                    length--;
                    exception = io;
                }
                finally
                {
                    IOUtils.closeQuietly(input);
                }
            }
            if( !done )
            {
                //if no good stream was found then lets try again but with the
                //length of data that was actually read and not length
                //defined in the dictionary
                length = writtenLength;
                for( int tryCount=0; !done && tryCount<5; tryCount++ )
                {
                    InputStream input = null;
                    try
                    {
                        input = new BufferedInputStream(
                            new RandomAccessFileInputStream( buffer, position, length ), BUFFER_SIZE );
                        IOUtils.closeQuietly(unFilteredStream);
                        unFilteredStream = new RandomAccessFileOutputStream( buffer );
                        decodeResult = filter.decode( input, unFilteredStream, this, filterIndex);
                        done = true;
                    }
                    catch( IOException io )
                    {
                        length--;
                        exception = io;
                    }
                    finally
                    {
                        IOUtils.closeQuietly(input);
                    }
                }
            }
        }
        if( !done )
        {
            throw exception;
        }
    }

    /**
     * This will encode the logical byte stream applying all of the filters to the stream.
     *
     * @throws IOException If there is an error applying a filter to the stream.
     */
    private void doEncode() throws IOException
    {
        filteredStream = unFilteredStream;

        COSBase filters = getFilters();
        if( filters == null )
        {
            //there is no filter to apply
        }
        else if( filters instanceof COSName )
        {
            doEncode( (COSName)filters, 0 );
        }
        else if( filters instanceof COSArray )
        {
            // apply filters in reverse order
            COSArray filterArray = (COSArray)filters;
            for( int i=filterArray.size()-1; i>=0; i-- )
            {
                COSName filterName = (COSName)filterArray.get( i );
                doEncode( filterName, i );
            }
        }
    }

    /**
     * This will encode applying a single filter on the stream.
     *
     * @param filterName The name of the filter.
     * @param filterIndex The index to the filter.
     *
     * @throws IOException If there is an error parsing the stream.
     */
    private void doEncode( COSName filterName, int filterIndex ) throws IOException
    {
        Filter filter = FilterFactory.INSTANCE.getFilter( filterName );

        InputStream input = new BufferedInputStream(
            new RandomAccessFileInputStream( buffer, filteredStream.getPosition(),
                                                   filteredStream.getLength() ), BUFFER_SIZE );
        IOUtils.closeQuietly(filteredStream);
        filteredStream = new RandomAccessFileOutputStream( buffer );
        filter.encode( input, filteredStream, this, filterIndex );
        IOUtils.closeQuietly(input);
    }

    /**
     * This will return the filters to apply to the byte stream.
     * The method will return
     * - null if no filters are to be applied
     * - a COSName if one filter is to be applied
     * - a COSArray containing COSNames if multiple filters are to be applied
     *
     * @return the COSBase object representing the filters
     */
    public COSBase getFilters()
    {
        return getDictionaryObject(COSName.FILTER);
    }

    /**
     * This will create a new stream for which filtered byte should be
     * written to.  You probably don't want this but want to use the
     * createUnfilteredStream, which is used to write raw bytes to.
     *
     * @return A stream that can be written to.
     *
     * @throws IOException If there is an error creating the stream.
     */
    public OutputStream createFilteredStream() throws IOException
    {
        IOUtils.closeQuietly(unFilteredStream);
        unFilteredStream = null;
        IOUtils.closeQuietly(filteredStream);
        filteredStream = new RandomAccessFileOutputStream( buffer );
        return new BufferedOutputStream( filteredStream, BUFFER_SIZE );
    }

    /**
     * This will create a new stream for which filtered byte should be
     * written to.  You probably don't want this but want to use the
     * createUnfilteredStream, which is used to write raw bytes to.
     *
     * @param expectedLength An entry where a length is expected.
     *
     * @return A stream that can be written to.
     *
     * @throws IOException If there is an error creating the stream.
     */
    public OutputStream createFilteredStream( COSBase expectedLength ) throws IOException
    {
        OutputStream out = createFilteredStream();
        filteredStream.setExpectedLength(expectedLength);
        return out;
    }

    /**
     * set the filters to be applied to the stream.
     *
     * @param filters The filters to set on this stream.
     *
     * @throws IOException If there is an error clearing the old filters.
     */
    public void setFilters(COSBase filters) throws IOException
    {
        if (unFilteredStream == null)
        {
            // don't lose stream contents
            doDecode();
        }
        setItem(COSName.FILTER, filters);
        // kill cached filtered streams
        IOUtils.closeQuietly(filteredStream);
        filteredStream = null;
    }

    /**
     * This will create an output stream that can be written to.
     *
     * @return An output stream which raw data bytes should be written to.
     *
     * @throws IOException If there is an error creating the stream.
     */
    public OutputStream createUnfilteredStream() throws IOException
    {
        IOUtils.closeQuietly(filteredStream);
        filteredStream = null;
        IOUtils.closeQuietly(unFilteredStream);
        unFilteredStream = new RandomAccessFileOutputStream( buffer );
        return new BufferedOutputStream( unFilteredStream, BUFFER_SIZE );
    }
   
    @Override
    public void close()
    {
        try
        {
            if (buffer != null)
            {
                buffer.close();
                buffer = null;
            }
        }
        catch (IOException exception)
        {
            LOG.error("Exception occured when closing the file.", exception);
        }
        if (filteredStream != null)
        {
            IOUtils.closeQuietly(filteredStream);
        }
        if (unFilteredStream != null)
        {
            IOUtils.closeQuietly(unFilteredStream);
        }
        clear();
    }
}
TOP

Related Classes of org.apache.pdfbox.cos.COSStream

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.