// Package com.sun.pdfview
//
// Source Code of com.sun.pdfview.PDFFile

/*
* Copyright 2004 Sun Microsystems, Inc., 4150 Network Circle,
* Santa Clara, California 95054, U.S.A. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*/
package com.sun.pdfview;

import java.awt.geom.Rectangle2D;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import com.sun.pdfview.action.GoToAction;
import com.sun.pdfview.action.PDFAction;
import com.sun.pdfview.annotation.PDFAnnotation;
import com.sun.pdfview.decrypt.EncryptionUnsupportedByPlatformException;
import com.sun.pdfview.decrypt.EncryptionUnsupportedByProductException;
import com.sun.pdfview.decrypt.IdentityDecrypter;
import com.sun.pdfview.decrypt.PDFAuthenticationFailureException;
import com.sun.pdfview.decrypt.PDFDecrypter;
import com.sun.pdfview.decrypt.PDFDecrypterFactory;
import com.sun.pdfview.decrypt.PDFPassword;
import com.sun.pdfview.decrypt.UnsupportedEncryptionException;

/**
* An encapsulation of a .pdf file.  The methods of this class
* can parse the contents of a PDF file, but those methods are
* hidden.  Instead, the public methods of this class allow
* access to the pages in the PDF file.  Typically, you create
* a new PDFFile, ask it for the number of pages, and then
* request one or more PDFPages.
* @author Mike Wessler
*/
public class PDFFile {

    /** Code of the NUL character; treated as white space by {@link #isWhiteSpace(int)}. */
    public final static int             NUL_CHAR = 0;
    /** Code of the form feed (FF) character; treated as white space by {@link #isWhiteSpace(int)}. */
    public final static int             FF_CHAR = 12;

    /** the version string of the file; stays "1.1" until processVersion accepts another value */
    private String versionString = "1.1";
    /** the major part of the version; defaults to 1 */
    private int majorVersion = 1;
    /** the minor part of the version; defaults to 1 */
    private int minorVersion = 1;
    /** the comment text that begins the file and introduces its version */
    private final static String VERSION_COMMENT = "%PDF-";
    /**
     * A ByteBuffer containing the file data
     */
    ByteBuffer buf;
    /**
     * the cross reference table mapping object numbers to locations
     * in the PDF file
     */
    PDFXref[] objIdx;
    /** the root PDFObject, as specified in the PDF file */
    PDFObject root = null;
    /** the Encrypt PDFObject, from the trailer */
    PDFObject encrypt = null;

    /** The Info PDFObject, from the trailer, for simple metadata */
    PDFObject info = null;

    /** a mapping of page numbers to parsed PDF commands */
    Cache cache;
    /**
     * whether the file is printable or not (trailer -> Encrypt -> P & 0x4)
     */
    private boolean printable = true;
    /**
     * whether the file is saveable or not (trailer -> Encrypt -> P & 0x10)
     */
    private boolean saveable = true;

    /**
     * The default decrypter for streams and strings. By default, no
     * encryption is expected, and thus the IdentityDecrypter is used.
     */
    private PDFDecrypter defaultDecrypter = IdentityDecrypter.getInstance();

    /**
     * get a PDFFile from a .pdf file.  The file must me a random access file
     * at the moment.  It should really be a file mapping from the nio package.
     * <p>
     * Use the getPage(...) methods to get a page from the PDF file.
     * @param buf the RandomAccessFile containing the PDF.
     * @throws IOException if there's a problem reading from the buffer
     * @throws PDFParseException if the document appears to be malformed, or
     *  its features are unsupported. If the file is encrypted in a manner that
     *  the product or platform does not support then the exception's {@link
     *  PDFParseException#getCause() cause} will be an instance of {@link
     *  UnsupportedEncryptionException}.
     * @throws PDFAuthenticationFailureException if the file is password
     *  protected and requires a password
     */
    public PDFFile(ByteBuffer buf) throws IOException {
  this(buf, null);
    }

    public PDFFile(ByteBuffer buf, boolean doNotParse) throws IOException {
      this.buf = buf;
    }
   
    /**
     * get a PDFFile from a .pdf file.  The file must me a random access file
     * at the moment.  It should really be a file mapping from the nio package.
     * <p>
     * Use the getPage(...) methods to get a page from the PDF file.
     * @param buf the RandomAccessFile containing the PDF.
     * @param password the user or owner password
     * @throws IOException if there's a problem reading from the buffer
     * @throws PDFParseException if the document appears to be malformed, or
     *  its features are unsupported. If the file is encrypted in a manner that
     *  the product or platform does not support then the exception's {@link
     *  PDFParseException#getCause() cause} will be an instance of {@link
     *  UnsupportedEncryptionException}.
     * @throws PDFAuthenticationFailureException if the file is password
     *  protected and the supplied password does not decrypt the document
     */
    public PDFFile(ByteBuffer buf, PDFPassword password) throws IOException {
        this.buf = buf;

        this.cache = new Cache();

        parseFile(password);
    }

    /**
     * Gets whether the owner of the file has given permission to print
     * the file.
     * @return true if it is okay to print the file
     */
    public boolean isPrintable() {
        return this.printable;
    }

    /**
     * Gets whether the owner of the file has given permission to save
     * a copy of the file.
     * @return true if it is okay to save the file
     */
    public boolean isSaveable() {
        return this.saveable;
    }

    /**
     * get the root PDFObject of this PDFFile.  You generally shouldn't need
     * this, but we've left it open in case you want to go spelunking.
     */
    public PDFObject getRoot() {
        return this.root;
    }

    /**
     * return the number of pages in this PDFFile.  The pages will be
     * numbered from 1 to getNumPages(), inclusive.
     */
    public int getNumPages() {
        try {
            return this.root.getDictRef("Pages").getDictRef("Count").getIntValue();
        } catch (Exception ioe) {
            return 0;
        }
    }

    /**
     * Get metadata (e.g., Author, Title, Creator) from the Info dictionary
     * as a string.
     * @param name the name of the metadata key (e.g., Author)
     * @return the info
     * @throws IOException if the metadata cannot be read
     */
    public String getStringMetadata(String name)
            throws IOException {
        if (this.info != null) {
            final PDFObject meta = this.info.getDictRef(name);
            return meta != null ? meta.getTextStringValue() : null;
        } else {
            return null;
        }
    }

    /**
     * Get the keys into the Info metadata, for use with
     * {@link #getStringMetadata(String)}
     * @return the keys present into the Info dictionary
     * @throws IOException if the keys cannot be read
     */
    public Iterator<String> getMetadataKeys()
            throws IOException {
        if (this.info != null) {
            return this.info.getDictKeys();
        } else {
            return Collections.<String>emptyList().iterator();
        }
    }


    /**
     * Used internally to track down PDFObject references.  You should never
     * need to call this.
     * <p>
     * Since this is the only public method for tracking down PDF objects,
     * it is synchronized.  This means that the PDFFile can only hunt down
     * one object at a time, preventing the file's location from getting
     * messed around.
     * <p>
     * This call stores the current buffer position before any changes are made
     * and restores it afterwards, so callers need not know that the position
     * has changed.
     *
     */
    public synchronized PDFObject dereference(PDFXref ref, PDFDecrypter decrypter)
            throws IOException {
        int id = ref.getID();

        // make sure the id is valid and has been read
        if (id >= this.objIdx.length || this.objIdx[id] == null) {
            return PDFObject.nullObj;
        }

        // check to see if this is already dereferenced
        PDFObject obj = this.objIdx[id].getObject();
        if (obj != null) {
            return obj;
        }

        // store the current position in the buffer
        int startPos = this.buf.position();

        boolean compressed = this.objIdx[id].getCompressed();
        if (!compressed) {
          int loc = this.objIdx[id].getFilePos();
          if (loc < 0) {
              return PDFObject.nullObj;
          }
 
          // move to where this object is
          this.buf.position(loc);
 
          // read the object and cache the reference
          obj= readObject(ref.getID(), ref.getGeneration(), decrypter);
        }
        else { // compressed
          int compId = this.objIdx[id].getID();
          int idx = this.objIdx[id].getIndex();
          if (idx < 0)
              return PDFObject.nullObj;
          PDFXref compRef = new PDFXref(compId, 0);
          PDFObject compObj = dereference(compRef, decrypter);
          int first = compObj.getDictionary().get("First").getIntValue();
          int length = compObj.getDictionary().get("Length").getIntValue();
          int n = compObj.getDictionary().get("N").getIntValue();
          if (idx >= n)
              return PDFObject.nullObj;
          ByteBuffer strm = compObj.getStreamBuffer();
         
          ByteBuffer oldBuf = this.buf;
          this.buf = strm;
          // skip other nums
          for (int i=0; i<idx; i++) {
            PDFObject skip1num= readObject(-1, -1, true, IdentityDecrypter.getInstance());
            PDFObject skip2num= readObject(-1, -1, true, IdentityDecrypter.getInstance());
          }
          PDFObject objNumPO= readObject(-1, -1, true, IdentityDecrypter.getInstance());
          PDFObject offsetPO= readObject(-1, -1, true, IdentityDecrypter.getInstance());
          int objNum = objNumPO.getIntValue();
          int offset = offsetPO.getIntValue();
          if (objNum != id)
              return PDFObject.nullObj;
         
          this.buf.position(first+offset);
          obj= readObject(objNum, 0, IdentityDecrypter.getInstance());
          this.buf = oldBuf;
        }
       
        if (obj == null) {
            obj = PDFObject.nullObj;
        }

        this.objIdx[id].setObject(obj);

        // reset to the previous position
        this.buf.position(startPos);

        return obj;
    }

    /**
     * Is the argument a white space character according to the PDF spec?.
     * ISO Spec 32000-1:2008 - Table 1
     */
    public static boolean isWhiteSpace(int c) {
        if (c == ' ' || c == NUL_CHAR || c == '\t' || c == '\n' || c == '\r' || c == FF_CHAR) return true;
        return false;
      /*switch (c) {
            case NUL_CHAR:  // Null (NULL)
            case '\t':      // Horizontal Tab (HT)
            case '\n':      // Line Feed (LF)
            case FF_CHAR:   // Form Feed (FF)
            case '\r':      // Carriage Return (CR)
            case ' ':       // Space (SP)
                return true;
            default:
                return false;
        }*/
    }

    /**
     * Is the argument a delimiter according to the PDF spec?<p>
     *
     * ISO 32000-1:2008 - Table 2
     *
     * @param c the character to test
     */
    public static boolean isDelimiter(int c) {
        switch (c) {
            case '(':   // LEFT PARENTHESIS
            case ')':   // RIGHT PARENTHESIS
            case '<':   // LESS-THAN-SIGN
            case '>':   // GREATER-THAN-SIGN
            case '[':   // LEFT SQUARE BRACKET
            case ']':   // RIGHT SQUARE BRACKET
            case '{':   // LEFT CURLY BRACKET
            case '}':   // RIGHT CURLY BRACKET
            case '/':   // SOLIDUS
            case '%':   // PERCENT SIGN
                return true;
            default:
                return false;
        }
    }

    /**
     * return true if the character is neither a whitespace or a delimiter.
     *
     * @param c the character to test
     * @return boolean
     */
    public static boolean isRegularCharacter (int c) {
        return !(isWhiteSpace(c) || isDelimiter(c));
    }

    /**
     * read the next object from the file
     * @param objNum the object number of the object containing the object
     *  being read; negative only if the object number is unavailable (e.g., if
     *  reading from the trailer, or reading at the top level, in which
     *  case we can expect to be reading an object description)
     * @param objGen the object generation of the object containing the object
     *  being read; negative only if the objNum is unavailable
     * @param decrypter the decrypter to use
     */
    private PDFObject readObject(
            int objNum, int objGen, PDFDecrypter decrypter) throws IOException {
  return readObject(objNum, objGen, false, decrypter);
    }

    /**
     * Read the next object from the current buffer, with a special catch
     * for numbers.
     * <p>
     * Leading white space and comments are skipped.  The first significant
     * character selects what is read: a dictionary, hex string, literal
     * string, array, name, number or keyword.  Any other character is taken
     * to be a closing character: it is pushed back and null is returned.
     * <p>
     * Unless numscan is set, a number is followed by a look-ahead for
     * "number R" (an indirect reference, returned as a placeholder object)
     * or "number obj" (an object description, which is then read).  If
     * neither follows, the buffer is repositioned to just after the first
     * number and that number is returned.
     * <p>
     * No end-of-buffer check is made here; ByteBuffer.get() throws
     * BufferUnderflowException once the buffer is exhausted.
     *
     * @param objNum the object number of the object containing the object
     *  being read; negative only if the object number is unavailable (e.g., if
     *  reading from the trailer, or reading at the top level, in which
     *  case we can expect to be reading an object description)
     * @param objGen the object generation of the object containing the object
     *  being read; negative only if the objNum is unavailable
     * @param numscan if true, don't bother trying to see if a number is
     *  an object reference (used when already in the middle of testing for
     *  an object reference, and not otherwise)
     * @param decrypter the decrypter to use
     * @return the object read, or null if a closing character was found
     */
    private PDFObject readObject(
            int objNum, int objGen,
            boolean numscan, PDFDecrypter decrypter) throws IOException {
        int c;
        PDFObject obj = null;
        // keep going until something yields an object; a comment yields
        // none, so the loop simply runs again after it
        while (obj == null) {
            // skip whitespace
            while (isWhiteSpace(c = this.buf.get())) {
            }
            // check character for special punctuation:
            if (c == '<') {
                // could be start of <hex data>, or start of <<dictionary>>
                c = this.buf.get();
                if (c == '<') {
                    // it's a dictionary
        obj= readDictionary(objNum, objGen, decrypter);
                } else {
                    // not a second '<': push that character back so the
                    // hex string reader sees it
                    this.buf.position(this.buf.position() - 1);
        obj= readHexString(objNum, objGen, decrypter);
                }
            } else if (c == '(') {
                // it's a literal string
    obj= readLiteralString(objNum, objGen, decrypter);
            } else if (c == '[') {
                // it's an array
    obj= readArray(objNum, objGen, decrypter);
            } else if (c == '/') {
                // it's a name
                obj = readName();
            } else if (c == '%') {
                // it's a comment: discard the rest of the line
                readLine();
            } else if ((c >= '0' && c <= '9') || c == '-' || c == '+' || c == '.') {
                // it's a number
                obj = readNumber((char) c);
                if (!numscan) {
                    // It could be the start of a reference.
                    // Check to see if there's another number, then "R".
                    //
                    // We can't use mark/reset, since this could be called
                    // from dereference, which already is using a mark
                    int startPos = this.buf.position();

                    // the look-ahead reads pass numscan=true so they do not
                    // start a look-ahead of their own
        PDFObject testnum= readObject(-1, -1, true, decrypter);
                    if (testnum != null &&
                            testnum.getType() == PDFObject.NUMBER) {
      PDFObject testR= readObject(-1, -1, true, decrypter);
                        if (testR != null &&
                                testR.getType() == PDFObject.KEYWORD &&
                                testR.getStringValue().equals("R")) {
                            // yup.  it's a reference.
                            PDFXref xref = new PDFXref(obj.getIntValue(),
                                    testnum.getIntValue());
                            // Create a placeholder that will be dereferenced
                            // as needed
                            obj = new PDFObject(this, xref);
                        } else if (testR != null &&
                                testR.getType() == PDFObject.KEYWORD &&
                                testR.getStringValue().equals("obj")) {
                            // it's an object description
          obj= readObjectDescription(
                                    obj.getIntValue(),
                                    testnum.getIntValue(),
                                    decrypter);
                        } else {
                            // two numbers but no R/obj: undo the look-ahead
                            this.buf.position(startPos);
                        }
                    } else {
                        // no second number: undo the look-ahead
                        this.buf.position(startPos);
                    }
                }
            } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
                // it's a keyword
                obj = readKeyword((char) c);
            } else {
                // it's probably a closing character.
                // throwback
                this.buf.position(this.buf.position() - 1);
                break;
            }
        }
        return obj;
    }

    /**
     * Get the next non-white space character
     * @param buf the buffer to read from
     * @return the next non-whitespace character
     */
    private int nextNonWhitespaceChar(ByteBuffer buf) {
        int c;
        while (isWhiteSpace(c = buf.get())) {
            // nothing
        }
        return c;
    }

    /**
     * Consume all sequential whitespace from the current buffer position,
     * leaving the buffer positioned at non-whitespace
     * @param buf the buffer to read from
     */
    private void consumeWhitespace(ByteBuffer buf) {
        nextNonWhitespaceChar(buf);
        buf.position(buf.position() - 1);
    }

    /**
     * requires the next few characters (after whitespace) to match the
     * argument.
     * @param match the next few characters after any whitespace that
     * must be in the file
     * @return true if the next characters match; false otherwise.
     */
    private boolean nextItemIs(String match) throws IOException {
        // skip whitespace
        int c = nextNonWhitespaceChar(buf);
        for (int i = 0; i < match.length(); i++) {
            if (i > 0) {
                c = this.buf.get();
            }
            if (c != match.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * process a version string, to determine the major and minor versions
     * of the file.
     *
     * @param versionString
     */
    private void processVersion(String versionString) {
        try {
            StringTokenizer tokens = new StringTokenizer(versionString, ".");
            this.majorVersion = Integer.parseInt(tokens.nextToken());
            this.minorVersion = Integer.parseInt(tokens.nextToken());
            this.versionString = versionString;
        } catch (Exception e) {
            // ignore
        }
    }

    /**
     * return the major version of the PDF header.
     *
     * @return int
     */
    public int getMajorVersion() {
        return this.majorVersion;
    }

    /**
     * return the minor version of the PDF header.
     *
     * @return int
     */
    public int getMinorVersion() {
        return this.minorVersion;
    }

    /**
     * return the version string from the PDF header.
     *
     * @return String
     */
    public String getVersionString() {
        return this.versionString;
    }

    /**
     * read an entire &lt;&lt; dictionary &gt;&gt;.  The initial
     * &lt;&lt; has already been read.
     * @param objNum the object number of the object containing the dictionary
     *  being read; negative only if the object number is unavailable, which
     *  should only happen if we're reading a dictionary placed directly
     *  in the trailer
     * @param objGen the object generation of the object containing the object
     *  being read; negative only if the objNum is unavailable
     * @param decrypter the decrypter to use
     * @return the Dictionary as a PDFObject.
     */
    private PDFObject readDictionary(
            int objNum, int objGen, PDFDecrypter decrypter) throws IOException {
        HashMap<String,PDFObject> hm = new HashMap<String,PDFObject>();
        // we've already read the <<.  Now get /Name obj pairs until >>
        PDFObject name;
  while ((name= readObject(objNum, objGen, decrypter))!=null) {
            // make sure first item is a NAME
            if (name.getType() != PDFObject.NAME) {
                throw new PDFParseException("First item in dictionary must be a /Name.  (Was " + name + ")");
            }
      PDFObject value= readObject(objNum, objGen, decrypter);
            if (value != null) {
                hm.put(name.getStringValue(), value);
            }
        }
        //  System.out.println("End of dictionary at location "+raf.getFilePointer());
        if (!nextItemIs(">>")) {
            throw new PDFParseException("End of dictionary wasn't '>>'");
        }
        //  System.out.println("Dictionary closed at location "+raf.getFilePointer());
        return new PDFObject(this, PDFObject.DICTIONARY, hm);
    }

    /**
     * read a character, and return its value as if it were a hexidecimal
     * digit.
     * @return a number between 0 and 15 whose value matches the next
     * hexidecimal character.  Returns -1 if the next character isn't in
     * [0-9a-fA-F]
     */
    private int readHexDigit() throws IOException {
        int a;
        while (isWhiteSpace(a = this.buf.get())) {
        }
        switch (a) {
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                a -= '0';
                break;
            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                a -= 'a' - 10;
                break;
            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                a -= 'A' - 10;
                break;
            default:
                a = -1;
                break;
        }
        return a;
    }

    /**
     * return the 8-bit value represented by the next two hex characters.
     * If the next two characters don't represent a hex value, return -1
     * and reset the read head.  If there is only one hex character,
     * return its value as if there were an implicit 0 after it.
     */
    private int readHexPair() throws IOException {
        int first = readHexDigit();
        if (first < 0) {
            this.buf.position(this.buf.position() - 1);
            return -1;
        }
        int second = readHexDigit();
        if (second < 0) {
            this.buf.position(this.buf.position() - 1);
            return (first << 4);
        } else {
            return (first << 4) + second;
        }
    }

    /**
     * read a < hex string >.  The initial < has already been read.
     * @param objNum the object number of the object containing the dictionary
     *  being read; negative only if the object number is unavailable, which
     *  should only happen if we're reading a string placed directly
     *  in the trailer
     * @param objGen the object generation of the object containing the object
     *  being read; negative only if the objNum is unavailable
     * @param decrypter the decrypter to use
     */
    private PDFObject readHexString(
            int objNum, int objGen, PDFDecrypter decrypter) throws IOException {
        // we've already read the <. Now get the hex bytes until >
        int val;
        StringBuffer sb = new StringBuffer();
        while ((val = readHexPair()) >= 0) {
            sb.append((char) val);
        }
        if (this.buf.get() != '>') {
            throw new PDFParseException("Bad character in Hex String");
        }
        return new PDFObject(this, PDFObject.STRING,
                decrypter.decryptString(objNum, objGen, sb.toString()));
    }

    /**
     * <p>read a ( character string ).  The initial ( has already been read.
     * Read until a *balanced* ) appears.</p>
     *
     * <p>Section 3.2.3 of PDF Refernce version 1.7 defines the format of
     * String objects. Regarding literal strings:</p>
     *
     * <blockquote>Within a literal string, the backslash (\) is used as an
     * escape character for various purposes, such as to include newline
     * characters, nonprinting ASCII characters, unbalanced parentheses, or
     * the backslash character itself in the string. The character
     * immediately following the backslash determines its precise
     * interpretation (see Table 3.2). If the character following the
     * backslash is not one of those shown in the table, the backslash
     * is ignored.</blockquote>
     *
     * * <p>This only reads 8 bit basic character 'strings' so as to avoid a
     * text string interpretation when one is not desired (e.g., for byte
     * strings, as used by the decryption mechanism). For an interpretation of
     * a string returned from this method, where the object type is defined
     * as a 'text string' as per Section 3.8.1, Table 3.31 "PDF Data Types",
     * {@link PDFStringUtil#asTextString} ()} or
     * {@link PDFObject#getTextStringValue()} must be employed.</p>
     *
     * @param objNum the object number of the object containing the dictionary
     *  being read; negative only if the object number is unavailable, which
     *  should only happen if we're reading a dictionary placed directly
     *  in the trailer
     * @param objGen the object generation of the object containing the object
     *  being read; negative only if the objNum is unavailable
     * @param decrypter the decrypter to use
     */
    private PDFObject readLiteralString(
            int objNum, int objGen, PDFDecrypter decrypter) throws IOException {
        int c;

        // we've already read the (.  now get the characters until a
        // *balanced* ) appears.  Translate \r \n \t \b \f \( \) \\ \ddd
        // if a cr/lf follows a backslash, ignore the cr/lf
        int parencount = 1;
        StringBuffer sb = new StringBuffer();

        while (parencount > 0) {
            c = this.buf.get() & 0xFF;
            // process unescaped parenthesis
            if (c == '(') {
                parencount++;
            } else if (c == ')') {
                parencount--;
                if (parencount == 0) {
                    c = -1;
                    break;
                }
            } else if (c == '\\') {

                // From the spec:
                // Within a literal string, the backslash (\) is used as an
                // escape character for various purposes, such as to include
                // newline characters, nonprinting ASCII characters,
                // unbalanced parentheses, or the backslash character itself
                // in the string. The character immediately following the
                // backslash determines its precise interpretation (see
                // Table 3.2). If the character following the backslash is not
                // one of those shown in the table, the backslash is ignored.
                //
                // summary of rules:
                //
                // \n \r \t \b \f 2-char sequences are used to represent their
                //  1-char counterparts
                //
                // \( and \) are used to escape parenthesis
                //
                // \\ for a literal backslash
                //
                // \ddd (1-3 octal digits) for a character code
                //
                //  \<EOL> is used to put formatting newlines into the
                //  file, but aren't actually part of the string; EOL may be
                //  CR, LF or CRLF
                //
                // any other sequence should see the backslash ignored

                // grab the next character to see what we're dealing with
                c = this.buf.get() & 0xFF;
                if (c >= '0' && c < '8') {
                    // \ddd form - one to three OCTAL digits
                    int count = 0;
                    int val = 0;
                    while (c >= '0' && c < '8' && count < 3) {
                        val = val * 8 + c - '0';
                        c = this.buf.get() & 0xFF;
                        count++;
                    }
                    // we'll have read one character too many
                    this.buf.position(this.buf.position() - 1);
                    c = val;
                } else if (c == 'n') {
                    c = '\n';
                } else if (c == 'r') {
                    c = '\r';
                } else if (c == 't') {
                    c = '\t';
                } else if (c == 'b') {
                    c = '\b';
                } else if (c == 'f') {
                    c = '\f';
                } else if (c == '\r') {
                    // escaped CR to be ignored; look for a following LF
                    c = this.buf.get() & 0xFF;
                    if (c != '\n') {
                        // not an LF, we'll consume this character on
                        // the next iteration
                        this.buf.position(this.buf.position() - 1);
                    }
                    c = -1;
                } else if (c == '\n') {
                    // escaped LF to be ignored
                    c = -1;
                }
                // any other c should be used as is, as it's either
                // one of ()\ in which case it should be used literally,
                // or the backslash should just be ignored
            }
            if (c >= 0) {
                sb.append((char) c);
            }
        }
        return new PDFObject(this, PDFObject.STRING,
                decrypter.decryptString(objNum, objGen, sb.toString()));
    }

    /**
     * Read a line of text.  This follows the semantics of readLine() in
     * DataInput -- it reads character by character until a '\n' is
     * encountered.  If a '\r' is encountered, it is discarded.
     */
    private String readLine() {
        StringBuffer sb = new StringBuffer();

        while (this.buf.remaining() > 0) {
            char c = (char) this.buf.get();

            if (c == '\r') {
                if (this.buf.remaining() > 0) {
                    char n = (char) this.buf.get(this.buf.position());
                    if (n == '\n') {
                        this.buf.get();
                    }
                }
                break;
            } else if (c == '\n') {
                break;
            }

            sb.append(c);
        }

        return sb.toString();
    }

    /**
     * read an [ array ].  The initial [ has already been read.  PDFObjects
     * are read until ].
     * @param objNum the object number of the object containing the dictionary
     *  being read; negative only if the object number is unavailable, which
     *  should only happen if we're reading an array placed directly
     *  in the trailer
     * @param objGen the object generation of the object containing the object
     *  being read; negative only if the objNum is unavailable
     * @param decrypter the decrypter to use
     */
    private PDFObject readArray(
            int objNum, int objGen, PDFDecrypter decrypter) throws IOException {
        // we've already read the [.  Now read objects until ]
        ArrayList<PDFObject> ary = new ArrayList<PDFObject>();
        PDFObject obj;
  while((obj= readObject(objNum, objGen, decrypter))!=null) {
            ary.add(obj);
        }
        if (this.buf.get() != ']') {
            throw new PDFParseException("Array should end with ']'");
        }
        PDFObject[] objlist = new PDFObject[ary.size()];
        for (int i = 0; i < objlist.length; i++) {
            objlist[i] = ary.get(i);
        }
        return new PDFObject(this, PDFObject.ARRAY, objlist);
    }

    /**
     * read a /name.  The / has already been read.
     */
    private PDFObject readName() throws IOException {
        // we've already read the / that begins the name.
        // all we have to check for is #hh hex notations.
        StringBuffer sb = new StringBuffer();
        int c;
        while (isRegularCharacter(c = this.buf.get())) {
            if (c < '!' && c > '~') {
                break;      // out-of-range, should have been hex
            }
            // H.3.2.4 indicates version 1.1 did not do hex escapes
            if (c == '#' && (this.majorVersion != 1 && this.minorVersion != 1)) {
                int hex = readHexPair();
                if (hex >= 0) {
                    c = hex;
                } else {
                    throw new PDFParseException("Bad #hex in /Name");
                }
            }
            sb.append((char) c);
        }
        this.buf.position(this.buf.position() - 1);
        return new PDFObject(this, PDFObject.NAME, sb.toString());
    }

    /**
     * read a number.  The initial digit or . or - is passed in as the
     * argument.
     */
    private PDFObject readNumber(char start) throws IOException {
        // we've read the first digit (it's passed in as the argument)
        boolean neg = start == '-';
        boolean sawdot = start == '.';
        double dotmult = sawdot ? 0.1 : 1;
        double value = (start >= '0' && start <= '9') ? start - '0' : 0;
        while (true) {
            int c = this.buf.get();
            if (c == '.') {
                if (sawdot) {
                    throw new PDFParseException("Can't have two '.' in a number");
                }
                sawdot = true;
                dotmult = 0.1;
            } else if (c >= '0' && c <= '9') {
                int val = c - '0';
                if (sawdot) {
                    value += val * dotmult;
                    dotmult *= 0.1;
                } else {
                    value = value * 10 + val;
                }
            } else {
                this.buf.position(this.buf.position() - 1);
                break;
            }
        }
        if (neg) {
            value = -value;
        }
        return new PDFObject(this, PDFObject.NUMBER, Double.valueOf(value));
    }

    /**
     * read a bare keyword.  The initial character is passed in as the
     * argument.
     */
    private PDFObject readKeyword(char start) throws IOException {
        // we've read the first character (it's passed in as the argument)
        StringBuffer sb = new StringBuffer(String.valueOf(start));
        int c;
        while (isRegularCharacter(c = this.buf.get())) {
            sb.append((char) c);
        }
        this.buf.position(this.buf.position() - 1);
        return new PDFObject(this, PDFObject.KEYWORD, sb.toString());
    }

    /**
     * read an entire PDFObject.  The intro line, which looks something
     * like "4 0 obj" has already been read.
     * @param objNum the object number of the object being read, being
     *  the first number in the intro line (4 in "4 0 obj")
     * @param objGen the object generation of the object being read, being
     *  the second number in the intro line (0 in "4 0 obj").
     * @param decrypter the decrypter to use
     */
    private PDFObject readObjectDescription(
            int objNum, int objGen, PDFDecrypter decrypter) throws IOException {
        // we've already read the 4 0 obj bit.  Next thing up is the object.
        // object descriptions end with the keyword endobj
        long debugpos = this.buf.position();
        PDFObject obj = readObject(objNum, objGen, decrypter);
        // see if it's a dictionary.  If so, this could be a stream.
        PDFObject endkey = readObject(objNum, objGen, decrypter);
        if (endkey.getType() != PDFObject.KEYWORD && endkey.getType() != PDFObject.STREAM) {
            System.out.println("WARNING: Expected 'stream' or 'endobj' but was " + endkey.getType() + " " + String.valueOf(endkey.getStringValue()));
        }
        if (obj.getType() == PDFObject.DICTIONARY && endkey.getStringValue() != null && endkey.getStringValue().equals("stream")) {
            // skip until we see \n
            readLine();
            ByteBuffer data = readStream(obj);
            if (data == null) {
                data = ByteBuffer.allocate(0);
            }
            obj.setStream(data);
            endkey = readObject(objNum, objGen, decrypter);
        }
        // at this point, obj is the object, keyword should be "endobj"
        String endcheck = endkey.getStringValue();
        if (endcheck == null || !endcheck.equals("endobj")) {
            System.out.println("WARNING: object at " + debugpos + " didn't end with 'endobj'");
        }
        obj.setObjectId(objNum, objGen);
        return obj;
    }

    /**
     * read the stream portion of a PDFObject.  Calls decodeStream to
     * un-filter the stream as necessary.
     *
     * @param dict the dictionary associated with this stream.
     * @return a ByteBuffer with the encoded stream data
     */
    private ByteBuffer readStream(PDFObject dict) throws IOException {
        // pointer is at the start of a stream.  read the stream and
        // decode, based on the entries in the dictionary
        PDFObject lengthObj = dict.getDictRef("Length");
        int length = -1;
        if (lengthObj != null) {
            length = lengthObj.getIntValue();
        }
        if (length < 0) {
            throw new PDFParseException("Unknown length for stream");
        }

        // slice the data
        int start = this.buf.position();
        ByteBuffer streamBuf = this.buf.slice();
        streamBuf.limit(length);

        // move the current position to the end of the data
        this.buf.position(this.buf.position() + length);
        int ending = this.buf.position();

        if (!nextItemIs("endstream")) {
            System.out.println("read " + length + " chars from " + start + " to " +
                    ending);
            throw new PDFParseException("Stream ended inappropriately");
        }

        return streamBuf;
    // now decode stream
    // return PDFDecoder.decodeStream(dict, streamBuf);
    }

    /**
     * read the cross reference table from a PDF file.  When this method
     * is called, the file pointer must point to the start of the word
     * "xref" in the file.  Reads the xref table and the trailer dictionary.
     * If dictionary has a /Prev entry, move file pointer
     * and read new trailer
     * @param password
     */
    private void readTrailer(PDFPassword password)
            throws
            IOException,
            PDFAuthenticationFailureException,
            EncryptionUnsupportedByProductException,
            EncryptionUnsupportedByPlatformException {
        // the table of xrefs
        this.objIdx = new PDFXref[50];

        int pos = this.buf.position();
       
        PDFDecrypter newDefaultDecrypter = null;

        // read a bunch of nested trailer tables
        while (true) {
            // make sure we are looking at an xref table
            if (!nextItemIs("xref")) {
              this.buf.position(pos);
              readTrailer15(password);
              return;
//                throw new PDFParseException("Expected 'xref' at start of table");
            }

            // read a bunch of linked tabled
            while (true) {
                // read until the word "trailer"
    PDFObject obj=readObject(-1, -1, IdentityDecrypter.getInstance());
                if (obj.getType() == PDFObject.KEYWORD &&
                        obj.getStringValue().equals("trailer")) {
                    break;
                }

                // read the starting position of the reference
                if (obj.getType() != PDFObject.NUMBER) {
                    throw new PDFParseException("Expected number for first xref entry");
                }
                int refstart = obj.getIntValue();

                // read the size of the reference table
                obj = readObject(-1, -1, IdentityDecrypter.getInstance());
                if (obj.getType() != PDFObject.NUMBER) {
                    throw new PDFParseException("Expected number for length of xref table");
                }
                int reflen = obj.getIntValue();

                // skip a line
                readLine();

                // extend the objIdx table, if necessary
                if (refstart + reflen >= this.objIdx.length) {
                    PDFXref nobjIdx[] = new PDFXref[refstart + reflen];
                    System.arraycopy(this.objIdx, 0, nobjIdx, 0, this.objIdx.length);
                    this.objIdx = nobjIdx;
                }

                // read reference lines
                for (int refID = refstart; refID < refstart + reflen; refID++) {
                    // each reference line is 20 bytes long
                    byte[] refline = new byte[20];
                    this.buf.get(refline);

                    // ignore this line if the object ID is already defined
                    if (this.objIdx[refID] != null) {
                        continue;
                    }

                    // see if it's an active object
                    if (refline[17] == 'n') {
                        this.objIdx[refID] = new PDFXref(refline);
                    } else {
                        this.objIdx[refID] = new PDFXref(null);
                    }
                }
            }

            // at this point, the "trailer" word (not EOL) has been read.
      PDFObject trailerdict = readObject(-1, -1, IdentityDecrypter.getInstance());
            if (trailerdict.getType() != PDFObject.DICTIONARY) {
                throw new IOException("Expected dictionary after \"trailer\"");
            }

            // read the root object location
            if (this.root == null) {
                this.root = trailerdict.getDictRef("Root");
                if (this.root != null) {
                    this.root.setObjectId(PDFObject.OBJ_NUM_TRAILER,
                            PDFObject.OBJ_NUM_TRAILER);
                }
            }

            // read the encryption information
            if (this.encrypt == null) {
                this.encrypt = trailerdict.getDictRef("Encrypt");
                if (this.encrypt != null) {
                    this.encrypt.setObjectId(PDFObject.OBJ_NUM_TRAILER,
                            PDFObject.OBJ_NUM_TRAILER);
                }
                newDefaultDecrypter =
                        PDFDecrypterFactory.createDecryptor(
                                this.encrypt,
                                trailerdict.getDictRef("ID"),
                                password);
            }


            if (this.info == null) {
                this.info = trailerdict.getDictRef("Info");
                if (this.info != null) {
                    if (!this.info.isIndirect()) {
                        throw new PDFParseException(
                                "Info in trailer must be an indirect reference");
                    }
                    this.info.setObjectId(PDFObject.OBJ_NUM_TRAILER,
                            PDFObject.OBJ_NUM_TRAILER);
                }
            }

            // support for hybrid-PDFs containing an additional compressed-xref-stream
            PDFObject xrefstmPos = trailerdict.getDictRef("XRefStm");
            if (xrefstmPos != null) {
                int pos14 = this.buf.position();
                this.buf.position(xrefstmPos.getIntValue());
              readTrailer15(password);
                this.buf.position(pos14);
            }
           
            // read the location of the previous xref table
            PDFObject prevloc = trailerdict.getDictRef("Prev");
            if (prevloc != null) {
                this.buf.position(prevloc.getIntValue());
            } else {
                break;
            }
            // see if we have an optional Version entry


            if (this.root.getDictRef("Version") != null) {
                processVersion(this.root.getDictRef("Version").getStringValue());
            }
        }

        // make sure we found a root
        if (this.root == null) {
            throw new PDFParseException("No /Root key found in trailer dictionary");
        }

        if (this.encrypt != null && newDefaultDecrypter!=null) {
            PDFObject permissions = this.encrypt.getDictRef("P");
            if (permissions!=null && !newDefaultDecrypter.isOwnerAuthorised()) {
                int perms= permissions != null ? permissions.getIntValue() : 0;
                if (permissions!=null) {
                    this.printable = (perms & 4) != 0;
                    this.saveable = (perms & 16) != 0;
                }
            }
            // Install the new default decrypter only after the trailer has
            // been read, as nothing we're reading passing through is encrypted
            this.defaultDecrypter = newDefaultDecrypter;
        }

        // dereference the root object
        this.root.dereference();
    }

    /**
     * read the cross reference table from a PDF file.  When this method
     * is called, the file pointer must point to the start of the word
     * "xref" in the file.  Reads the xref table and the trailer dictionary.
     * If dictionary has a /Prev entry, move file pointer
     * and read new trailer
     * @param password
     */
    private void readTrailer15(PDFPassword password)
            throws
            IOException,
            PDFAuthenticationFailureException,
            EncryptionUnsupportedByProductException,
            EncryptionUnsupportedByPlatformException {
     
        // the table of xrefs
        // objIdx is initialized from readTrailer(), do not overwrite here data from hybrid PDFs
//        objIdx = new PDFXref[50];
        PDFDecrypter newDefaultDecrypter = null;
       
        while (true) {
      PDFObject xrefObj = readObject(-1, -1, IdentityDecrypter.getInstance());
      PDFObject pdfObject = xrefObj.getDictionary().get("W");
      if (pdfObject == null) {
        break;
      }
      PDFObject[] wNums = pdfObject.getArray();
      int l1 = wNums[0].getIntValue();
      int l2 = wNums[1].getIntValue();
      int l3 = wNums[2].getIntValue();
 
      int size = xrefObj.getDictionary().get("Size").getIntValue();

      byte[] strmbuf = xrefObj.getStream();
      int strmPos = 0;
     
      PDFObject idxNums = xrefObj.getDictionary().get("Index");
      int[] idxArray;
      if (idxNums == null) {
        idxArray = new int[]{0, size};
      }
      else {
        PDFObject[] idxNumArr = idxNums.getArray();
        idxArray = new int[idxNumArr.length];
        for (int i = 0; i < idxNumArr.length; i++) {
          idxArray[i] = idxNumArr[i].getIntValue();
        }
      }
      int idxLen = idxArray.length;
      int idxPos = 0;
 
     
      while (idxPos<idxLen) {
        int refstart = idxArray[idxPos++];
        int reflen = idxArray[idxPos++];
       
            // extend the objIdx table, if necessary
            if (refstart + reflen >= this.objIdx.length) {
                PDFXref nobjIdx[] = new PDFXref[refstart + reflen];
                System.arraycopy(this.objIdx, 0, nobjIdx, 0, this.objIdx.length);
                this.objIdx = nobjIdx;
            }
 
              // read reference lines
              for (int refID = refstart; refID < refstart + reflen; refID++) {
               
          int type = readNum(strmbuf, strmPos, l1);
          strmPos += l1;
          int id = readNum(strmbuf, strmPos, l2);
          strmPos += l2;
          int gen = readNum(strmbuf, strmPos, l3);
          strmPos += l3;
 
                  // ignore this line if the object ID is already defined
                  if (this.objIdx[refID] != null) {
                      continue;
                  }
 
                  // see if it's an active object
                  if (type == 0) { // inactive
                      this.objIdx[refID] = new PDFXref(null);
                  } else if (type == 1) { // active uncompressed
                      this.objIdx[refID] = new PDFXref(id, gen);
                  } else { // active compressed
                      this.objIdx[refID] = new PDFXref(id, gen, true);
                  }
               
        }
      }
 
        HashMap<String, PDFObject> trailerdict = xrefObj.getDictionary();
 
            // read the root object location
            if (this.root == null) {
                this.root = trailerdict.get("Root");
                if (this.root != null) {
                    this.root.setObjectId(PDFObject.OBJ_NUM_TRAILER,
                            PDFObject.OBJ_NUM_TRAILER);
                }
            }

            // read the encryption information
            if (this.encrypt == null) {
                this.encrypt = trailerdict.get("Encrypt");
                if (this.encrypt != null) {
                    this.encrypt.setObjectId(PDFObject.OBJ_NUM_TRAILER,
                            PDFObject.OBJ_NUM_TRAILER);
                }
                newDefaultDecrypter =
                        PDFDecrypterFactory.createDecryptor(
                                this.encrypt,
                                trailerdict.get("ID"),
                                password);
            }


            if (this.info == null) {
                this.info = trailerdict.get("Info");
                if (this.info != null) {
                    if (!this.info.isIndirect()) {
                        throw new PDFParseException(
                                "Info in trailer must be an indirect reference");
                    }
                    this.info.setObjectId(PDFObject.OBJ_NUM_TRAILER,
                            PDFObject.OBJ_NUM_TRAILER);
                }
            }

            // read the location of the previous xref table
            PDFObject prevloc = trailerdict.get("Prev");
            if (prevloc != null) {
                this.buf.position(prevloc.getIntValue());
            } else {
                break;
            }
            // see if we have an optional Version entry


            if (this.root.getDictRef("Version") != null) {
                processVersion(this.root.getDictRef("Version").getStringValue());
            }
        }

        // make sure we found a root
        if (this.root == null) {
            throw new PDFParseException("No /Root key found in trailer dictionary");
        }

        // check what permissions are relevant
        if (this.encrypt != null && newDefaultDecrypter!=null) {
            PDFObject permissions = this.encrypt.getDictRef("P");
            if (permissions!=null && !newDefaultDecrypter.isOwnerAuthorised()) {
                int perms= permissions != null ? permissions.getIntValue() : 0;
                if (permissions!=null) {
                    this.printable = (perms & 4) != 0;
                    this.saveable = (perms & 16) != 0;
                }
            }
            // Install the new default decrypter only after the trailer has
            // been read, as nothing we're reading passing through is encrypted
            this.defaultDecrypter = newDefaultDecrypter;
        }

        // dereference the root object
        this.root.dereference();
    }

    private int readNum(byte[] sbuf, int pos, int numBytes) {
      int result = 0;
      for (int i=0; i<numBytes; i++)
        result = (result << 8) + (sbuf[pos+i]&0xff);
    return result;
  }

  /**
     * build the PDFFile reference table.  Nothing in the PDFFile actually
     * gets parsed, despite the name of this function.  Things only get
     * read and parsed when they're needed.
     * @param password
     */
    private void parseFile(PDFPassword password) throws IOException {
        // start at the begining of the file
        this.buf.rewind();
        String versionLine = readLine();
        if (versionLine.startsWith(VERSION_COMMENT)) {
            processVersion(versionLine.substring(VERSION_COMMENT.length()));
        }
        this.buf.rewind();

        // back up about 32 characters from the end of the file to find
        // startxref\n
        byte[] scan = new byte[32];
        int scanPos = this.buf.remaining() - scan.length;
        int loc = 0;

        while (scanPos >= 0) {
            this.buf.position(scanPos);
            this.buf.get(scan);

            // find startxref in scan
            String scans = new String(scan);
            loc = scans.indexOf("startxref");
            if (loc > 0) {
                if (scanPos + loc + scan.length <= this.buf.limit()) {
                    scanPos = scanPos + loc;
                    loc = 0;
                }

                break;
            }
            scanPos -= scan.length - 10;
        }

        if (scanPos < 0) {
            throw new IOException("This may not be a PDF File");
        }

        this.buf.position(scanPos);
        this.buf.get(scan);
        String scans = new String(scan);

        loc += 10// skip over "startxref" and first EOL char
        if (scans.charAt(loc) < 32) {
            loc++;
        // skip over possible 2nd EOL char
        while (scans.charAt(loc) == 32) {
            loc++;
        } // skip over possible leading blanks
        // read number
        int numstart = loc;
        while (loc < scans.length() &&
                scans.charAt(loc) >= '0' &&
                scans.charAt(loc) <= '9') {
            loc++;
        }
        int xrefpos = Integer.parseInt(scans.substring(numstart, loc));
        this.buf.position(xrefpos);

        try {
            readTrailer(password);
        } catch (UnsupportedEncryptionException e) {
            throw new PDFParseException(e.getMessage(), e);
        }
    }

    /**
     * Gets the outline tree as a tree of OutlineNode, which is a subclass
     * of DefaultMutableTreeNode.  If there is no outline tree, this method
     * returns null.
     */
    public OutlineNode getOutline() throws IOException {
        // find the outlines entry in the root object
        PDFObject oroot = this.root.getDictRef("Outlines");
        OutlineNode work = null;
        OutlineNode outline = null;
        if (oroot != null) {
            // find the first child of the outline root
            PDFObject scan = oroot.getDictRef("First");
            outline = work = new OutlineNode("<top>");

            // scan each sibling in turn
            while (scan != null) {
                // add the new node with it's name
                String title = scan.getDictRef("Title").getTextStringValue();
                OutlineNode build = new OutlineNode(title);
                work.add(build);

                // find the action
                PDFAction action = null;

                PDFObject actionObj = scan.getDictRef("A");
                if (actionObj != null) {
                    try {
                        action = PDFAction.getAction(actionObj, getRoot());
                    }
                    catch (PDFParseException e) {
                      // oh well
                    }
                } else {
                    // try to create an action from a destination
                    PDFObject destObj = scan.getDictRef("Dest");
                    if (destObj != null) {
                        try {
                            PDFDestination dest =
                                    PDFDestination.getDestination(destObj, getRoot());

                            action = new GoToAction(dest);
                        } catch (IOException ioe) {
                            // oh well
                        }
                    }
                }

                // did we find an action?  If so, add it
                if (action != null) {
                    build.setAction(action);
                }

                // find the first child of this node
                PDFObject kid = scan.getDictRef("First");
                if (kid != null) {
                    work = build;
                    scan = kid;
                } else {
                    // no child.  Process the next sibling
                    PDFObject next = scan.getDictRef("Next");
                    while (next == null) {
                        scan = scan.getDictRef("Parent");
                        next = scan.getDictRef("Next");
                        work = (OutlineNode) work.getParent();
                        if (work == null) {
                            break;
                        }
                    }
                    scan = next;
                }
            }
        }

        return outline;
    }

    /**
     * Gets the page number (starting from 1) of the page represented by
     * a particular PDFObject.  The PDFObject must be a Page dictionary or
     * a destination description (or an action).
     * @return a number between 1 and the number of pages indicating the
     * page number, or 0 if the PDFObject is not in the page tree.
     */
    public int getPageNumber(PDFObject page) throws IOException {
        if (page.getType() == PDFObject.ARRAY) {
            page = page.getAt(0);
        }

        // now we've got a page.  Make sure.
        PDFObject typeObj = page.getDictRef("Type");
        if (typeObj == null || !typeObj.getStringValue().equals("Page")) {
            return 0;
        }

        int count = 0;
        while (true) {
            PDFObject parent = page.getDictRef("Parent");
            if (parent == null) {
                break;
            }
            PDFObject kids[] = parent.getDictRef("Kids").getArray();
            for (int i = 0; i < kids.length; i++) {
                if (kids[i].equals(page)) {
                    break;
                } else {
                    PDFObject kcount = kids[i].getDictRef("Count");
                    if (kcount != null) {
                        count += kcount.getIntValue();
                    } else {
                        count += 1;
                    }
                }
            }
            page = parent;
        }
        return count;
    }

    /**
     * Get the page commands for a given page in a separate thread.
     *
     * @param pagenum the number of the page to get commands for
     */
    public PDFPage getPage(int pagenum) {
        return getPage(pagenum, false);
    }

    /**
     * Get the page commands for a given page.
     *
     * @param pagenum the number of the page to get commands for
     * @param wait if true, do not exit until the page is complete.
     */
    public PDFPage getPage(int pagenum, boolean wait) {
        Integer key = Integer.valueOf(pagenum);
        HashMap<String,PDFObject> resources = null;
        PDFObject pageObj = null;
        boolean needread = false;

        PDFPage page = this.cache.getPage(key);
        PDFParser parser = this.cache.getPageParser(key);
        if (page == null) {
            try {
                // hunt down the page!
                resources = new HashMap<String,PDFObject>();

                PDFObject topPagesObj = this.root.getDictRef("Pages");
                pageObj = findPage(topPagesObj, 0, pagenum, resources);

                if (pageObj == null) {
                    return null;
                }

                page = createPage(pagenum, pageObj);

                byte[] stream = getContents(pageObj);
                parser = new PDFParser(page, stream, resources);

                this.cache.addPage(key, page, parser);
            } catch (IOException ioe) {
                return null;
            }
        }

        if (parser != null && !parser.isFinished()) {
            parser.go(wait);
        }

        return page;
    }

    /**
     * Stop the rendering of a particular image on this page
     */
    public void stop(int pageNum) {
        PDFParser parser = this.cache.getPageParser(Integer.valueOf(pageNum));
        if (parser != null) {
            // stop it
            parser.stop();
        }
    }

    /**
     * get the stream representing the content of a particular page.
     *
     * @param pageObj the page object to get the contents of
     * @return a concatenation of any content streams for the requested
     * page.
     */
    private byte[] getContents(PDFObject pageObj) throws IOException {
        // concatenate all the streams
        PDFObject contentsObj = pageObj.getDictRef("Contents");
        if (contentsObj == null) {
            throw new IOException("No page contents!");
        }

        PDFObject contents[] = contentsObj.getArray();

        // see if we have only one stream (the easy case)
        if (contents.length == 1) {
            return contents[0].getStream();
        }

        // first get the total length of all the streams
        int len = 0;
        for (int i = 0; i < contents.length; i++) {
            byte[] data = contents[i].getStream();
            if (data == null) {
                throw new PDFParseException("No stream on content " + i +
                        ": " + contents[i]);
            }
            len += data.length;
        }

        // now assemble them all into one object
        byte[] stream = new byte[len];
        len = 0;
        for (int i = 0; i < contents.length; i++) {
            byte data[] = contents[i].getStream();
            System.arraycopy(data, 0, stream, len, data.length);
            len += data.length;
        }

        return stream;
    }

    /**
     * Create a PDF Page object by finding the relevant inherited
     * properties
     *
     * @param pageObj the PDF object for the page to be created
     */
    private PDFPage createPage(int pagenum, PDFObject pageObj)
            throws IOException {
        int rotation = 0;
        Rectangle2D mediabox = null; // second choice, if no crop
        Rectangle2D cropbox = null// first choice

        PDFObject mediaboxObj = getInheritedValue(pageObj, "MediaBox");
        if (mediaboxObj != null) {
            mediabox = parseNormalisedRectangle(mediaboxObj);
        }

        PDFObject cropboxObj = getInheritedValue(pageObj, "CropBox");
        if (cropboxObj != null) {
            cropbox = parseNormalisedRectangle(cropboxObj);
        }

        PDFObject rotateObj = getInheritedValue(pageObj, "Rotate");
        if (rotateObj != null) {
            rotation = rotateObj.getIntValue();
        }

        // read annotations and add them to the PDF page
        PDFObject annots = getInheritedValue(pageObj, "Annots");
        List<PDFAnnotation> annotationList = new ArrayList<PDFAnnotation>();
        if (annots != null) {
            if (annots.getType() != PDFObject.ARRAY) {
                throw new PDFParseException("Can't parse annotations: " + annots.toString());
            }
          PDFObject[] array = annots.getArray();
          for (PDFObject object : array) {
                try {
                PDFAnnotation pdfAnnot = PDFAnnotation.createAnnotation(object);
                if(pdfAnnot != null) {
                    annotationList.add(pdfAnnot);
                }
                }catch (PDFParseException e) {
              // do nothing, annotations could not be parsed and links will not be displayed.
            }
      }           
        }
       
        Rectangle2D bbox = ((cropbox == null) ? mediabox : cropbox);
        PDFPage page = new PDFPage(pagenum, bbox, rotation, this.cache);
        page.setAnnots(annotationList);
        return page;
    }

    /**
     * Get the PDFObject representing the content of a particular page. Note
     * that the number of the page need not have anything to do with the
     * label on that page.  If there are two blank pages, and then roman
     * numerals for the page number, then passing in 6 will get page (iv).
     *
     * @param pagedict the top of the pages tree
     * @param start the page number of the first page in this dictionary
     * @param getPage the number of the page to find; NOT the page's label.
     * @param resources a HashMap that will be filled with any resource
     *                  definitions encountered on the search for the page
     */
    private PDFObject findPage(PDFObject pagedict, int start, int getPage,
            Map<String,PDFObject> resources) throws IOException {
        PDFObject rsrcObj = pagedict.getDictRef("Resources");
        if (rsrcObj != null) {
            resources.putAll(rsrcObj.getDictionary());
        }

        PDFObject typeObj = pagedict.getDictRef("Type");
        if (typeObj != null && typeObj.getStringValue().equals("Page")) {
            // we found our page!
            return pagedict;
        }

        // find the first child for which (start + count) > getPage
        PDFObject kidsObj = pagedict.getDictRef("Kids");
        if (kidsObj != null) {
            PDFObject[] kids = kidsObj.getArray();
            for (int i = 0; i < kids.length; i++) {
                int count = 1;
                // BUG: some PDFs (T1Format.pdf) don't have the Type tag.
                // use the Count tag to indicate a Pages dictionary instead.
                PDFObject countItem = kids[i].getDictRef("Count");
                //                if (kids[i].getDictRef("Type").getStringValue().equals("Pages")) {
                if (countItem != null) {
                    count = countItem.getIntValue();
                }

                if (start + count >= getPage) {
                    return findPage(kids[i], start, getPage, resources);
                }

                start += count;
            }
        }

        return null;
    }

    /**
     * Find a property value in a page that may be inherited.  If the value
     * is not defined in the page itself, follow the page's "parent" links
     * until the value is found or the top of the tree is reached.
     *
     * @param pageObj the object representing the page
     * @param propName the name of the property we are looking for
     */
    private PDFObject getInheritedValue(PDFObject pageObj, String propName)
            throws IOException {
        // see if we have the property
        PDFObject propObj = pageObj.getDictRef(propName);
        if (propObj != null) {
            return propObj;
        }

        // recursively see if any of our parent have it
        PDFObject parentObj = pageObj.getDictRef("Parent");
        if (parentObj != null) {
            return getInheritedValue(parentObj, propName);
        }

        // no luck
        return null;
    }

    public static Rectangle2D parseNormalisedRectangle(PDFObject obj)
            throws IOException {

        if (obj != null) {
            if (obj.getType() == PDFObject.ARRAY) {
                PDFObject bounds[] = obj.getArray();
                if (bounds.length == 4) {
                    final double x0 = bounds[0].getDoubleValue();
                    final double y0 = bounds[1].getDoubleValue();
                    final double x1 = bounds[2].getDoubleValue();
                    final double y1 = bounds[3].getDoubleValue();

                    final double minX;
                    final double maxY;
                    final double maxX;
                    final double minY;

                    if (x0 < x1) {
                        minX = x0;
                        maxX = x1;
                    } else {
                        minX = x1;
                        maxX = x0;
                    }
                    if (y0 < y1) {
                        minY = y0;
                        maxY = y1;
                    } else {
                        minY = y1;
                        maxY = y0;
                    }

                    return new Rectangle2D.Double(minX, minY, Math.abs(maxX - minX), Math.abs(maxY - minY));

                } else {
                    throw new PDFParseException("Rectangle definition didn't have 4 elements");
                }
            } else {
                throw new PDFParseException("Rectangle definition not an array");
            }
        } else {
            throw new PDFParseException("Rectangle not present");
        }

    }

    /**
     * Get the default decrypter for the document
     * @return the default decrypter; never null, even for documents that
     *  aren't encrypted
     */
    public PDFDecrypter getDefaultDecrypter() {
        return this.defaultDecrypter;
    }
}
TOP

Related Classes of com.sun.pdfview.PDFFile

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.