Examples of com.itextpdf.text.pdf.PRTokeniser

com.itextpdf.text.pdf.PRTokeniser
@author Paulo Soares

    /**
     * Stores the given byte array in this instance's {@code data} field.
     * The array reference is kept as-is; no defensive copy is made.
     *
     * @param data the bytes to keep; NOTE(review): presumably raw CMap content — confirm against callers
     */
    public CidLocationFromByte(byte[] data) {
        this.data = data;
    }
    
    public PRTokeniser getLocation(String location) throws IOException {
        return new PRTokeniser(data);
    }

View Full Code Here

    public PRTokeniser getLocation(String location) throws IOException {
        String fullName = BaseFont.RESOURCE_PATH + "cmaps/" + location;
        InputStream inp = BaseFont.getResourceStream(fullName);
        if (inp == null)
            throw new IOException(MessageLocalization.getComposedMessage("the.cmap.1.was.not.found", fullName));
        return new PRTokeniser(new RandomAccessFileOrArray(inp));
    }

View Full Code Here

    }
    
    private static void parseCid(String cmapName, AbstractCMap cmap, CidLocation location, int level) throws IOException {
        if (level >= MAXLEVEL)
            return;
        PRTokeniser inp = location.getLocation(cmapName);
        try {
            ArrayList<PdfObject> list = new ArrayList<PdfObject>();
            PdfContentParser cp = new PdfContentParser(inp);
            int maxExc = 50;
            while (true) {
                try {
                    cp.parse(list);
                }
                catch (Exception ex) {
                    if (--maxExc < 0)
                        break;
                    continue;
                }
                if (list.isEmpty())
                    break;
                String last = list.get(list.size() - 1).toString();
                if (level == 0 && list.size() == 3 && last.equals(DEF)) {
                    PdfObject key = list.get(0);
                    if (PdfName.REGISTRY.equals(key))
                        cmap.setRegistry(list.get(1).toString());
                    else if (PdfName.ORDERING.equals(key))
                        cmap.setOrdering(list.get(1).toString());
                    else if (CMAPNAME.equals(key))
                        cmap.setName(list.get(1).toString());
                    else if (PdfName.SUPPLEMENT.equals(key)) {
                        try {
                            cmap.setSupplement(((PdfNumber)list.get(1)).intValue());
                        }
                        catch (Exception ex) {}
                    }
                }
                else if ((last.equals(ENDCIDCHAR) || last.equals(ENDBFCHAR)) && list.size() >= 3) {
                    int lmax = list.size() - 2;
                    for (int k = 0; k < lmax; k += 2) {
                        if (list.get(k) instanceof PdfString) {
                            cmap.addChar((PdfString)list.get(k), list.get(k + 1));
                        }
                    }
                }
                else if ((last.equals(ENDCIDRANGE) || last.equals(ENDBFRANGE)) && list.size() >= 4) {
                    int lmax = list.size() - 3;
                    for (int k = 0; k < lmax; k += 3) {
                        if (list.get(k) instanceof PdfString && list.get(k + 1) instanceof PdfString) {
                            cmap.addRange((PdfString)list.get(k), (PdfString)list.get(k + 1), list.get(k + 2));
                        }
                    }
                }
                else if (last.equals(USECMAP) && list.size() == 2 && list.get(0) instanceof PdfName) {
                    parseCid(PdfName.decodeName(list.get(0).toString()), cmap, location, level + 1);
                }
            }
        }
        finally {
            inp.close();
        }
    }

View Full Code Here

        
        PdfNumber h = imageDictionary.getAsNumber(PdfName.HEIGHT);


        int bytesToRead = computeBytesPerRow(imageDictionary, colorSpaceDic) * h.intValue();
        byte[] bytes = new byte[bytesToRead];
        PRTokeniser tokeniser = ps.getTokeniser();
        
        int shouldBeWhiteSpace = tokeniser.read(); // skip next character (which better be a whitespace character - I suppose we could check for this)
        // from the PDF spec:  Unless the image uses ASCIIHexDecode or ASCII85Decode as one of its filters, the ID operator shall be followed by a single white-space character, and the next character shall be interpreted as the first byte of image data.
        // unfortunately, we've seen some PDFs where there is no space following the ID, so we have to capture this case and handle it
        int startIndex = 0;
        if (!PRTokeniser.isWhitespace(shouldBeWhiteSpace)){
            bytes[0] = (byte)shouldBeWhiteSpace;
            startIndex++;
        }
        for(int i = startIndex; i < bytesToRead; i++){
            int ch = tokeniser.read();
            if (ch == -1)
                throw new InlineImageParseException("End of content stream reached before end of image data");
            
            bytes[i] = (byte)ch;
        }

View Full Code Here

        // I'd rather avoid right now.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ByteArrayOutputStream accumulated = new ByteArrayOutputStream();
        int ch;
        int found = 0;
        PRTokeniser tokeniser = ps.getTokeniser();
        
        while ((ch = tokeniser.read()) != -1){
            if (found == 0 && PRTokeniser.isWhitespace(ch)){
                found++;
                accumulated.write(ch);
            } else if (found == 1 && ch == 'E'){
                found++;

View Full Code Here

     * @param resources    the resources that come with the content stream
     */
    public void processContent(byte[] contentBytes, PdfDictionary resources){
        this.resources.push(resources);
        try {
            PRTokeniser tokeniser = new PRTokeniser(contentBytes);
            PdfContentParser ps = new PdfContentParser(tokeniser);
            ArrayList<PdfObject> operands = new ArrayList<PdfObject>();
            while (ps.parse(operands).size() > 0){
                PdfLiteral operator = (PdfLiteral)operands.get(operands.size()-1);
                if ("BI".equals(operator.toString())){

View Full Code Here

     */
    public void processContent(byte[] contentBytes, PdfDictionary resources){


        this.resources.push(resources);
        try {
            PRTokeniser tokeniser = new PRTokeniser(contentBytes);
            PdfContentParser ps = new PdfContentParser(tokeniser);
            ArrayList<PdfObject> operands = new ArrayList<PdfObject>();
            while (ps.parse(operands).size() > 0){
                PdfLiteral operator = (PdfLiteral)operands.get(operands.size()-1);
                
                // special handling for embedded images.  If we hit an ID operator, we need
                // to skip all content until we reach an EI operator surrounded by whitespace.
                // The following algorithm has one potential issue: what if the image stream 
                // contains <ws>EI<ws> ?
                // it sounds like we would have to actually decode the content stream, which
                // I'd rather avoid right now.
                if ("ID".equals(operator.toString())){
                    ByteArrayOutputStream baos = new ByteArrayOutputStream();
                    ByteArrayOutputStream accumulated = new ByteArrayOutputStream();
                    int ch;
                    int found = 0;
                    while ((ch = tokeniser.read()) != -1){
                        if (found == 0 && PRTokeniser.isWhitespace(ch)){
                            found++;
                            accumulated.write(ch);
                        } else if (found == 1 && ch == 'E'){
                            found++;

View Full Code Here

     * @param resources    the resources that come with the content stream
     */
    public void processContent(byte[] contentBytes, PdfDictionary resources){
        this.resources.push(resources);
        try {
            PRTokeniser tokeniser = new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().createSource(contentBytes)));
            PdfContentParser ps = new PdfContentParser(tokeniser);
            ArrayList<PdfObject> operands = new ArrayList<PdfObject>();
            while (ps.parse(operands).size() > 0){
                PdfLiteral operator = (PdfLiteral)operands.get(operands.size()-1);
                if ("BI".equals(operator.toString())){

View Full Code Here

        
        PdfNumber h = imageDictionary.getAsNumber(PdfName.HEIGHT);


        int bytesToRead = computeBytesPerRow(imageDictionary, colorSpaceDic) * h.intValue();
        byte[] bytes = new byte[bytesToRead];
        PRTokeniser tokeniser = ps.getTokeniser();
        
        int shouldBeWhiteSpace = tokeniser.read(); // skip next character (which better be a whitespace character - I suppose we could check for this)
        // from the PDF spec:  Unless the image uses ASCIIHexDecode or ASCII85Decode as one of its filters, the ID operator shall be followed by a single white-space character, and the next character shall be interpreted as the first byte of image data.
        // unfortunately, we've seen some PDFs where there is no space following the ID, so we have to capture this case and handle it
        int startIndex = 0;
        if (!PRTokeniser.isWhitespace(shouldBeWhiteSpace) || shouldBeWhiteSpace == 0){ // tokeniser treats 0 as whitespace, but for our purposes, we shouldn't
            bytes[0] = (byte)shouldBeWhiteSpace;
            startIndex++;
        }
        for(int i = startIndex; i < bytesToRead; i++){
            int ch = tokeniser.read();
            if (ch == -1)
                throw new InlineImageParseException("End of content stream reached before end of image data");
            
            bytes[i] = (byte)ch;
        }

View Full Code Here

        // I'd rather avoid right now.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ByteArrayOutputStream accumulated = new ByteArrayOutputStream();
        int ch;
        int found = 0;
        PRTokeniser tokeniser = ps.getTokeniser();
        
        while ((ch = tokeniser.read()) != -1){
            if (found == 0 && PRTokeniser.isWhitespace(ch)){
                found++;
                accumulated.write(ch);
            } else if (found == 1 && ch == 'E'){
                found++;

View Full Code Here

0 1 2

TOP

Related Classes of com.itextpdf.text.pdf.PRTokeniser

br.com.objectos.way.boleto.PdfToText

com.itextpdf.text.io.RandomAccessSourceFactory

com.itextpdf.text.pdf.fonts.cmaps.CidLocationFromByte

com.itextpdf.text.pdf.fonts.cmaps.CidResource

com.itextpdf.text.pdf.fonts.cmaps.CMapParserEx

com.itextpdf.text.pdf.mc.MCParser

com.itextpdf.text.pdf.ocg.OCGParser

com.itextpdf.text.pdf.parser.InlineImageUtils

com.itextpdf.text.pdf.parser.PdfContentStreamProcessor

com.itextpdf.text.exceptions.InvalidPdfException

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.