/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.filter;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PushbackInputStream;
import java.io.StreamCorruptedException;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.io.NBitInputStream;
import org.apache.pdfbox.io.NBitOutputStream;
/**
* This is the used for the LZWDecode filter.
*
* @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
* @version $Revision: 1.15 $
*/
public class LZWFilter implements Filter
{
/**
* The LZW clear table code.
*/
public static final long CLEAR_TABLE = 256;
/**
* The LZW end of data code.
*/
public static final long EOD = 257;
/**
* {@inheritDoc}
*/
public void decode( InputStream compressedData, OutputStream result, COSDictionary options, int filterIndex )
throws IOException
{
//log.debug("decode( )");
NBitInputStream in = null;
in = new NBitInputStream( compressedData );
in.setBitsInChunk( 9 );
LZWDictionary dic = new LZWDictionary();
byte firstByte = 0;
long nextCommand = 0;
while( (nextCommand = in.read() ) != EOD )
{
// log.debug( "decode - nextCommand=" + nextCommand + ", bitsInChunk: " + in.getBitsInChunk());
if( nextCommand == CLEAR_TABLE )
{
in.setBitsInChunk( 9 );
dic = new LZWDictionary();
}
else
{
byte[] data = dic.getData( nextCommand );
if( data == null )
{
dic.visit( firstByte );
data = dic.getData( nextCommand );
dic.clear();
}
if( data == null )
{
throw new StreamCorruptedException( "Error: data is null" );
}
dic.visit(data);
//log.debug( "decode - dic.getNextCode(): " + dic.getNextCode());
if( dic.getNextCode() >= 2047 )
{
in.setBitsInChunk( 12 );
}
else if( dic.getNextCode() >= 1023 )
{
in.setBitsInChunk( 11 );
}
else if( dic.getNextCode() >= 511 )
{
in.setBitsInChunk( 10 );
}
else
{
in.setBitsInChunk( 9 );
}
/**
if( in.getBitsInChunk() != dic.getCodeSize() )
{
in.unread( nextCommand );
in.setBitsInChunk( dic.getCodeSize() );
System.out.print( "Switching " + nextCommand + " to " );
nextCommand = in.read();
System.out.println( "" + nextCommand );
data = dic.getData( nextCommand );
}**/
firstByte = data[0];
result.write( data );
}
}
result.flush();
}
/**
* {@inheritDoc}
*/
public void encode( InputStream rawData, OutputStream result, COSDictionary options, int filterIndex )
throws IOException
{
//log.debug("encode( )");
PushbackInputStream input = new PushbackInputStream( rawData, 4096 );
LZWDictionary dic = new LZWDictionary();
NBitOutputStream out = new NBitOutputStream( result );
out.setBitsInChunk( 9 ); //initially nine
out.write( CLEAR_TABLE );
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
int byteRead = 0;
for( int i=0; (byteRead = input.read()) != -1; i++ )
{
//log.debug( "byteRead = '" + (char)byteRead + "' (0x" + Integer.toHexString(byteRead) + "), i=" + i);
buffer.write( byteRead );
dic.visit( (byte)byteRead );
out.setBitsInChunk( dic.getCodeSize() );
//log.debug( "Getting node '" + new String( buffer.toByteArray() ) + "', buffer.size = " + buffer.size() );
LZWNode node = dic.getNode( buffer.toByteArray() );
int nextByte = input.read();
if( nextByte != -1 )
{
//log.debug( "nextByte = '" + (char)nextByte + "' (0x" + Integer.toHexString(nextByte) + ")");
LZWNode next = node.getNode( (byte)nextByte );
if( next == null )
{
//log.debug("encode - No next node, writing node and resetting buffer (" +
// " node.getCode: " + node.getCode() + ")" +
// " bitsInChunk: " + out.getBitsInChunk() +
// ")");
out.write( node.getCode() );
buffer.reset();
}
input.unread( nextByte );
}
else
{
//log.debug("encode - EOF on lookahead: writing node, resetting buffer, and terminating read loop (" +
// " node.getCode: " + node.getCode() + ")" +
// " bitsInChunk: " + out.getBitsInChunk() +
// ")");
out.write( node.getCode() );
buffer.reset();
break;
}
if( dic.getNextCode() == 4096 )
{
//log.debug("encode - Clearing dictionary and unreading pending buffer data (" +
// " bitsInChunk: " + out.getBitsInChunk() +
// ")");
out.write( CLEAR_TABLE );
dic = new LZWDictionary();
input.unread( buffer.toByteArray() );
buffer.reset();
}
}
// Fix the code size based on the fact that we are writing the EOD
//
if( dic.getNextCode() >= 2047 )
{
out.setBitsInChunk( 12 );
}
else if( dic.getNextCode() >= 1023 )
{
out.setBitsInChunk( 11 );
}
else if( dic.getNextCode() >= 511 )
{
out.setBitsInChunk( 10 );
}
else
{
out.setBitsInChunk( 9 );
}
//log.debug("encode - Writing EOD (" +
// " bitsInChunk: " + out.getBitsInChunk() +
// ")");
out.write( EOD );
out.close();
result.flush();
}
}