/*
* UTF8ISReader.java
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
// Expand to define MIDP define
//#define DMIDP20
//#ifdef DMIDP20
package cz.cacek.ebook;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.UTFDataFormatException;
/**
* Encoding of ebooks is UTF-8, and not all J2ME implementations support this encoding
* (e.g. my Siemens S55). This class adds support for UTF-8.
* @author Josef Cacek [josef.cacek (at) atlas.cz]
* @author $Author: ibuntonjr $
* @version $Revision: 934 $
* @created $Date: 2008-07-16 05:33:20 +0800 (周三, 2008-07-16) $
*/
public class UTF8ISReader extends Reader {

    /** Slot marker in {@link #readAhead}: this byte has not been fetched from the stream yet. */
    private static final int NOT_READ = -2;

    /** Value returned by {@link InputStream#read()} when the stream is exhausted. */
    private static final int END_OF_STREAM = -1;

    /**
     * Bytes of the character currently being decoded (lead byte plus up to two
     * continuation bytes). Kept between calls so that a character interrupted
     * half-way can be completed by the next {@link #read(char[], int, int)}.
     */
    private int readAhead[];

    /**
     * When <code>true</code> the next fetch reads from the stream without
     * consulting {@link InputStream#available()} first (i.e. it may block).
     */
    private boolean readNext;

    /** Source of the UTF-8 encoded bytes. */
    private InputStream inputStream;

    /**
     * Creates a reader decoding UTF-8 bytes from the given stream.
     *
     * @param anIn stream to read from
     * @throws NullPointerException if <code>anIn</code> is <code>null</code>
     */
    public UTF8ISReader(InputStream anIn) {
        if (anIn == null) {
            throw new NullPointerException("Can't read null stream.");
        }
        inputStream = anIn;
        readAhead = new int[3];
        prepareForNextChar();
    }

    /**
     * Tells whether the underlying stream reports available bytes.
     * An {@link IOException} raised by the stream is reported as "not ready".
     *
     * @return <code>true</code> if <code>available()</code> of the stream is positive
     * @see java.io.Reader#ready()
     */
    public boolean ready() {
        try {
            return inputStream.available() > 0;
        } catch (IOException ignored) {
            // Best effort: a stream that cannot report availability is treated as not ready.
            return false;
        }
    }

    /**
     * Closes the underlying stream.
     *
     * @throws IOException if closing the stream fails
     * @see java.io.Reader#close()
     */
    public void close() throws IOException {
        //bj check if is there something to close
        if (inputStream != null) {
            inputStream.close();
        }
    }

    /**
     * Decodes up to <code>len</code> characters into <code>cbuf</code>.
     * <p>
     * Only 1, 2 and 3 byte UTF-8 sequences are accepted; any other lead byte
     * (including 4 byte sequences) is rejected.
     * <p>
     * The first byte fetched by a call is read unconditionally; further bytes
     * are fetched only while the stream reports them as available, so the
     * method may return fewer than <code>len</code> characters. A character
     * whose continuation bytes are not yet available is finished by blocking
     * reads when nothing has been decoded in this call, so that the method
     * never returns 0 for a positive <code>len</code>.
     *
     * @param cbuf destination buffer
     * @param off index in <code>cbuf</code> of the first character stored
     * @param len maximum number of characters to decode
     * @return number of characters stored, or -1 if the end of the stream was
     *         reached before any character was decoded
     * @throws UTFDataFormatException on an invalid lead or continuation byte,
     *         or when the stream ends inside a character
     * @throws IOException if the underlying stream fails
     * @see java.io.Reader#read(char[], int, int)
     */
    public int read(char cbuf[], int off, int len) throws IOException {
        if (len == 0) {
            return 0;
        }
        int count = 0;
        // The first fetch of every call is allowed to block.
        readNext = true;
        while (count < len) {
            int firstByte = getByteOfCurrentChar(0);
            if (firstByte < 0) {
                if (firstByte == END_OF_STREAM && count == 0) {
                    return -1;
                }
                // Either nothing more is available right now, or the end of the
                // stream was hit after some characters: hand over what we have.
                return count;
            }
            int extraBytes;
            int currentChar;
            switch (firstByte >> 4) {
            case 0:
            case 1:
            case 2:
            case 3:
            case 4:
            case 5:
            case 6:
            case 7:
                // 0xxxxxxx: single byte (ASCII)
                extraBytes = 0;
                currentChar = firstByte;
                break;
            case 12:
            case 13:
                // 110xxxxx: one continuation byte follows
                extraBytes = 1;
                currentChar = firstByte & 0x1f;
                break;
            case 14:
                // 1110xxxx: two continuation bytes follow
                extraBytes = 2;
                currentChar = firstByte & 0xf;
                break;
            default:
                // 10xxxxxx (stray continuation byte) or 1111xxxx (unsupported)
                throw new UTFDataFormatException("invalid first byte "
                        + Integer.toBinaryString(firstByte));
            }
            for (int j = 1; j <= extraBytes; j++) {
                int nextByte = getByteOfCurrentChar(j);
                if (nextByte == NOT_READ) {
                    if (count > 0) {
                        // Bytes fetched so far stay in readAhead; the next call
                        // resumes this character.
                        return count;
                    }
                    // Nothing decoded yet in this call: returning 0 would break
                    // the Reader contract (Reader.read() would report a bogus
                    // '\u0000'), so block until the character is complete.
                    readNext = true;
                    nextByte = getByteOfCurrentChar(j);
                }
                if (nextByte == END_OF_STREAM) {
                    throw new UTFDataFormatException("partial character");
                }
                if ((nextByte & 0xc0) != 128) {
                    throw new UTFDataFormatException("invalid byte "
                            + Integer.toBinaryString(nextByte));
                }
                currentChar = (currentChar << 6) + (nextByte & 0x3f);
            }
            cbuf[off + count] = (char) currentChar;
            count++;
            prepareForNextChar();
        }
        return count;
    }

    /**
     * Returns the requested byte of the character being decoded, fetching it
     * from the stream if it is not cached in {@link #readAhead} yet.
     *
     * @param byteOfChar index of the byte within the current character (0..2)
     * @return the byte value (0..255), -1 at the end of the stream, or -2 if
     *         the byte was not fetched because a blocking read is not allowed
     *         and the stream reports nothing available
     * @throws IOException if the underlying stream fails
     */
    private int getByteOfCurrentChar(int byteOfChar) throws IOException {
        if (readAhead[byteOfChar] != NOT_READ) {
            return readAhead[byteOfChar];
        }
        if (!readNext && inputStream.available() <= 0) {
            return NOT_READ;
        }
        readAhead[byteOfChar] = inputStream.read();
        // Only one unconditional read is granted per request.
        readNext = false;
        return readAhead[byteOfChar];
    }

    /** Marks all cached bytes as not fetched, ready for decoding the next character. */
    private void prepareForNextChar() {
        readAhead[0] = NOT_READ;
        readAhead[1] = NOT_READ;
        readAhead[2] = NOT_READ;
    }

    /**
     * @return always <code>false</code>, marking is not supported
     * @see java.io.Reader#markSupported()
     */
    public boolean markSupported() {
        return false;
    }

    /**
     * Not supported.
     *
     * @param readAheadLimit ignored
     * @throws IOException always
     * @see java.io.Reader#mark(int)
     */
    public void mark(int readAheadLimit) throws IOException {
        throw new IOException("mark() not supported");
    }

    /**
     * Not supported.
     *
     * @throws IOException always
     * @see java.io.Reader#reset()
     */
    public void reset() throws IOException {
        throw new IOException("reset() not supported");
    }

    /**
     * Returns count of characters (UTF-8) in given part of byte array.
     * <p>
     * Only lead bytes are inspected; continuation bytes are skipped without
     * validation. Counting stops at the first byte that is not a valid lead
     * byte of a 1, 2 or 3 byte sequence, and that byte is included in the
     * returned count.
     *
     * @param array byte array
     * @param offset start position of counting
     * @param length count of bytes for counting characters
     * @return count of characters in given part of byte array
     */
    public int sizeOf(byte array[], int offset, int length) {
        int count = 0;
        for (int endOfArray = offset + length; offset < endOfArray;) {
            count++;
            switch ((array[offset] & 0xff) >> 4) {
            case 0:
            case 1:
            case 2:
            case 3:
            case 4:
            case 5:
            case 6:
            case 7:
                // single byte character
                offset++;
                break;
            case 12:
            case 13:
                // two byte character
                offset += 2;
                break;
            case 14:
                // three byte character
                offset += 3;
                break;
            default:
                // not a supported lead byte: stop counting here
                return count;
            }
        }
        return count;
    }
}
//#endif