// 0x0001 0000 <= c <= 0x001f ffff
// Unicode supports c <= 0x0010 ffff ...
if (c > 0x0010ffff)
throw new CharConversionException("UTF-8 encoding of character 0x00"
+ Integer.toHexString(c)
+ " can't be converted to Unicode.");
// Convert UCS-4 char to surrogate pair (UTF-16)
c -= 0x10000;
nextChar = (char) (0xDC00 + (c & 0x03ff));
c = 0xD800 + (c >> 10);
// 5 and 6 byte versions are XML WF errors, but
// typically come from mislabeled encodings
} else
throw new CharConversionException("Unconvertible UTF-8 character"
+ " beginning with 0x"
+ Integer.toHexString(buffer[start] & 0xff));
} catch (ArrayIndexOutOfBoundsException e) {
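// An index ran past the end of the buffer while decoding a multi-byte
// character (original note: "off > length && length >= buffer.length").
// Reset c; the "off > finish" check below decides whether to refill.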
c = 0;
}
//
// if the buffer held only a partial character,
// compact it and try to read the rest of the
// character. worst case involves three
// single-byte reads -- quite rare.
//
if (off > finish) {
System.arraycopy(buffer, start,
buffer, 0, finish - start);
finish -= start;
start = 0;
off = instream.read(buffer, finish,
buffer.length - finish);
if (off < 0) {
this.close();
throw new CharConversionException("Partial UTF-8 char");
}
finish += off;
continue;
}
//
// check the format of the non-initial bytes
//
for (start++; start < off; start++) {
if ((buffer[start] & 0xC0) != 0x80) {
this.close();
throw new CharConversionException("Malformed UTF-8 char -- "
+ "is an XML encoding declaration missing?");
}
}
//