private CharSequence readFile(String href, String baseURI, String encoding, XPathContext context)
throws XPathException {
final Configuration config = context.getConfiguration();
NameChecker checker = config.getNameChecker();
// Use the URI machinery to validate and resolve the URIs
Platform platform = Configuration.getPlatform();
URI absoluteURI;
try {
absoluteURI = platform.makeAbsolute(href, baseURI);
} catch (java.net.URISyntaxException err) {
XPathException e = new XPathException("Cannot resolve relative URI", err);
e.setErrorCode("XTDE1170");
throw e;
}
if (absoluteURI.getFragment() != null) {
XPathException e = new XPathException("URI for unparsed-text() must not contain a fragment identifier");
e.setErrorCode("XTDE1170");
throw e;
}
// The URL dereferencing classes throw all kinds of strange exceptions if given
// ill-formed sequences of %hh escape characters. So we do a sanity check that the
// escaping is well-formed according to UTF-8 rules
EscapeURI.checkPercentEncoding(absoluteURI.toString());
Reader reader = context.getController().getUnparsedTextURIResolver().resolve(absoluteURI, encoding, config);
try {
FastStringBuffer sb = new FastStringBuffer(2048);
char[] buffer = new char[2048];
boolean first = true;
int actual;
int line = 1;
int column = 1;
while (true) {
actual = reader.read(buffer, 0, 2048);
if (actual < 0) {
break;
}
for (int c=0; c<actual;) {
int ch32 = buffer[c++];
if (ch32 == '\n') {
line++;
column = 0;
}
column++;
if (UTF16.isHighSurrogate(ch32)) {
if (c==actual) {
actual = reader.read(buffer, 0, 2048);
c = 0;
}
char low = buffer[c++];
ch32 = UTF16.combinePair((char)ch32, low);
}
if (!checker.isValidChar(ch32)) {
XPathException err = new XPathException("The unparsed-text file contains a character illegal in XML (line=" +
line + " column=" + column + " value=hex " + Integer.toHexString(ch32) + ')');
err.setErrorCode("XTDE1190");
throw err;
}