/*
* Copyright (c) 2007-2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package org.whattf.io;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.MalformedURLException;
import com.hp.hpl.jena.iri.IRI;
import com.hp.hpl.jena.iri.IRIFactory;
/**
 * Parses the media-type header of a <code>data:</code> URI (RFC 2397) and
 * exposes the resulting content type together with a stream positioned at
 * the start of the data part.
 *
 * <p>The header is consumed with a small state machine that follows the MIME
 * <code>type/subtype;param=value</code> grammar, including quoted parameter
 * values and the trailing <code>;base64</code> marker.
 */
public class DataUri {

    /**
     * Checks whether the given string starts with <code>data:</code>,
     * comparing the scheme name ASCII-case-insensitively.
     *
     * @param uri the string to test; may be <code>null</code>
     * @return <code>true</code> if <code>uri</code> is non-null and begins
     *         with <code>data:</code> in any letter case
     */
    public static boolean startsWithData(String uri) {
        return uri != null && uri.length() >= 5
                && (uri.charAt(0) == 'd' || uri.charAt(0) == 'D')
                && (uri.charAt(1) == 'a' || uri.charAt(1) == 'A')
                && (uri.charAt(2) == 't' || uri.charAt(2) == 'T')
                && (uri.charAt(3) == 'a' || uri.charAt(3) == 'A')
                && (uri.charAt(4) == ':');
    }

    /** States of the media-type header parser used by {@link #init(IRI)}. */
    private enum State {
        AT_START,
        IN_SUPERTYPE,
        AT_SUBTYPE_START,
        IN_SUBTYPE,
        SEMICOLON_SEEN,
        WS_BEFORE_SEMICOLON,
        IN_PARAM_NAME,
        EQUALS_SEEN,
        IN_QUOTED_STRING,
        IN_UNQUOTED_STRING,
        IN_QUOTED_PAIR,
        CLOSE_QUOTE_SEEN
    }

    /** Content type taken from the URI header (without the base64 marker). */
    private String contentType;

    /** Stream over the data part that follows the header's comma. */
    private InputStream inputStream;

    /**
     * Parses the header of the given <code>data:</code> IRI and sets the
     * content type and the input stream for the data part.
     *
     * @param uri the IRI to parse
     * @throws IllegalArgumentException if the scheme of <code>uri</code> is
     *         not <code>data</code> (compared case-insensitively)
     * @throws MalformedURLException if the IRI has a fragment, has no path,
     *         ends before the comma, or contains a non-ASCII character in
     *         the header
     * @throws IOException if the header violates the expected media-type
     *         syntax or the underlying stream fails
     */
    protected void init(IRI uri) throws IOException, MalformedURLException {
        // Null-safe and case-insensitive, in line with startsWithData().
        if (!"data".equalsIgnoreCase(uri.getScheme())) {
            throw new IllegalArgumentException("The input did not start with data:.");
        }
        if (uri.getRawFragment() != null) {
            throw new MalformedURLException("Fragment is not allowed for data: URIs according to RFC 2397. But if strictly comply with RFC 3986, ignore this error.");
        }
        String rawPath = uri.getRawPath();
        if (rawPath == null) {
            // Guard in case the IRI implementation reports no path at all;
            // without a path there cannot be a comma-terminated header.
            throw new MalformedURLException("Premature end of URI.");
        }
        // NOTE(review): PercentDecodingReaderInputStream is presumably what
        // undoes %-escapes while reading; confirm against its implementation.
        InputStream is = new PercentDecodingReaderInputStream(new StringReader(rawPath));
        // Collects every header character read so far; the content type is
        // cut out of this buffer once the terminating comma is seen.
        StringBuilder sb = new StringBuilder();
        State state = State.AT_START;
        int i = 0; // index of the current character, used in error reports
        for (;; i++) {
            int b = is.read();
            if (b == -1) {
                throw new MalformedURLException("Premature end of URI.");
            }
            if (b >= 0x80) {
                throw new MalformedURLException("Non-ASCII character in MIME type part of the data URI.");
            }
            char c = (char) b;
            sb.append(c);
            switch (state) {
                case AT_START:
                    if (isTokenChar(c)) {
                        state = State.IN_SUPERTYPE;
                        continue;
                    } else if (c == ';') {
                        // No explicit type: substitute text/plain and carry
                        // on with the parameters.
                        sb.setLength(0);
                        sb.append("text/plain;");
                        state = State.SEMICOLON_SEEN;
                        continue;
                    } else if (c == ',') {
                        // Completely empty header: use the default type.
                        contentType = "text/plain;charset=US-ASCII";
                        inputStream = is;
                        return;
                    } else {
                        throw newDatatypeException(i,
                                "Expected a token character or a semicolon but saw ",
                                c, " instead.");
                    }
                case IN_SUPERTYPE:
                    if (isTokenChar(c)) {
                        continue;
                    } else if (c == '/') {
                        state = State.AT_SUBTYPE_START;
                        continue;
                    } else {
                        throw newDatatypeException(i,
                                "Expected a token character or \u201C/\u201D but saw ",
                                c, " instead.");
                    }
                case AT_SUBTYPE_START:
                    if (isTokenChar(c)) {
                        state = State.IN_SUBTYPE;
                        continue;
                    } else {
                        throw newDatatypeException(i,
                                "Expected a token character but saw ",
                                c, " instead.");
                    }
                case IN_SUBTYPE:
                    if (isTokenChar(c)) {
                        continue;
                    } else if (c == ';') {
                        state = State.SEMICOLON_SEEN;
                        continue;
                    } else if (isWhitespace(c)) {
                        state = State.WS_BEFORE_SEMICOLON;
                        continue;
                    } else if (c == ',') {
                        // Drop the comma that was just appended.
                        contentType = sb.substring(0, sb.length() - 1);
                        inputStream = is;
                        return;
                    } else {
                        throw newDatatypeException(i,
                                "Expected a token character, whitespace, a semicolon or a comma but saw ",
                                c, " instead.");
                    }
                case WS_BEFORE_SEMICOLON:
                    if (isWhitespace(c)) {
                        continue;
                    } else if (c == ';') {
                        state = State.SEMICOLON_SEEN;
                        continue;
                    } else {
                        throw newDatatypeException(i,
                                "Expected whitespace or a semicolon but saw ",
                                c, " instead.");
                    }
                case SEMICOLON_SEEN:
                    if (isWhitespace(c)) {
                        continue;
                    } else if (isTokenChar(c)) {
                        state = State.IN_PARAM_NAME;
                        continue;
                    } else {
                        throw newDatatypeException(i,
                                "Expected whitespace or a token character but saw ",
                                c, " instead.");
                    }
                case IN_PARAM_NAME:
                    if (isTokenChar(c)) {
                        continue;
                    } else if (c == '=') {
                        state = State.EQUALS_SEEN;
                        continue;
                    } else if (c == ',') {
                        // A comma directly after a parameter name is only
                        // valid when the "parameter" is the base64 marker.
                        int baseFirst = sb.length() - 8;
                        if (baseFirst >= 0 && ";base64,".equals(sb.substring(baseFirst, sb.length()))) {
                            contentType = sb.substring(0, baseFirst);
                            // NOTE(review): Base64InputStream presumably
                            // decodes the payload on the fly; confirm.
                            inputStream = new Base64InputStream(is);
                            return;
                        }
                        // Previously this fell through into EQUALS_SEEN and
                        // was rejected there with an unrelated message;
                        // report the actual problem instead.
                        throw newDatatypeException(i,
                                "Expected an equals sign or a token character (a comma is allowed here only as part of \u201C;base64,\u201D) but saw ",
                                c, " instead.");
                    } else {
                        throw newDatatypeException(i,
                                "Expected an equals sign, a comma or a token character but saw ",
                                c, " instead.");
                    }
                case EQUALS_SEEN:
                    if (c == '\"') {
                        state = State.IN_QUOTED_STRING;
                        continue;
                    } else if (isTokenChar(c)) {
                        state = State.IN_UNQUOTED_STRING;
                        continue;
                    } else {
                        throw newDatatypeException(i,
                                "Expected a double quote or a token character but saw ",
                                c, " instead.");
                    }
                case IN_QUOTED_STRING:
                    if (c == '\\') {
                        state = State.IN_QUOTED_PAIR;
                        continue;
                    } else if (c == '\"') {
                        state = State.CLOSE_QUOTE_SEEN;
                        continue;
                    } else if (isQDTextChar(c)) {
                        continue;
                    } else {
                        throw newDatatypeException(i,
                                "Expected a non-control ASCII character but saw ",
                                c, " instead.");
                    }
                case IN_QUOTED_PAIR:
                    // The character after a backslash is taken literally.
                    if (c <= 127) {
                        state = State.IN_QUOTED_STRING;
                        continue;
                    } else {
                        throw newDatatypeException(i,
                                "Expected an ASCII character but saw ",
                                c, " instead.");
                    }
                case CLOSE_QUOTE_SEEN:
                    if (c == ';') {
                        state = State.SEMICOLON_SEEN;
                        continue;
                    } else if (isWhitespace(c)) {
                        state = State.WS_BEFORE_SEMICOLON;
                        continue;
                    } else if (c == ',') {
                        contentType = sb.substring(0, sb.length() - 1);
                        inputStream = is;
                        return;
                    } else {
                        // Message now names what this state really accepts.
                        throw newDatatypeException(i,
                                "Expected a semicolon, whitespace or a comma but saw ",
                                c, " instead.");
                    }
                case IN_UNQUOTED_STRING:
                    if (isTokenChar(c)) {
                        continue;
                    } else if (c == ';') {
                        state = State.SEMICOLON_SEEN;
                        continue;
                    } else if (isWhitespace(c)) {
                        state = State.WS_BEFORE_SEMICOLON;
                        continue;
                    } else if (c == ',') {
                        contentType = sb.substring(0, sb.length() - 1);
                        inputStream = is;
                        return;
                    } else {
                        throw newDatatypeException(i,
                                "Expected a token character, whitespace, a semicolon, or a comma but saw ",
                                c, " instead.");
                    }
            }
        }
    }

    /**
     * Creates a <code>DataUri</code> from a string by constructing an IRI
     * with a locally configured {@link IRIFactory} and parsing it.
     *
     * @param uri the <code>data:</code> URI as a string
     * @throws MalformedURLException if the URI is not a well-formed
     *         <code>data:</code> URI (see {@link #init(IRI)})
     * @throws IOException if the header syntax is invalid or reading fails
     */
    public DataUri(String uri) throws IOException, MalformedURLException {
        IRIFactory fac = new IRIFactory();
        fac.shouldViolation(true, false);
        fac.securityViolation(true, false);
        fac.dnsViolation(true, false);
        fac.mintingViolation(false, false);
        fac.useSpecificationIRI(true);
        init(fac.construct(uri));
    }

    /**
     * Creates a <code>DataUri</code> from an already constructed IRI.
     *
     * @param uri the <code>data:</code> IRI
     * @throws MalformedURLException if the IRI is not a well-formed
     *         <code>data:</code> URI (see {@link #init(IRI)})
     * @throws IOException if the header syntax is invalid or reading fails
     */
    public DataUri(IRI uri) throws IOException, MalformedURLException {
        init(uri);
    }

    /**
     * Builds the exception used to report a syntax error in the header.
     *
     * @param i index of the offending character
     * @param head message text preceding the character
     * @param c the offending character
     * @param tail message text following the character
     * @return the exception to throw
     */
    private static IOException newDatatypeException(int i, String head, char c, String tail) {
        return new DataUriException(i, head, c, tail);
    }

    /**
     * Checks whether a character may appear inside a quoted parameter value:
     * U+0020 through U+007E, or tab, line feed or carriage return. The
     * caller handles the double quote and the backslash before asking.
     *
     * @param c the character
     * @return <code>true</code> if allowed in quoted text
     */
    private static boolean isQDTextChar(char c) {
        return (c >= ' ' && c <= 126) || (c == '\n') || (c == '\r')
                || (c == '\t');
    }

    /**
     * Checks whether a character is a token character: U+0021 through
     * U+007E excluding the separator characters listed in the method body.
     *
     * @param c the character
     * @return <code>true</code> if a token character
     */
    private static boolean isTokenChar(char c) {
        return (c >= 33 && c <= 126)
                && !(c == '(' || c == ')' || c == '<' || c == '>' || c == '@'
                        || c == ',' || c == ';' || c == ':' || c == '\\'
                        || c == '\"' || c == '/' || c == '[' || c == ']'
                        || c == '?' || c == '=' || c == '{' || c == '}');
    }

    /**
     * Checks if a UTF-16 code unit represents a whitespace character (U+0020,
     * U+0009, U+000D or U+000A).
     *
     * @param c the code unit
     * @return <code>true</code> if whitespace, <code>false</code> otherwise
     */
    private static boolean isWhitespace(char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    }

    /**
     * Returns the content type parsed from the URI header.
     *
     * @return the contentType
     */
    public String getContentType() {
        return contentType;
    }

    /**
     * Returns the stream over the data part of the URI.
     *
     * @return the inputStream
     */
    public InputStream getInputStream() {
        return inputStream;
    }
}