}
@Override
public RDFDataset parse(Object input) throws JsonLdError {
if (!(input instanceof String)) {
throw new JsonLdError(JsonLdError.Error.INVALID_INPUT,
"Invalid input; Triple RDF Parser requires a string input");
}
final RDFDataset result = new RDFDataset();
final State state = new State((String) input);
while (!"".equals(state.line)) {
// check if line is a directive
Matcher match = Regex.DIRECTIVE.matcher(state.line);
if (match.find()) {
if (match.group(1) != null || match.group(4) != null) {
final String ns = match.group(1) != null ? match.group(1) : match.group(4);
String iri = match.group(1) != null ? match.group(2) : match.group(5);
if (!iri.contains(":")) {
iri = state.baseIri + iri;
}
iri = unescape(iri);
validateIRI(state, iri);
state.namespaces.put(ns, iri);
result.setNamespace(ns, iri);
} else {
String base = match.group(3) != null ? match.group(3) : match.group(6);
base = unescape(base);
validateIRI(state, base);
if (!base.contains(":")) {
state.baseIri = state.baseIri + base;
} else {
state.baseIri = base;
}
}
state.advanceLinePosition(match.group(0).length());
continue;
}
if (state.curSubject == null) {
// we need to match a subject
match = Regex.SUBJECT.matcher(state.line);
if (match.find()) {
String iri;
if (match.group(1) != null) {
// matched IRI
iri = unescape(match.group(1));
if (!iri.contains(":")) {
iri = state.baseIri + iri;
}
} else if (match.group(2) != null) {
// matched NS:NAME
final String ns = match.group(2);
final String name = unescapeReserved(match.group(3));
iri = state.expandIRI(ns, name);
} else if (match.group(4) != null) {
// match ns: only
iri = state.expandIRI(match.group(4), "");
} else if (match.group(5) != null) {
// matched BNODE
iri = state.namer.getName(match.group(0).trim());
} else {
// matched anon node
iri = state.namer.getName();
}
// make sure IRI still matches an IRI after escaping
validateIRI(state, iri);
state.curSubject = iri;
state.advanceLinePosition(match.group(0).length());
}
// handle blank nodes
else if (state.line.startsWith("[")) {
final String bnode = state.namer.getName();
state.advanceLinePosition(1);
state.push();
state.curSubject = bnode;
}
// handle collections
else if (state.line.startsWith("(")) {
final String bnode = state.namer.getName();
// so we know we want a predicate if the collection close
// isn't followed by a subject end
state.curSubject = bnode;
state.advanceLinePosition(1);
state.push();
state.curSubject = bnode;
state.curPredicate = RDF_FIRST;
}
// make sure we have a subject already
else {
throw new JsonLdError(JsonLdError.Error.PARSE_ERROR,
"Error while parsing Turtle; missing expected subject. {line: "
+ state.lineNumber + "position: " + state.linePosition + "}");
}
}
if (state.curPredicate == null) {
// match predicate
match = Regex.PREDICATE.matcher(state.line);
if (match.find()) {
String iri = "";
if (match.group(1) != null) {
// matched IRI
iri = unescape(match.group(1));
if (!iri.contains(":")) {
iri = state.baseIri + iri;
}
} else if (match.group(2) != null) {
// matched NS:NAME
final String ns = match.group(2);
final String name = unescapeReserved(match.group(3));
iri = state.expandIRI(ns, name);
} else if (match.group(4) != null) {
// matched ns:
iri = state.expandIRI(match.group(4), "");
} else {
// matched "a"
iri = RDF_TYPE;
}
validateIRI(state, iri);
state.curPredicate = iri;
state.advanceLinePosition(match.group(0).length());
} else {
throw new JsonLdError(JsonLdError.Error.PARSE_ERROR,
"Error while parsing Turtle; missing expected predicate. {line: "
+ state.lineNumber + "position: " + state.linePosition + "}");
}
}
// expecting bnode or object
// match BNODE values
if (state.line.startsWith("[")) {
final String bnode = state.namer.getName();
result.addTriple(state.curSubject, state.curPredicate, bnode);
state.advanceLinePosition(1);
// check for anonymous objects
if (state.line.startsWith("]")) {
state.advanceLinePosition(1);
// next we expect a statement or object separator
}
// otherwise we're inside the blank node
else {
state.push();
state.curSubject = bnode;
// next we expect a predicate
continue;
}
}
// match collections
else if (state.line.startsWith("(")) {
state.advanceLinePosition(1);
// check for empty collection
if (state.line.startsWith(")")) {
state.advanceLinePosition(1);
result.addTriple(state.curSubject, state.curPredicate, RDF_NIL);
// next we expect a statement or object separator
}
// otherwise we're inside the collection
else {
final String bnode = state.namer.getName();
result.addTriple(state.curSubject, state.curPredicate, bnode);
state.push();
state.curSubject = bnode;
state.curPredicate = RDF_FIRST;
continue;
}
} else {
// match object
match = Regex.OBJECT.matcher(state.line);
if (match.find()) {
String iri = null;
if (match.group(1) != null) {
// matched IRI
iri = unescape(match.group(1));
if (!iri.contains(":")) {
iri = state.baseIri + iri;
}
} else if (match.group(2) != null) {
// matched NS:NAME
final String ns = match.group(2);
final String name = unescapeReserved(match.group(3));
iri = state.expandIRI(ns, name);
} else if (match.group(4) != null) {
// matched ns:
iri = state.expandIRI(match.group(4), "");
} else if (match.group(5) != null) {
// matched BNODE
iri = state.namer.getName(match.group(0).trim());
}
if (iri != null) {
validateIRI(state, iri);
// we have an object (IRI or blank node) rather than a literal
result.addTriple(state.curSubject, state.curPredicate, iri);
} else {
// we have a literal
String value = match.group(6);
String lang = null;
String datatype = null;
if (value != null) {
// we have a string literal
value = unquoteString(value);
value = unescape(value);
lang = match.group(7);
if (lang == null) {
if (match.group(8) != null) {
datatype = unescape(match.group(8));
if (!datatype.contains(":")) {
datatype = state.baseIri + datatype;
}
validateIRI(state, datatype);
} else if (match.group(9) != null) {
datatype = state.expandIRI(match.group(9),
unescapeReserved(match.group(10)));
} else if (match.group(11) != null) {
datatype = state.expandIRI(match.group(11), "");
}
} else {
datatype = RDF_LANGSTRING;
}
} else if (match.group(12) != null) {
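// numeric literal captured by group 12, typed as XSD_DOUBLE below
// NOTE(review): this was labelled "integer literal"; confirm the group
// order in Regex.OBJECT to see whether the label or the datatype is stale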
value = match.group(12);
datatype = XSD_DOUBLE;
} else if (match.group(13) != null) {
// decimal literal
value = match.group(13);
datatype = XSD_DECIMAL;
} else if (match.group(14) != null) {
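// numeric literal captured by group 14, typed as XSD_INTEGER below
// NOTE(review): this was labelled "double literal"; confirm the group
// order in Regex.OBJECT to see whether the label or the datatype is stale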
value = match.group(14);
datatype = XSD_INTEGER;
} else if (match.group(15) != null) {
// boolean literal
value = match.group(15);
datatype = XSD_BOOLEAN;
}
result.addTriple(state.curSubject, state.curPredicate, value, datatype,
lang);
}
state.advanceLinePosition(match.group(0).length());
} else {
throw new JsonLdError(JsonLdError.Error.PARSE_ERROR,
"Error while parsing Turtle; missing expected object or blank node. {line: "
+ state.lineNumber + "position: " + state.linePosition + "}");
}
}
// close collection
boolean collectionClosed = false;
while (state.line.startsWith(")")) {
if (!RDF_FIRST.equals(state.curPredicate)) {
throw new JsonLdError(JsonLdError.Error.PARSE_ERROR,
"Error while parsing Turtle; unexpected ). {line: " + state.lineNumber
+ "position: " + state.linePosition + "}");
}
result.addTriple(state.curSubject, RDF_REST, RDF_NIL);
state.pop();
state.advanceLinePosition(1);
collectionClosed = true;
}
boolean expectDotOrPred = false;
// match end of bnode
if (state.line.startsWith("]")) {
final String bnode = state.curSubject;
state.pop();
state.advanceLinePosition(1);
if (state.curSubject == null) {
// this is a bnode as a subject and we
// expect either a . or a predicate
state.curSubject = bnode;
expectDotOrPred = true;
}
}
// match list separator
if (!expectDotOrPred && state.line.startsWith(",")) {
state.advanceLinePosition(1);
// now we expect another object/bnode
continue;
}
// match predicate end
if (!expectDotOrPred) {
while (state.line.startsWith(";")) {
state.curPredicate = null;
state.advanceLinePosition(1);
// now we expect another predicate, or a dot
expectDotOrPred = true;
}
}
if (state.line.startsWith(".")) {
if (state.expectingBnodeClose) {
throw new JsonLdError(JsonLdError.Error.PARSE_ERROR,
"Error while parsing Turtle; missing expected )\"]\". {line: "
+ state.lineNumber + "position: " + state.linePosition + "}");
}
state.curSubject = null;
state.curPredicate = null;
state.advanceLinePosition(1);
// this can now be the end of the document.
continue;
} else if (expectDotOrPred) {
// we're expecting another predicate since we didn't find a dot
continue;
}
// if we're in a collection
if (RDF_FIRST.equals(state.curPredicate)) {
final String bnode = state.namer.getName();
result.addTriple(state.curSubject, RDF_REST, bnode);
state.curSubject = bnode;
continue;
}
if (collectionClosed) {
// we expect another object
// TODO: it's not clear yet if this is valid
continue;
}
// if we get here, we're missing a close statement
throw new JsonLdError(JsonLdError.Error.PARSE_ERROR,
"Error while parsing Turtle; missing expected \"]\" \",\" \";\" or \".\". {line: "
+ state.lineNumber + "position: " + state.linePosition + "}");
}
return result;