/**
* org.bioversityinternational.model.rdf.Parser
*
* Created: Sep 25, 2013 - 10:45:05 AM
*
* Copyright 2013 Bioversity International and
* the Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* The code tree likely contains a copy of the License,
* ('LICENSE'), but you may also obtain a copy at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
package org.bioversityinternational.model.rdf;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.vocabulary.RDF;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
/**
 * Small query helper around a Jena {@link Model}: lists subjects of a given
 * RDF type, collects the values of a property for a subject, and expands
 * prefixed names (QNames) into full URIs using the prefix mappings recorded
 * in the model.
 *
 * @author Richard Bruskiewich
 *
 */
public class Parser {

    /** Key of the namespace entry in the map returned by {@link #parseURI(String)}. */
    public static final String NAMESPACE = "Namespace" ;

    /** Key of the full URI entry in the map returned by {@link #parseURI(String)}. */
    public static final String URI = "URI" ;

    /** Key of the term (local part) entry in the map returned by {@link #parseURI(String)}. */
    public static final String TERM = "Term" ;

    /** Set to true to print the prefix mappings found while constructing a Parser. */
    private static final boolean DEBUG = false ;

    /** The wrapped model; never null once the constructor has returned. */
    private final Model model ;

    /** Prefix to namespace URI, copied from the model's prefix map. */
    private final Map<String,String> qnames = new HashMap<String,String>() ;

    /** Namespace URI to prefix (reverse of {@link #qnames}); populated but not read in this class. */
    private final Map<String,String> namespaces = new HashMap<String,String>() ;

    /**
     * Wraps the given model and caches its prefix mappings.
     *
     * @param model is the Jena Model representation of a specified RDF document
     * @throws NullPointerException if model is null
     */
    public Parser(Model model) {
        if (model == null) {
            // Fail with an explicit message instead of an anonymous NPE a few lines below.
            throw new NullPointerException("model must not be null") ;
        }
        this.model = model ;
        for (Map.Entry<String,String> entry : model.getNsPrefixMap().entrySet()) {
            // A one-to-one mapping of prefixes to namespaces *is* assumed here:
            // if two prefixes share a namespace, the reverse map keeps only one of them.
            qnames.put(entry.getKey(), entry.getValue()) ;
            namespaces.put(entry.getValue(), entry.getKey()) ;
            if (DEBUG) {
                System.out.print("prefix: " + entry.getKey()) ;
                System.out.println(", namespace: " + entry.getValue()) ;
            }
        }
    }

    /**
     * Lists every subject that has an rdf:type statement pointing at the given type.
     *
     * @param typeURI URI of the desired RDF.type
     * @return Map of subjects: the key is the subject URI and the value its local name;
     *         for anonymous subjects both key and value are the blank node id
     * @throws RDFParserException never thrown by this implementation; kept in the
     *         signature so existing callers continue to compile
     */
    public Map<String,String> subjectsOfType( String typeURI ) throws RDFParserException {
        Map<String,String> subjects = new HashMap<String,String>() ;
        StmtIterator statements =
            model.listStatements(
                (Resource)null,
                RDF.type,
                (RDFNode)model.createResource(typeURI)
            ) ;
        try {
            while (statements.hasNext()) {
                Statement statement = statements.nextStatement() ;
                Resource subject = statement.getSubject() ;
                if (subject.isAnon()) {
                    String id = subject.getId().toString() ;
                    subjects.put(id, id) ;
                } else {
                    // The URI is kept exactly as stored in the model (no QName
                    // expansion) so that cross-references between entries still match.
                    subjects.put(subject.getURI(), subject.getLocalName()) ;
                }
            }
        } finally {
            statements.close() ;
        }
        return subjects ;
    }

    /**
     * Collects the values of a property for a subject, without language
     * filtering and returning short (local name / lexical form) values.
     *
     * @param subjectURI target Subject Resource for query
     * @param propertyURI target Predicate (Property) for query
     * @return Map of property RDFNode values using URI or literal self as the key
     * @throws RDFParserException if a resource value cannot be resolved by {@link #parseURI(String)}
     */
    public Map<String,String> getPropertyValues(
            String subjectURI,
            String propertyURI
    ) throws RDFParserException {
        return getPropertyValues( subjectURI, propertyURI, null, false ) ;
    }

    /**
     * Collects the values of a property for a subject, without language filtering.
     *
     * @param subjectURI target Subject Resource for query
     * @param propertyURI target Predicate (Property) for query
     * @param fullId set True if full Object lexical form or URI should be returned,
     *        otherwise, just the ontology term; null is treated as false
     * @return Map of property RDFNode values using URI or literal self as the key
     * @throws RDFParserException if a resource value cannot be resolved by {@link #parseURI(String)}
     */
    public Map<String,String> getPropertyValues(
            String subjectURI,
            String propertyURI,
            Boolean fullId
    ) throws RDFParserException {
        return getPropertyValues( subjectURI, propertyURI, null, fullId ) ;
    }

    /**
     * Collects the values of a property for a subject.
     * <p>
     * Literal values are keyed by {@code literal.toString()}; resource values are
     * keyed by their URI (or blank node id) exactly as stored in the model.
     *
     * @param subjectURI target Subject Resource for query
     * @param propertyURI target Predicate (Property) for query
     * @param language xml:lang filter for literals (if null, return ALL literals);
     *        literals without a language tag always pass the filter
     * @param fullId set True if full Object lexical form or URI should be returned,
     *        otherwise, just the ontology term; null is treated as false
     * @return Map of property RDFNode values using URI or literal self as the key
     * @throws RDFParserException if fullId is set and a resource value uses a
     *         prefix that {@link #parseURI(String)} does not recognize
     */
    public Map<String,String> getPropertyValues(
            String subjectURI,
            String propertyURI,
            String language,
            Boolean fullId
    ) throws RDFParserException {
        // Unboxing a null Boolean would throw a NullPointerException; treat null as false.
        boolean full = Boolean.TRUE.equals(fullId) ;
        Map<String,String> values = new HashMap<String,String>() ;
        StmtIterator statements =
            model.listStatements(
                model.createResource(subjectURI),
                model.createProperty(propertyURI),
                (RDFNode)null
            ) ;
        try {
            while (statements.hasNext()) {
                Statement statement = statements.nextStatement() ;
                RDFNode object = statement.getObject() ;
                String key = null, value = null ;
                if (object.isLiteral()) {
                    Literal literal = object.asLiteral() ;
                    if (matchesLanguage(literal.getLanguage(), language)) {
                        key = literal.toString() ;
                        value = full ? key : literal.getLexicalForm() ;
                    }
                } else {
                    Resource objRes = object.asResource() ;
                    if (object.isAnon()) {
                        key = value = objRes.getId().toString() ;
                    } else {
                        // The key stays untranslated so that cross-references
                        // between map entries and the model still match.
                        key = objRes.getURI() ;
                        value = full ? resolveFullURI(key) : objRes.getLocalName() ;
                    }
                }
                // key is null only for literals rejected by the language filter
                if (key != null) { values.put(key, value) ; }
            }
        } finally {
            // parseURI may throw in the middle of the loop; release the iterator regardless.
            statements.close() ;
        }
        return values ;
    }

    /**
     * This method is a bit of a hack to repair URI's (incorrectly) abbreviated
     * as QNames in the specified RDF model.
     * <p>
     * Strings starting with {@code http:} or {@code https:} are treated as complete
     * URIs and split at the first {@code #}, or, when there is none, after the last
     * {@code /}. Anything else containing a colon is treated as {@code prefix:term}
     * and expanded using the prefixes recorded in the model; the prefix is looked up
     * as written first and then in lower case.
     *
     * @param uri checked if it is a XML QName
     * @return Map with the entries {@link #NAMESPACE}, {@link #URI} and {@link #TERM};
     *         empty if uri contains no colon or nothing follows its first colon
     * @throws RDFParserException if unknown QName prefix encountered
     */
    public Map<String,String> parseURI(String uri) throws RDFParserException {
        Map<String,String> results = new HashMap<String,String>() ;
        int colon = uri.indexOf(':') ;
        if (colon < 0 || colon == uri.length() - 1) {
            // neither a QName nor a URI with a scheme: nothing to report
            return results ;
        }
        String prefix = uri.substring(0, colon) ;
        // Everything after the first colon, with any further colons left intact.
        String tail = uri.substring(colon + 1) ;

        if (prefix.equalsIgnoreCase("http") || prefix.equalsIgnoreCase("https")) {
            // Regular URI: identify the namespace part.
            String ns, term ;
            int hash = uri.indexOf('#') ;
            if (hash >= 0) {
                // hash URI: namespace runs up to and including the first '#'
                ns = uri.substring(0, hash + 1) ;
                term = uri.substring(hash + 1) ;
            } else {
                // slash URI: since this should be a term URI, the last path
                // segment is taken as the term and the rest as the namespace
                int slash = uri.lastIndexOf('/') ;
                ns = uri.substring(0, slash + 1) ;
                term = uri.substring(slash + 1) ;
            }
            results.put(TERM, term) ;
            results.put(NAMESPACE, ns) ;
            // the original uri is assumed the complete URI
            results.put(URI, uri) ;
        } else {
            // Candidate XML QName: prefixes are case-sensitive, so try the exact
            // spelling first and fall back to lower case for lenient matching.
            String known = prefix ;
            if (!qnames.containsKey(known)) {
                known = prefix.toLowerCase(Locale.ROOT) ;
            }
            if (!qnames.containsKey(known)) {
                throw new RDFParserException("Don't recognize XML QName: '"+prefix+"'?") ;
            }
            String ns = qnames.get(known) ;
            results.put(NAMESPACE, ns) ;
            // reconstruct the actual URI
            results.put(URI, ns + tail) ;
            results.put(TERM, tail) ;
        }
        return results ;
    }

    /**
     * Method to rewrite the category keys into full URIs
     * in case they are originally only XML QNames.
     * <p>
     * Keys that {@link #parseURI(String)} cannot split (no colon) are kept unchanged.
     *
     * @param data_map initial data possibly keyed by QNames
     * @return new Map with keys converted to full URI's and the original values
     * @throws RDFParserException if a key uses an unknown QName prefix
     */
    public Map<String,String> fullURIMap(Map<String,String> data_map) throws RDFParserException {
        Map<String,String> remapped = new HashMap<String,String>() ;
        for (Map.Entry<String,String> entry : data_map.entrySet()) {
            remapped.put(resolveFullURI(entry.getKey()), entry.getValue()) ;
        }
        return remapped ;
    }

    /**
     * Returns the full URI for the given URI or QName, or the input itself
     * when parseURI yields no URI entry for it.
     */
    private String resolveFullURI(String uri) throws RDFParserException {
        String full = parseURI(uri).get(URI) ;
        return full != null ? full : uri ;
    }

    /**
     * Decides whether a literal's language tag passes the requested filter.
     * A tag passes when it is empty, when no filter (null or empty) is given, or
     * when it equals the filter or extends it with a '-' subtag (e.g. filter
     * "en" accepts "en" and "en-US" but not "eng"); comparison ignores case.
     */
    private static boolean matchesLanguage(String lang, String language) {
        if (lang == null || lang.isEmpty()) {
            return true ; // language is not relevant to this literal
        }
        if (language == null || language.isEmpty()) {
            return true ; // caller does not care about language
        }
        int n = language.length() ;
        if (!lang.regionMatches(true, 0, language, 0, n)) {
            return false ;
        }
        return lang.length() == n || lang.charAt(n) == '-' ;
    }
}