Package org.deri.grefine.rdf.vocab

Source Code of org.deri.grefine.rdf.vocab.VocabularyImporter

package org.deri.grefine.rdf.vocab;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.any23.Any23;
import org.apache.any23.http.HTTPClient;
import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.HTTPDocumentSource;
import org.apache.any23.writer.ReportingTripleHandler;
import org.apache.any23.writer.RepositoryWriter;
import org.openrdf.model.Value;
import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.TupleQuery;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.sail.inferencer.fc.ForwardChainingRDFSInferencer;
import org.openrdf.sail.memory.MemoryStore;


public class VocabularyImporter {
 
  public void importVocabulary(String name, String uri, String fetchUrl, List<RDFSClass> classes, List<RDFSProperty> properties) throws VocabularyImportException{
    boolean strictlyRdf = faultyContentNegotiation(uri);
    Repository repos = getModel(fetchUrl, strictlyRdf);
    getTerms(repos, name, uri, classes, properties);
  }
 
  public void importVocabulary(String name, String uri,Repository repository, List<RDFSClass> classes, List<RDFSProperty> properties) throws VocabularyImportException{
    getTerms(repository, name, uri, classes, properties);
  }
 
  private static final String PREFIXES = "PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> "
      + "PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
      + "PREFIX skos:<http://www.w3.org/2004/02/skos/core#> ";
  private static final String CLASSES_QUERY_P1 = PREFIXES
      + "SELECT ?resource ?label ?en_label ?description ?en_description ?definition ?en_definition "
      + "WHERE { "
      + "?resource rdf:type rdfs:Class. "
      + "OPTIONAL {?resource rdfs:label ?label.} "
      + "OPTIONAL {?resource rdfs:label ?en_label. FILTER langMatches( lang(?en_label), \"EN\" )  } "
      + "OPTIONAL {?resource rdfs:comment ?description.} "
      + "OPTIONAL {?resource rdfs:comment ?en_description. FILTER langMatches( lang(?en_description), \"EN\" )  } "
      + "OPTIONAL {?resource skos:definition ?definition.} "
      + "OPTIONAL {?resource skos:definition ?en_definition. FILTER langMatches( lang(?en_definition), \"EN\" )  } "
      + "FILTER regex(str(?resource), \"^";
  private static final String CLASSES_QUERY_P2 = "\")}";

  private static final String PROPERTIES_QUERY_P1 = PREFIXES
      + "SELECT ?resource ?label ?en_label ?description ?en_description ?definition ?en_definition "
      + "WHERE { "
      + "?resource rdf:type rdf:Property. "
      + "OPTIONAL {?resource rdfs:label ?label.} "
      + "OPTIONAL {?resource rdfs:label ?en_label. FILTER langMatches( lang(?en_label), \"EN\" )  } "
      + "OPTIONAL {?resource rdfs:comment ?description.} "
      + "OPTIONAL {?resource rdfs:comment ?en_description. FILTER langMatches( lang(?en_description), \"EN\" )  } "
      + "OPTIONAL {?resource skos:definition ?definition.} "
      + "OPTIONAL {?resource skos:definition ?en_definition. FILTER langMatches( lang(?en_definition), \"EN\" )  } "
      + "FILTER regex(str(?resource), \"^";
  private static final String PROPERTIES_QUERY_P2 = "\")}";

  private Repository getModel(String url,boolean strictlyRdf) throws VocabularyImportException {
    try {
      Any23 runner;
      if(strictlyRdf){
        runner = new Any23("rdf-xml");
      }else{
        runner = new Any23();
      }
      runner.setHTTPUserAgent("google-refine-rdf-extension");
      HTTPClient client = runner.getHTTPClient();
      DocumentSource source = new HTTPDocumentSource(client, url);
      Repository repository = new SailRepository(
          new ForwardChainingRDFSInferencer(new MemoryStore()));
      repository.initialize();
      RepositoryConnection con = repository.getConnection();
      RepositoryWriter w = new RepositoryWriter(con);
      ReportingTripleHandler reporter = new ReportingTripleHandler(w);
      runner.extract(source, reporter);
     
      return repository;
    } catch (Exception e) {
      throw new VocabularyImportException(
          "Unable to import vocabulary from " + url, e);
    }
  }

  protected void getTerms(Repository repos, String name, String uri, List<RDFSClass> classes, List<RDFSProperty> properties) throws VocabularyImportException {
    try {
      RepositoryConnection con = repos.getConnection();
      try {

        TupleQuery query = con.prepareTupleQuery(QueryLanguage.SPARQL,CLASSES_QUERY_P1 + uri + CLASSES_QUERY_P2);
        TupleQueryResult res = query.evaluate();

        Set<String> seen = new HashSet<String>();
        while (res.hasNext()) {
          BindingSet solution = res.next();
          String clazzURI = solution.getValue("resource").stringValue();
          if (seen.contains(clazzURI)) {
            continue;
          }
          seen.add(clazzURI);
          String label = getFirstNotNull(new Value[] {
              solution.getValue("en_label"),
              solution.getValue("label") });
          String description = getFirstNotNull(new Value[] {
              solution.getValue("en_definition"),
              solution.getValue("definition"),
              solution.getValue("en_description"),
              solution.getValue("description") });
          RDFSClass clazz = new RDFSClass(clazzURI, label,
              description, name, uri);
          classes.add(clazz);
        }
       
        query = con.prepareTupleQuery(QueryLanguage.SPARQL,PROPERTIES_QUERY_P1 + uri + PROPERTIES_QUERY_P2);
        res = query.evaluate();
        seen = new HashSet<String>();
        while (res.hasNext()) {
          BindingSet solution = res.next();
          String propertyUri = solution.getValue("resource").stringValue();
          if (seen.contains(propertyUri)) {
            continue;
          }
          seen.add(propertyUri);
          String label = getFirstNotNull(new Value[] {
              solution.getValue("en_label"),
              solution.getValue("label") });
          String description = getFirstNotNull(new Value[] {
              solution.getValue("en_definition"),
              solution.getValue("definition"),
              solution.getValue("en_description"),
              solution.getValue("description") });
          RDFSProperty prop = new RDFSProperty(propertyUri, label,
              description, name, uri);
          properties.add(prop);
        }

      } catch (Exception ex) {
        throw new VocabularyImportException("Error while processing vocabulary retrieved from " + uri, ex);
      } finally {
        con.close();
      }
    } catch (RepositoryException ex) {
      throw new VocabularyImportException("Error while processing vocabulary retrieved from " + uri,ex);
    }
  }
 
  private String getFirstNotNull(Value[] values) {
    String s = null;
    for (int i = 0; i < values.length; i++) {
      s = getString(values[i]);
      if (s != null) {
        break;
      }
    }
    return s;
  }

  private String getString(Value v) {
    if (v != null) {
      return v.stringValue();
    }
    return null;
  }
 
  private boolean faultyContentNegotiation(String uri){
    //we add an exceptional treatment for SKOS as their deployment does not handle Accept header properly
    //SKSO always return HTML if the Accept header contains HTML regardless the other more preferred options
    return uri.equals("http://www.w3.org/2004/02/skos/core#");
  }

}
TOP

Related Classes of org.deri.grefine.rdf.vocab.VocabularyImporter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.