/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.entityhub.query.clerezza;
import static org.apache.stanbol.entityhub.servicesapi.defaults.SpecialFieldEnum.isSpecialField;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.util.W3CDateFormat;
import org.apache.stanbol.entityhub.core.utils.AdaptingIterator;
import org.apache.stanbol.entityhub.model.clerezza.RdfRepresentation;
import org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory;
import org.apache.stanbol.entityhub.servicesapi.defaults.DataTypeEnum;
import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
import org.apache.stanbol.entityhub.servicesapi.defaults.SpecialFieldEnum;
import org.apache.stanbol.entityhub.servicesapi.model.Reference;
import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
import org.apache.stanbol.entityhub.servicesapi.query.Constraint;
import org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint.PatternType;
import org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint.MODE;
import org.apache.stanbol.entityhub.servicesapi.util.PatternUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Utility Class to create SPARQL Queries for {@link SparqlFieldQuery} instances.<p>
* Thanks to ogrisel for pointing me to his <a href="http://hg.nuxeo.org/sandbox/scribo/raw-file/b57ada956947/scribo-annotator-recognizer-sparql-ep/src/main/java/ws/scribo/annotators/recognizer/SparqlEndpointInstanceRecognizer.java">
* SparqlEndpointInstanceRecognizer</a> implementation for the query optimisations
* for Virtuoso and LARQ!
* @author Rupert Westenthaler
*
*/
public final class SparqlQueryUtils {
private static final Logger log = LoggerFactory.getLogger(SparqlQueryUtils.class);
/**
 * The kinds of SPARQL endpoints this utility can generate queries for.
 * Each constant carries a flag telling whether SPARQL 1.1 sub-selects may be
 * used when writing queries for that endpoint; only {@link #Virtuoso} has
 * this flag enabled.
 */
public static enum EndpointTypeEnum {
    Standard(false),
    Virtuoso(true),
    LARQ(false),
    ARQ(false);
    /** <code>true</code> if queries may use SPARQL 1.1 sub-selects */
    boolean supportsSparql11SubSelect;
    /**
     * Creates an endpoint type with the given feature set.
     * @param subSelectSupported <code>true</code> to enable the SPARQL 1.1
     * sub-select feature, <code>false</code> for the SPARQL 1.0 feature set
     */
    EndpointTypeEnum(boolean subSelectSupported){
        supportsSparql11SubSelect = subSelectSupported;
    }
    /**
     * @return <code>true</code> if sub-selects may be used for this endpoint type
     */
    public final boolean supportsSubSelect() {
        return supportsSparql11SubSelect;
    }
}
/** URI of the XML Schema dateTime data type, used to type date literals in range filters */
private static final String XSD_DATE_TIME = "http://www.w3.org/2001/XMLSchema#dateTime";
/**
 * Formatter used to serialise {@link Date} values for the query string.
 * NOTE(review): this instance is shared by all callers and
 * <code>java.text.DateFormat</code> implementations are in general not
 * synchronized - confirm that W3CDateFormat is safe for concurrent use.
 */
private static final DateFormat DATE_FORMAT = new W3CDateFormat();
/** Shared {@link RdfValueFactory} instance (not referenced by the query building methods in this part of the class) */
private static final RdfValueFactory valueFavtory = RdfValueFactory.getInstance();
/**
* {@link UriRef} constant for {@link RdfResourceEnum#queryResult}
* @see RdfResourceEnum#queryResult
*/
public static final UriRef FIELD_QUERY_RESULT = new UriRef(RdfResourceEnum.queryResult.getUri());
/**
* {@link UriRef} constant for {@link RdfResourceEnum#QueryResultSet}
* @see RdfResourceEnum#QueryResultSet
*/
public static final UriRef FIELD_QUERY_RESULT_SET = new UriRef(RdfResourceEnum.QueryResultSet.getUri());
/** Utility class with static methods only - do not create instances */
private SparqlQueryUtils(){}
/**
* Creates a SPARQL CONSTRUCT query that creates triples for all the selected
* fields of representations in the result set. <p>
* In addition the query also constructs triples with
* {@link RdfResourceEnum#QueryResultSet} as subject,
* {@link RdfResourceEnum#queryResult} as predicate and the selected
* representation as object. Those triples can be used to create an iterator
* over the results of the query.<p>
* Delegates to
* {@link #createSparqlConstructQuery(SparqlFieldQuery, int, EndpointTypeEnum, String...)}
* with <code>-1</code> as limit, so the limit defined by the query is used.
* @param query the field query
* @param endpointType The type of the Endpoint (used to write optimized
* queries for endpoint type specific extensions)
* @param additionalFields This allows to pass additional fields that are
* optionally selected in the data set and added to the CONSTRUCT part
* of the query
* @return the SPARQL CONSTRUCT Query
*/
public static String createSparqlConstructQuery(SparqlFieldQuery query,EndpointTypeEnum endpointType,String...additionalFields){
return createSparqlConstructQuery(query,-1,endpointType,additionalFields);
}
/**
 * Creates a SPARQL CONSTRUCT query that creates triples for all the selected
 * fields of representations in the result set. <p>
 * In addition the query also constructs triples with
 * {@link RdfResourceEnum#QueryResultSet} as subject,
 * {@link RdfResourceEnum#queryResult} as predicate and the selected
 * representation as object. Those triples can be used to create an iterator
 * over the results of the query.<p>
 * The passed query is not modified; a clone is configured and used instead.
 * @param query the field query
 * @param limit if a value &gt; 0 is passed, then this value overwrites the limit
 * defined by the query.
 * @param endpointType The type of the Endpoint (used to write optimized
 * queries for endpoint type specific extensions)
 * @param additionalFields This allows to pass additional fields that are
 * optionally selected in the data set and added to the CONSTRUCT part
 * of the query
 * @return the SPARQL CONSTRUCT Query
 */
public static String createSparqlConstructQuery(SparqlFieldQuery query,int limit,EndpointTypeEnum endpointType,String...additionalFields){
    //1) INIT
    final StringBuilder queryString = new StringBuilder();
    //clone the query and reconfigure the clone
    query = initLocalQuery(query, limit, additionalFields);
    final Map<String,String> selectedFields = initSelectedFieldsMap(query);
    //The WHERE clause writes ORDER BY, LIMIT and OFFSET into a sub-select if
    //sub-selects are used AND the query defines at least one constraint.
    //This MUST be determined before the WHERE clause is written, because
    //writing the constraints removes entries from the selectedFields map.
    //Evaluating the state afterwards could cause ORDER BY, LIMIT and OFFSET
    //to be written a second time outside of the sub-select.
    final boolean limitWrittenBySubSelect =
            isSubSelectState(endpointType, selectedFields) && query.iterator().hasNext();
    //2) CONSTRUCT
    createConstruct(queryString, selectedFields);
    //3) WHERE
    queryString.append("WHERE { \n");
    addFieldConstraint(queryString, query, selectedFields,endpointType);
    queryString.append("} \n");
    if(!limitWrittenBySubSelect){
        //4) add Entity Ranking (if needed)
        addRankingOrder(endpointType, queryString,selectedFields.get(null),"");
        //5) Limit and Offset
        addLimit(query.getLimit()!=null?query.getLimit():0, queryString);
        addOffset(query, queryString);
    }
    return queryString.toString();
}
/**
 * Tells whether a query needs to be written by using a sub select. This is
 * the case if the SPARQL endpoint supports the SPARQL 1.1 sub select feature
 * and the query selects more than the entity id (the map contains more
 * entries than the mapping for the root variable).
 * @param endpoint the used endpoint type
 * @param selectedFields the map with the selected fields
 * @return <code>true</code> if sub selects are used
 */
private static boolean isSubSelectState(EndpointTypeEnum endpoint, Map<String,String> selectedFields){
    if(!endpoint.supportsSubSelect()){
        return false; //feature not available for this endpoint
    }
    return selectedFields.size() > 1;
}
/**
 * Clones the passed query and configures the clone with the passed limit
 * and the additional fields. The passed query itself is not changed.
 * @param query the query
 * @param limit the limit; only applied to the clone if &gt; 0
 * @param additionalFields additional fields to select; ignored if
 * <code>null</code> or empty
 * @return the configured clone of the passed query
 */
private static SparqlFieldQuery initLocalQuery(SparqlFieldQuery query,
                                               int limit,
                                               String... additionalFields) {
    final SparqlFieldQuery localQuery = query.clone();
    if(limit > 0){
        localQuery.setLimit(limit);
    }
    final boolean hasAdditionalFields = additionalFields != null && additionalFields.length > 0;
    if(hasAdditionalFields){
        localQuery.addSelectedFields(Arrays.asList(additionalFields));
    }
    return localQuery;
}
/**
 * Writes the CONSTRUCT part of the query: one triple pattern per selected
 * field followed by the triple that links
 * {@link RdfResourceEnum#QueryResultSet} via
 * {@link RdfResourceEnum#queryResult} to the root variable. The latter is
 * used to build the iterator over the results.
 * @param queryString The query to add the construct fields
 * @param selectedFields the field name to variable name mapping used by the
 * query. This mapping MUST also contain the <code>null</code> key that
 * is mapped to the variable name used for the representations to be selected
 */
private static void createConstruct(final StringBuilder queryString, final Map<String, String> selectedFields) {
    queryString.append("CONSTRUCT { \n");
    //the variable of the root (subject) is stored under the null key
    final String subjectVar = selectedFields.get(null);
    for(Entry<String,String> fieldToVar : selectedFields.entrySet()){
        final String fieldUri = fieldToVar.getKey();
        if(fieldUri == null){
            continue; //the root variable itself is no field
        }
        queryString.append(" ?").append(subjectVar)
            .append(" <").append(fieldUri)
            .append("> ?").append(fieldToVar.getValue())
            .append(" .\n");
    }
    //the triple that forms the result set
    queryString.append(" <").append(RdfResourceEnum.QueryResultSet)
        .append("> <").append(RdfResourceEnum.queryResult)
        .append("> ?").append(subjectVar)
        .append(" . \n");
    queryString.append("} ");
}
/**
* Creates the SPARQL representation of the passed field query. The selected
* fields are included and the limit defined by the query is used (delegates
* with <code>includeFields=true</code> and <code>limit=-1</code>).
* @param query A field query implementation that additionally supports a field
* to variable mapping
* @param endpointType The type of the Endpoint (used to write optimized
* queries for endpoint type specific extensions)
* @return the SPARQL query as String
*/
public static String createSparqlSelectQuery(SparqlFieldQuery query,EndpointTypeEnum endpointType) {
return createSparqlSelectQuery(query,true,-1,endpointType);
}
/**
* Creates the SPARQL representation of the passed field query. The selected
* fields are included (delegates with <code>includeFields=true</code>).
* @param query A field query implementation that additionally supports a field
* to variable mapping
* @param limit If &gt; 0, then the limit defined by the query is overridden by this
* value
* @param endpointType The type of the Endpoint (used to write optimized
* queries for endpoint type specific extensions)
* @return the SPARQL query as String
*/
public static String createSparqlSelectQuery(SparqlFieldQuery query,int limit,EndpointTypeEnum endpointType) {
return createSparqlSelectQuery(query,true,limit,endpointType);
}
/**
* Creates the SPARQL representation of the passed field query. The limit
* defined by the query is used (delegates with <code>limit=-1</code>).
* @param query A field query implementation that additionally supports a field
* to variable mapping
* @param includeFields if <code>false</code> only the root is selected (selected fields are ignored)
* @param endpointType The type of the Endpoint (used to write optimized
* queries for endpoint type specific extensions)
* @return the SPARQL query as String
*/
public static String createSparqlSelectQuery(SparqlFieldQuery query,boolean includeFields, EndpointTypeEnum endpointType) {
return createSparqlSelectQuery(query,includeFields,-1,endpointType);
}
/**
 * Creates the SPARQL representation of the passed field query.<p>
 * The passed query is not modified; a clone is configured and used instead.
 * @param query A field query implementation that additionally supports a field
 * to variable mapping
 * @param includeFields if <code>false</code> only the root is selected (selected fields are ignored)
 * @param limit if &gt; 0 then the limit defined by the query is overridden by
 * the passed value
 * @param endpointType The type of the Endpoint (used to write optimized
 * queries for endpoint type specific extensions)
 * @return the SPARQL query as String
 */
public static String createSparqlSelectQuery(SparqlFieldQuery query,boolean includeFields,int limit, EndpointTypeEnum endpointType) {
    //1) INIT
    final StringBuilder queryString = new StringBuilder();
    query = initLocalQuery(query, limit);
    final Map<String,String> selectedFields = initSelectedFieldsMap(query);
    //The WHERE clause writes ORDER BY, LIMIT and OFFSET into a sub-select if
    //sub-selects are used AND the query defines at least one constraint.
    //This MUST be determined before the WHERE clause is written, because
    //writing the constraints removes entries from the selectedFields map.
    //Evaluating the state afterwards could cause ORDER BY, LIMIT and OFFSET
    //to be written a second time outside of the sub-select.
    final boolean limitWrittenBySubSelect =
            isSubSelectState(endpointType, selectedFields) && query.iterator().hasNext();
    //2) SELECT
    createSelect(queryString, includeFields, selectedFields);
    //3) WHERE
    queryString.append("WHERE { \n");
    addFieldConstraint(queryString, query, selectedFields,endpointType);
    queryString.append("} \n");
    if(!limitWrittenBySubSelect){
        //4) Add Stuff to rank results based on the "page rank" of entities
        addRankingOrder(endpointType, queryString,selectedFields.get(null),"");
        //5) Limit and Offset
        addLimit(query.getLimit()!=null?query.getLimit():0, queryString);
        addOffset(query, queryString);
    }
    return queryString.toString();
}
/**
 * Creates a new, modifiable map holding the field -&gt; variable name mappings
 * of the passed query. The name of the root variable is added with
 * <code>null</code> as key.<p>
 * A copy is needed because the methods writing graph patterns remove the
 * mappings of the fields they have already covered.
 * @param query the query
 * @return the mappings
 */
private static Map<String,String> initSelectedFieldsMap(SparqlFieldQuery query) {
    final Map<String,String> fieldToVariable = new HashMap<String, String>();
    fieldToVariable.putAll(query.getFieldVariableMappings());
    //the null key holds the name of the root variable
    fieldToVariable.put(null, query.getRootVariableName());
    return fieldToVariable;
}
/**
 * Adds the graph pattern that selects the incoming relations of entities.
 * This pattern is only written for {@link EndpointTypeEnum#ARQ ARQ} and
 * {@link EndpointTypeEnum#LARQ LARQ}; for all other
 * {@link EndpointTypeEnum SPARQL endpoint types} nothing is added.
 * @param endpointType the endpoint type
 * @param queryString the SPARQL query string to add the ranking constraint
 * @param rootFieldName the variable name used to select entities
 */
private static void addRankingConstraints(EndpointTypeEnum endpointType, final StringBuilder queryString, final String rootFieldName) {
    final boolean countsIncomingLinks =
            endpointType == EndpointTypeEnum.ARQ || endpointType == EndpointTypeEnum.LARQ;
    if(!countsIncomingLinks){
        //Virtuoso needs no additional pattern, because ranking uses a
        //built-in feature; all others do not support sorting
        return;
    }
    queryString.append(" { ?incoming ?p ?").append(rootFieldName).append(" . } \n");
}
/**
 * Adds the ORDER BY clause used to rank the selected entities. Nothing is
 * added for endpoint types other than Virtuoso, ARQ and LARQ.
 * @param endpointType the endpoint type
 * @param queryString the SPARQL query string to add the ORDER BY clause
 * @param rootVarName the variable name used to select entities
 * @param intend the indent written in front of the clause;
 * <code>null</code> is treated like the empty string
 */
private static void addRankingOrder(EndpointTypeEnum endpointType, final StringBuilder queryString,String rootVarName,String intend) {
    final String prefix = intend == null ? "" : intend;
    if(endpointType == EndpointTypeEnum.Virtuoso){
        //Virtuoso specific syntax to sort by the rank of the IRI
        queryString.append(prefix)
            .append("ORDER BY DESC ( <LONG::IRI_RANK> (?")
            .append(rootVarName)
            .append(") ) \n");
    } else if(endpointType == EndpointTypeEnum.ARQ || endpointType == EndpointTypeEnum.LARQ){
        //TODO: COUNT is not part of the SPARQL 1.0 specification!
        // see http://www.w3.org/2009/sparql/wiki/Feature:AggregateFunctions
        queryString.append(prefix).append("ORDER BY DESC (COUNT (?incoming) ) \n");
    } //else sorting is not supported ... add nothing
}
/**
 * Adds the OFFSET clause if the offset of the query is greater than zero.<p>
 * The number is appended directly instead of using
 * <code>String.format("%d")</code>, because the latter localises the digits
 * based on the default locale, which may result in non ASCII digits
 * that are not valid within a SPARQL query.
 * @param query the query providing the offset
 * @param queryString the SPARQL query string to add the OFFSET clause
 */
private static void addOffset(SparqlFieldQuery query, final StringBuilder queryString) {
    if(query.getOffset() > 0){
        queryString.append("OFFSET ").append(query.getOffset()).append(" \n");
    }
}
/**
 * Adds the LIMIT clause if the passed limit is not <code>null</code> and
 * greater than zero.<p>
 * The number is appended directly instead of using
 * <code>String.format("%d")</code>, because the latter localises the digits
 * based on the default locale, which may result in non ASCII digits
 * that are not valid within a SPARQL query.
 * @param limit the limit or <code>null</code> if none
 * @param queryString the SPARQL query string to add the LIMIT clause
 */
private static void addLimit(Integer limit, final StringBuilder queryString) {
    if(limit != null && limit > 0){
        queryString.append("LIMIT ").append(limit.intValue()).append(" \n");
    }
}
/**
 * Writes the <code>SELECT DISTINCT</code> line of the SPARQL query.
 * @param queryString the SPARQL query string to add the SELECT
 * @param includeFields if <code>true</code> the variables of all entries of
 * the passed map (including the root variable) are selected; otherwise only
 * the root variable
 * @param selectedFields the field -&gt; variable name mappings. The variable
 * name of the root is expected under the <code>null</code> key
 */
private static void createSelect(final StringBuilder queryString, boolean includeFields, final Map<String, String> selectedFields) {
    queryString.append("SELECT DISTINCT");
    //NOTE: the root variable is part of the map (key null), so it needs not
    //     to be written separately if all fields are included
    final Collection<String> variables = includeFields ?
            selectedFields.values() :
            Collections.singleton(selectedFields.get(null));
    for(String variable : variables){
        queryString.append(" ?").append(variable);
    }
    queryString.append(" \n");
}
/**
* Adds the content of the WHERE clause of the SPARQL query: the graph patterns
* and filters for all constraints of the query, the patterns needed for
* ranking and OPTIONAL patterns for all selected fields that are not already
* covered by the pattern of a constraint.<p>
* If the {@link EndpointTypeEnum SPARQL endpoint} supports SPARQL 1.1
* subqueries and more than the id is selected, then the constraints are
* written within an inner SELECT that only selects the id. In that case the
* ORDER BY, LIMIT and OFFSET are also added to this inner SELECT.<p>
* NOTE: This method removes entries from the passed selectedFields map while
* writing the graph patterns of the constraints. If no sub query is used, also
* the entries written as OPTIONAL patterns are removed from the passed map.
* @param queryString the SPARQL query string to add the WHERE
* @param query the query
* @param selectedFields the selected fields (field -&gt; variable name) including
* the root variable name stored with the <code>null</code> key
* @param endpointType The type of the endpoint (used to write optimised
* queries for endpoint type specific extensions)
*/
private static void addFieldConstraint(final StringBuilder queryString, SparqlFieldQuery query, Map<String, String> selectedFields,EndpointTypeEnum endpointType) {
//we need temporary variables with unique names
String varPrefix = "tmp";
//single element array, so that called methods can increase the counter
int[] varNum = new int []{1};
//used to open brackets for the select part of the constraints
boolean first = true;
//determine if sub-selects are supported and if we need a sub-select
//(more than the id is selected)
boolean subSelectState = isSubSelectState(endpointType, selectedFields);
//if we uses a sub query to select the ids, we need to add the graph pattern
//of all selected fields outside of the sub query
//(this copy is taken before any constraint removes entries from the map)
Map<String,String> tmpSelectedFields = subSelectState ?
new HashMap<String,String>(selectedFields) :
null;
String intend;
if(subSelectState){
intend = " "; //additional intend because of sub query (3*2)
} else {
intend = " "; //normal intend (2*2)
}
//iterating over the query provides the field -> constraint entries
for(Entry<String,Constraint> fieldConstraint : query){
if(first){
//the group (and the sub query) is only opened if there is at least
//one constraint
queryString.append(" { \n");
if(subSelectState){
String rootVarName = selectedFields.get(null);
queryString.append(" SELECT ?").append(rootVarName).append(" \n");
queryString.append(" WHERE { \n");
}
first = false;
}
String field = fieldConstraint.getKey();
Constraint constraint = fieldConstraint.getValue();
switch (constraint.getType()) {
case value:
addValueConstraint(queryString,field, (ValueConstraint)constraint,selectedFields, varPrefix,varNum,intend);
break;
case text:
//first the pattern selecting the values of the field, than the filter
String var = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum,intend);
addTextConstraint(queryString, var, (TextConstraint)constraint,endpointType,intend);
break;
case range:
var = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum,intend);
addRangeConstriant(queryString,var,(RangeConstraint)constraint,intend);
break;
default:
log.warn("Please update this Implementation to support the Constraint Type "+fieldConstraint.getValue().getType());
break;
}
//close the pattern of this constraint
//NOTE: this is also written if the constraint type is not supported
queryString.append(" . \n");
}
//for some endpoints we need to add an additional constraints used for
//ranking. If sub-queries are used this need to be in the select part
//of the query (to rank results of the inner query)
//otherwise it is better to have it in outside if the select part to only
//rank the graph selected by the query
if(subSelectState){
addRankingConstraints(endpointType, queryString, selectedFields.get(null));
}
if(!first){ //at least one constraint was written -> close the opened brackets
if(subSelectState){
queryString.append(" } \n");
//re-add all selected fields to be added as selects because in
//the sub-query we only select the ID!
//(from now on the local variable refers to the copy; the map of
//the caller is no longer modified)
selectedFields = tmpSelectedFields;
//ranking needs also to be added to the sub-query (to correctly
//process LIMIT and OFFSET
addRankingOrder(endpointType, queryString,selectedFields.get(null)," ");
//add LIMIT and OFFSET to the sub-query!
//TODO: add link to the email
queryString.append(" ");
addLimit(query.getLimit(), queryString);
queryString.append(" ");
addOffset(query, queryString);
queryString.append(" ");
}
queryString.append("} \n");
}
//All the followig Graphpattern are only processed for the parts selected
//by the above constraints
//if no subqueries are used we need now to add the ranking constraints
if(!subSelectState){
addRankingConstraints(endpointType, queryString, selectedFields.get(null));
}
//we need to add graph pattern for selected field that are not covered by
//graph pattern written for the constraint.
//Implementation Note: selectedFields contains the null key for the root variable
while(selectedFields.size()>1){ //if this is the only left element we are done
//a new iterator is needed for each loop, because the map is modified by
//the call to addFieldGraphPattern(..)
Iterator<String> it = selectedFields.keySet().iterator();
String actField; //we need to get a non null value from the map
do {
//the outer while ensures an non null value so we need not to use hasNext
actField = it.next();
} while(actField == null);
queryString.append(" OPTIONAL { ");
// NOTE the following Method removes the written mapping from the Map
addFieldGraphPattern(queryString, actField, selectedFields, varPrefix, varNum,"");
queryString.append(". } \n");
}
}
/**
 * Adds the graph pattern(s) for a value constraint.<p>
 * If the constraint defines values, than patterns requiring those values
 * are added (one UNION group per data type if more than one data type is
 * defined). If no values are defined, than the values of the field are
 * selected and restricted to the data types of the constraint by a single
 * FILTER. In case neither values nor data types are defined only the pattern
 * selecting the values of the field is written.<p>
 * This method does not close the written pattern with a '.'.
 * @param queryString the query string to add the constraint
 * @param field the constrained field
 * @param constraint the value constraint
 * @param selectedFields the field -&gt; variable name mappings including the
 * root variable stored with the <code>null</code> key
 * @param varPrefix the prefix for newly created variable names
 * @param varNum the first element is used as counter for created variables
 * @param intend the indent for written lines
 */
private static void addValueConstraint(StringBuilder queryString,String field,ValueConstraint constraint,Map<String, String> selectedFields,String varPrefix,int[] varNum,String intend){
    String rootVarName = selectedFields.get(null);
    Collection<String> dataTypes = constraint.getDataTypes();
    if(dataTypes == null){
        //we need not to distinguish between empty and null
        //because SPARQL includes support for automatic detection of dataTypes
        //see http://www.w3.org/TR/rdf-sparql-query/#QSynLiterals
        dataTypes = Collections.emptySet();
    }
    if(constraint.getValues() != null){
        if(dataTypes.size()<=1){
            addDataTypeValueConstraint(queryString, rootVarName, field,
                dataTypes.isEmpty()?null:dataTypes.iterator().next(),
                constraint.getValues(),constraint.getMode(),varPrefix,varNum,intend);
        } else { //we have multiple dataTypes -> need to use union!
            boolean first = true;
            for(String dataType : dataTypes){
                if(first){
                    queryString.append('{');
                    first = false;
                } else {
                    queryString.append("} UNION {\n");
                }
                addDataTypeValueConstraint(queryString, rootVarName, field, dataType, constraint.getValues(),constraint.getMode(),varPrefix,varNum,intend);
            }
            queryString.append('}');
        }
    } else { // no constraint for the value
        // filter all instances that define any value for the given dataTypes
        // see http://www.w3.org/TR/rdf-sparql-query/#func-datatype
        //first we need to select the Variable to filter
        String var = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum,intend);
        //The separator and the filter are only written if there is a data
        //type to filter for. Otherwise the '.' added by the caller would
        //follow an other '.', which is not valid.
        if(!dataTypes.isEmpty()){
            queryString.append(". \n").append(intend);
            if(dataTypes.size()==1){
                addDataTypeFilter(queryString, var, dataTypes.iterator().next());
            } else {
                //multiple data types need to be combined with '||' within
                //a single FILTER. FILTER is no expression, so several
                //FILTERs can not be connected by '||'.
                queryString.append("FILTER(");
                boolean first = true;
                for(String dataType : dataTypes){
                    if(first){
                        first = false;
                    } else {
                        queryString.append(" || ");
                    }
                    queryString.append("(datatype(?").append(var).append(") = <");
                    queryString.append(dataType).append(">)");
                }
                queryString.append(')');
            }
        }
    }
}
/**
 * Appends a FILTER that requires the values of the passed variable to have
 * the passed data type.
 * @param queryString the query String to add the filter. MUST NOT be <code>null</code>
 * @param var the name of the variable to filter. MUST NOT be <code>null</code>
 * @param dataType the data type uri for the filter. MUST NOT be <code>null</code>
 */
private static void addDataTypeFilter(StringBuilder queryString, String var, String dataType) {
    queryString.append("FILTER(datatype(?").append(var)
        .append(") = <").append(dataType)
        .append(">)");
}
/**
 * Adds the graph patterns requiring the passed values for a field, optionally
 * typed with the passed data type.<p>
 * Multiple values are written within a group: combined by UNION for
 * {@link MODE#any} and as a sequence of patterns otherwise. A single value is
 * written as plain triple pattern without any brackets, so that opened and
 * closed brackets are always balanced.<p>
 * Values written as literals are escaped, so that quotes, backslashes and
 * line breaks within a value can not break (or extend) the query.
 * @param queryString the query string to add the constraint. MUST NOT be <code>null</code>
 * @param rootVarName the variable name of the subject. MUST NOT be <code>null</code>
 * @param field the property name of the field. MUST NOT be <code>null</code>
 * @param dataType the dataType constraint or <code>null</code> if none
 * @param values the values. MUST NOT be <code>null</code>.
 * @param mode the mode used to combine multiple values
 * @param varPrefix the prefix for newly created variable names
 * @param varNum the first element is used as counter for created variables
 * @param intend the indent for written lines
 */
private static void addDataTypeValueConstraint(StringBuilder queryString, String rootVarName, String field, String dataType, Collection<Object> values,MODE mode, String varPrefix,int[] varNum,String intend) {
    final boolean multipleValues = values.size() > 1;
    //the brackets of the UNION are only needed (and closed) for multiple values
    final boolean union = multipleValues && mode == MODE.any;
    final String typeSuffix = dataType != null ? String.format("^^<%s>",dataType) : "";
    String addIntend = intend;
    queryString.append(intend);
    if(multipleValues){
        queryString.append("{ ");
        addIntend = intend+" ";
    }
    boolean first = true;
    for(Object value : values){
        if(first){
            if(union){
                queryString.append('{');
            }
            first = false;
        } else {
            if(union){
                queryString.append("} UNION {\n");
            } else {
                queryString.append(" .\n");
            }
            queryString.append(addIntend);
        }
        final String predicate;
        if(isSpecialField(field)){
            //in case of a special field replace the field URI with an
            //variable to allow searching all outgoing properties
            predicate = "?" + varPrefix + varNum[0];
            varNum[0]++;
        } else {
            predicate = "<" + field + ">";
        }
        queryString.append('?').append(rootVarName).append(' ').append(predicate).append(' ');
        if(DataTypeEnum.Reference.getUri().equals(dataType) ||
                value instanceof Reference){
            queryString.append('<').append(value).append('>');
        } else {
            queryString.append('"').append(escapeStringLiteral(String.valueOf(value)));
            queryString.append('"').append(typeSuffix);
        }
    }
    if(multipleValues){
        if(union){ //close the union
            queryString.append('}');
        }
        queryString.append(" }");
    }
}
/**
 * Escapes the characters that are not allowed within a SPARQL string literal
 * enclosed by double quotes.
 * @param literal the lexical value of the literal
 * @return the escaped value
 */
private static String escapeStringLiteral(String literal) {
    StringBuilder escaped = new StringBuilder(literal.length() + 8);
    for(int i = 0; i < literal.length(); i++){
        char ch = literal.charAt(i);
        switch (ch) {
            case '\\':
                escaped.append("\\\\");
                break;
            case '"':
                escaped.append("\\\"");
                break;
            case '\n':
                escaped.append("\\n");
                break;
            case '\r':
                escaped.append("\\r");
                break;
            case '\t':
                escaped.append("\\t");
                break;
            default:
                escaped.append(ch);
                break;
        }
    }
    return escaped.toString();
}
/**
 * Adds a text constraint for the values of the passed variable to the SPARQL
 * query string.<p>
 * Regex constraints are always written as FILTER. For all other pattern
 * types the full text search extensions are used for
 * {@link EndpointTypeEnum#Virtuoso Virtuoso} and
 * {@link EndpointTypeEnum#LARQ LARQ} and a FILTER for all other endpoint
 * types. If the constraint defines languages, they are added to the already
 * opened FILTER or written as an own FILTER.
 * @param queryString the query string to add the constraint
 * @param var the variable name to constraint
 * @param constraint the constraint
 * @param endpointType The type of the Endpoint (used to write optimized
 * queries for endpoint type specific extensions)
 * @param intend the indent for written lines
 */
private static void addTextConstraint(StringBuilder queryString,String var,TextConstraint constraint,EndpointTypeEnum endpointType,String intend){
    boolean filterAdded = false;
    boolean isTextValueConstraint = constraint.getTexts() != null && !constraint.getTexts().isEmpty();
    if(isTextValueConstraint){
        if(constraint.getPatternType() == PatternType.regex){
            queryString.append(" \n").append(intend).append(" FILTER(");
            filterAdded = true;
            addRegexFilter(queryString,var,constraint.getTexts(),constraint.isCaseSensitive());
        } else {
            //TODO: This optimised versions for Virtuoso and LARQ might not
            //      respect case sensitive queries. Need more testing!
            if (EndpointTypeEnum.Virtuoso == endpointType) {
                queryString.append(". \n ").append(intend);
                queryString.append(String.format("?%s bif:contains '%s'",
                    var,createFullTextQueryString(constraint.getTexts())));
            } else if (EndpointTypeEnum.LARQ == endpointType) {
                queryString.append(". \n ").append(intend);
                queryString.append(String.format("?%s <http://jena.hpl.hp.com/ARQ/property#textMatch> '%s'",
                    var, createFullTextQueryString(constraint.getTexts())));
            } else {
                queryString.append(" \n").append(intend).append(" FILTER(");
                filterAdded = true;
                if(constraint.getPatternType() == PatternType.none){
                    if(constraint.isCaseSensitive()){
                        boolean first = true;
                        for(String textConstraint : constraint.getTexts()){
                            //the separator MUST only be written for texts
                            //that are actually added to the filter. Otherwise
                            //null or empty texts would result in dangling
                            //'||' operators
                            if(textConstraint != null && !textConstraint.isEmpty()){
                                if(first){
                                    first = false;
                                } else {
                                    queryString.append(" || ");
                                }
                                queryString.append(String.format("(str(?%s) = \"%s\")", var,textConstraint));
                            }
                        }
                    } else {
                        Collection<String> regexQueryTexts = new ArrayList<String>(constraint.getTexts().size());
                        for(String textConstraint : constraint.getTexts()){
                            if(textConstraint != null && !textConstraint.isEmpty()){
                                regexQueryTexts.add(PatternUtils.value2Regex(textConstraint));
                            }
                        }
                        addRegexFilter(queryString,var,regexQueryTexts,constraint.isCaseSensitive());
                    }
                } else if(constraint.getPatternType() == PatternType.wildcard){
                    //parse false, because that is more in line with the expectations of users!
                    Collection<String> regexQueryTexts = new ArrayList<String>(constraint.getTexts().size());
                    for(String textConstraint : constraint.getTexts()){
                        if(textConstraint != null && !textConstraint.isEmpty()){
                            regexQueryTexts.add(PatternUtils.wildcardToRegex(textConstraint,false));
                        }
                    }
                    addRegexFilter(queryString,var,regexQueryTexts,constraint.isCaseSensitive());
                } else {
                    log.warn("Unspported Patterntype {}! Change this impplementation to support this type! -> treat constaint \"{}\"as REGEX",
                        constraint.getPatternType(),constraint.getTexts());
                    addRegexFilter(queryString,var,constraint.getTexts(),constraint.isCaseSensitive());
                }
            }
        }
    } //else nothing to do
    //add language Filters
    if(constraint.getLanguages() != null && !constraint.getLanguages().isEmpty()){
        if(!filterAdded){
            //no FILTER opened so far -> close the previous pattern and open one
            queryString.append(" . \n").append(intend).append(" FILTER(");
            filterAdded = true;
            writeLanguagesFilter(queryString, constraint.getLanguages(), var,null);
        } else {
            //add the languages to the already opened FILTER
            writeLanguagesFilter(queryString, constraint.getLanguages(), var," && ");
        }
    }
    if(filterAdded){
        queryString.append(")"); //close the FILTER and the graph pattern
    }
}
/**
 * Creates <code>("Creates" AND "Text") OR ("Query" AND "String")</code> like
 * queries based on the passed texts, as used by
 * {@link EndpointTypeEnum#LARQ LARQ} and
 * {@link EndpointTypeEnum#Virtuoso VIRTUOSO} SPARQL endpoints to speed up
 * full text queries.<p>
 * Each text is split at white spaces. Words that contain neither a letter
 * nor a digit are ignored, the remaining words are quoted and combined by
 * AND. The groups of the different texts are combined by OR. Texts that are
 * <code>null</code>, empty or do not contain any usable word are ignored, so
 * that the result never contains empty brackets or dangling operators.
 * Brackets are only added for groups with more than one word.
 * @param constraints the texts as returned by {@link TextConstraint#getTexts()}
 * @return the full text query string (empty if no usable word was passed)
 */
protected static String createFullTextQueryString(Collection<String> constraints) {
    StringBuilder textQuery = new StringBuilder();
    for(String constraintText : constraints){
        if(constraintText == null || constraintText.isEmpty()){
            continue;
        }
        //first build the AND group for this text, because we need to know
        //the number of usable words before writing brackets and operators
        //TODO: maybe we should use a word tokenizer here
        StringBuilder group = new StringBuilder();
        int usableWords = 0;
        for(String word : constraintText.split("\\s")){
            word = word.trim();
            boolean hasAlphaNumeric = false;
            for(int i = 0; i < word.length() && !hasAlphaNumeric;i++){
                char ch = word.charAt(i);
                hasAlphaNumeric = Character.isLetter(ch) || Character.isDigit(ch);
            }
            if(!hasAlphaNumeric){
                continue; //nothing to search for
            }
            if(usableWords > 0){
                group.append(" AND ");
            }
            //quotes within a word need to be escaped by a backslash
            group.append('"').append(word.replace("\"", "\\\"")).append('"');
            usableWords++;
        }
        if(usableWords == 0){
            continue; //ignore texts without any usable word
        }
        if(textQuery.length() > 0){
            textQuery.append(" OR ");
        }
        if(usableWords > 1){
            textQuery.append('(').append(group).append(')');
        } else {
            textQuery.append(group);
        }
    }
    return textQuery.toString();
}
/**
 * Appends one <code>regex(..)</code> expression for each passed regex to the
 * query string. Multiple expressions are connected with '||' (OR).
 * <code>null</code> and empty regex strings are ignored. This method does
 * neither open nor close the FILTER.
 * @param queryString the string builder to add the constraint
 * @param var the variable to constrain
 * @param regexContraints the regex encoded search strings
 * @param isCasesensitive if <code>false</code> the "i" flag is added to the
 * regex expressions
 */
private static void addRegexFilter(StringBuilder queryString, String var, Collection<String> regexContraints,boolean isCasesensitive) {
    final String flags = isCasesensitive ? "" : ",\"i\"";
    String separator = ""; //no separator in front of the first expression
    for(String regex : regexContraints){
        if(regex == null || regex.isEmpty()){
            continue;
        }
        queryString.append(separator)
            .append("regex(str(?").append(var)
            .append("),\"").append(regex).append('"')
            .append(flags).append(')');
        separator = " || ";
    }
}
/**
 * Appends the FILTER for a range constraint to the passed query string. If
 * both a lower and an upper bound are defined, the two bounds are connected
 * by '&amp;&amp;' and enclosed by brackets.
 * @param queryString the query to add the constraint
 * @param var the variable to constrain
 * @param constraint the constraint
 * @param intend the indent written in front of the FILTER
 */
private static void addRangeConstriant(StringBuilder queryString, String var, RangeConstraint constraint,String intend) {
    queryString.append("\n").append(intend).append("FILTER "); //start the FILTER
    final Object lower = constraint.getLowerBound();
    final Object upper = constraint.getUpperBound();
    final boolean bothBounds = lower != null && upper != null;
    if(bothBounds){
        queryString.append("(");
    }
    if(lower != null){
        addRangeBound(queryString, var, true, constraint.isInclusive(), lower);
    }
    if(bothBounds){
        queryString.append(" && ");
    }
    if(upper != null){
        addRangeBound(queryString, var, false, constraint.isInclusive(), upper);
    }
    if(bothBounds){
        queryString.append(")");
    }
}
/**
 * Adds a lower/upper bound constraint in the form
 * <code>(?var &gt;/&lt;[=] value)</code> to the query String.<p>
 * {@link Date} values are written as typed literals. The lexical form needs
 * to be enclosed by quotes, because a data type can only be added to a
 * quoted string. All other values are written by using their
 * <code>toString()</code> representation.
 * @param queryString the query string
 * @param var the variable
 * @param lowerBound <code>true</code> to add the lower bound and
 * <code>false</code> to add the upper bound
 * @param inclusive if the bound is inclusive (&gt;= or &lt;=)
 * @param value the value representing the bound.
 */
private static void addRangeBound(StringBuilder queryString, String var, boolean lowerBound, boolean inclusive, Object value) {
    String stringValue;
    if(value instanceof Date){//for dates add the data type!
        stringValue = String.format("\"%s\"^^<%s>", DATE_FORMAT.format((Date)value),XSD_DATE_TIME);
    } else { //add additional "if" for special types if necessary
        stringValue = value.toString();
    }
    //adds (?var >/<[=] valueString)
    queryString.append(String.format("(?%s %c%s %s)", var,lowerBound?'>':'<',inclusive?"=":"",stringValue));
}
/**
 * Adds a S P O pattern to the query by using the root variable as subject,
 * the parsed field as predicate and the returned variable as object. The
 * name of the root variable is read from the selected fields map by using
 * <code>null</code> as key. For special fields the predicate is not the
 * field URI but a newly created variable, so that all outgoing properties
 * are matched.<p>
 * This method does not open a '{' nor close the pattern with any of
 * '.', ',' or ';'
 * @param queryString the {@link StringBuilder} to add the pattern
 * @param field the field
 * @param selectedFields the map field -> var of the selected variables. If
 * the parsed field is selected, the field is removed from the map and
 * the mapped variable name is returned
 * @param varPrefix the default prefix for newly created variable names
 * @param varNum The first element of the array is used to get the number of
 * the created variable. For each created variable the value of the first
 * element is increased by one
 * @param intend the indent written in front of the pattern
 * @return The variable name used for the object of the pattern
 */
private static String addFieldGraphPattern(StringBuilder queryString, String field, Map<String, String> selectedFields, String varPrefix, int[] varNum,String intend) {
    //use the variable of the selected field or create a temp variable
    String objectVar = selectedFields.remove(field);
    if(objectVar == null){
        objectVar = varPrefix + varNum[0]++;
    }
    //build the predicate: a variable for special fields, otherwise the URI
    final String predicate;
    if(isSpecialField(field)){
        predicate = "?" + varPrefix + varNum[0]++;
    } else {
        predicate = "<" + field + ">";
    }
    final String rootVar = selectedFields.get(null);
    queryString.append(intend).append('?').append(rootVar).append(' ')
        .append(predicate).append(" ?").append(objectVar).append(' ');
    return objectVar;
}
/**
 * Writes the SPARQL filter expression for the parsed languages.
 * This Method writes
 * <code><pre>
 * prefix ((lang(?var) = "lang1") [|| (lang(?var) = "lang2..n")])
 * </pre></code>
 * The outer parentheses are only written if more than one language is
 * parsed. Nothing (also not the prefix) is written if the parsed languages
 * are <code>null</code> or empty.
 * @param queryString the query string to add the filter expression
 * @param languages the languages to filter for (may contain <code>null</code>
 * as element to select literals without a language tag)
 * @param var the name of the variable to filter.
 * @param prefix The prefix is written in front of the filter expression (if
 * any is created). <code>null</code> if no prefix should be written.
 * Typically this will be <ul>
 * <li> FILTER if this is the only filter for a variable
 * <li> &amp;&amp; if this filter is combined with AND to another filter or
 * <li> || if this filter is combined with OR to another filter
 * </ul>
 */
private static void writeLanguagesFilter(StringBuilder queryString, Collection<String> languages, String var, String prefix) {
    if(languages == null || languages.isEmpty()){
        return; //no languages -> no filter
    }
    if(prefix != null){
        queryString.append(prefix);
    }
    final boolean group = languages.size() > 1;
    if(group){
        queryString.append('(');
    }
    String separator = ""; //empty for the first language
    for(String lang : languages){
        queryString.append(separator);
        separator = " || ";
        /*
         * NOTE: lang() returns "" for literals without a language tag.
         * Because of that null elements are written as "".
         */
        queryString.append("(lang(?").append(var).append(") = \"")
            .append(lang == null ? "" : lang).append("\")");
    }
    if(group){
        queryString.append(')');
    }
}
/**
 * Manual test that creates a field query and prints the SPARQL queries
 * generated for it to {@link System#out}: first the SELECT query for the
 * LARQ, Virtuoso and Standard endpoint types (each followed by an empty
 * line) and finally the CONSTRUCT query for Virtuoso.<p>
 * The commented lines are example constraints that can be activated to
 * inspect the queries generated for them.
 * @param args not used
 */
public static void main(String[] args) {
    final SparqlFieldQuery query = SparqlFieldQueryFactory.getInstance().createFieldQuery();
    // --- example constraints (activate as needed) ---
    // query.setConstraint("urn:field1", new ReferenceConstraint("urn:testReference"));
    // query.setConstraint("urn:field1", new ReferenceConstraint(
    // Arrays.asList("urn:testReference","urn:testReference1","urn:testReference3"),MODE.any));
    // query.setConstraint(SpecialFieldEnum.references.getUri(), new ReferenceConstraint(
    // Arrays.asList("urn:testReference","urn:testReference1","urn:testReference3")));
    // query.setConstraint("urn:field1a", new ValueConstraint(null, Arrays.asList(
    // DataTypeEnum.Float.getUri())));
    // query.addSelectedField("urn:field1a");
    // query.setConstraint("urn:field1b", new ValueConstraint(9, Arrays.asList(
    // DataTypeEnum.Float.getUri())));
    // query.setConstraint("urn:field1b", new ValueConstraint(Arrays.asList(9,10,11), Arrays.asList(
    // DataTypeEnum.Float.getUri()),MODE.any));
    // query.setConstraint("urn:field1c", new ValueConstraint(null, Arrays.asList(
    // DataTypeEnum.Float.getUri(),DataTypeEnum.Double.getUri(),DataTypeEnum.Decimal.getUri())));
    // query.addSelectedField("urn:field1c");
    // query.setConstraint("urn:field1d", new ValueConstraint(9, Arrays.asList(
    // DataTypeEnum.Float.getUri(),DataTypeEnum.Double.getUri(),DataTypeEnum.Decimal.getUri())));
    // query.setConstraint("urn:field1d", new ValueConstraint(Arrays.asList(9,10,11), Arrays.asList(
    // DataTypeEnum.Float.getUri(),DataTypeEnum.Double.getUri(),DataTypeEnum.Decimal.getUri())));
    // query.setConstraint("urn:field2", new TextConstraint("test value"));
    // query.setConstraint("urn:field3", new TextConstraint(Arrays.asList(
    // "text value","anothertest","some more values"),true));
    // query.setConstraint(SpecialFieldEnum.fullText.getUri(), new TextConstraint(Arrays.asList(
    // "text value","anothertest","some more values"),true));
    // query.setConstraint("urn:field2a", new TextConstraint(":-]")); //tests escaping of REGEX
    // query.setConstraint("urn:field3", new TextConstraint("language text","en"));
    // query.setConstraint("urn:field4", new TextConstraint("multi language text","en","de",null));
    // query.setConstraint("urn:field5", new TextConstraint("wildcar*",PatternType.wildcard,false,"en","de"));
    // query.addSelectedField("urn:field5");
    // query.setConstraint("urn:field6", new TextConstraint("^regex",PatternType.REGEX,true));
    // query.setConstraint("urn:field7", new TextConstraint("par*",PatternType.WildCard,false,"en","de",null));
    // query.setConstraint("urn:field8", new TextConstraint(null,"en","de",null));
    // query.setConstraint("urn:field9", new RangeConstraint((int)5, (int)10, true));
    // query.setConstraint("urn:field10", new RangeConstraint((int)5, (int)10, false));
    // query.setConstraint("urn:field11", new RangeConstraint(null, (int)10, true));
    // query.setConstraint("urn:field12", new RangeConstraint((int)5, null, true));
    // query.setConstraint("urn:field12", new RangeConstraint(new Date(), null, true));
    query.addSelectedField("urn:field2a");
    query.addSelectedField("urn:field3");
    query.setLimit(5);
    query.setOffset(5);
    //print the SELECT query for each endpoint type followed by an empty line
    final EndpointTypeEnum[] selectTargets = {
        EndpointTypeEnum.LARQ, EndpointTypeEnum.Virtuoso, EndpointTypeEnum.Standard};
    for(EndpointTypeEnum endpointType : selectTargets){
        System.out.println(createSparqlSelectQuery(query,true,0,endpointType));
        System.out.println();
    }
    //finally the CONSTRUCT query
    System.out.println(createSparqlConstructQuery(query,0,EndpointTypeEnum.Virtuoso));
}
/**
 * Creates an iterator over the results of a field query that are contained
 * in the parsed graph. Results are the objects of all triples with the
 * subject <code>FIELD_QUERY_RESULT_SET</code> and the predicate
 * <code>FIELD_QUERY_RESULT</code>. For each object that is an
 * {@link UriRef} a {@link RdfRepresentation} backed by the parsed graph is
 * created. For objects of any other type a warning is logged and the
 * adapter returns <code>null</code> (NOTE: such results are presumably
 * skipped by the {@link AdaptingIterator} - to be confirmed).
 * @param resultGraph the graph containing the query results
 * @return the iterator over the representations of the query results
 */
public static Iterator<RdfRepresentation> parseQueryResultsFromMGraph(final TripleCollection resultGraph) {
    final Iterator<Triple> resultTriples = resultGraph.filter(FIELD_QUERY_RESULT_SET,FIELD_QUERY_RESULT,null);
    /*
     * Adapter converting the filtered triples of the result graph to
     * RdfRepresentations
     */
    final AdaptingIterator.Adapter<Triple, RdfRepresentation> tripleAdapter =
        new AdaptingIterator.Adapter<Triple, RdfRepresentation>() {
            @Override
            public RdfRepresentation adapt(Triple value, Class<RdfRepresentation> type) {
                final Resource object = value.getObject();
                if(object instanceof UriRef){
                    return valueFavtory.createRdfRepresentation((UriRef)object, resultGraph);
                }
                if(object != null){ //neither missing nor an UriRef
                    log.warn("Unable to create representation for FieldQueryResult "+object+" because this Resource is not of Type UriRef (type: "+object.getClass()+") -> result gets ignored");
                }
                return null;
            }
        };
    return new AdaptingIterator<Triple, RdfRepresentation>(
        resultTriples,tripleAdapter,RdfRepresentation.class);
}
}