// Package org.apache.stanbol.entityhub.query.clerezza
//
// Source Code of org.apache.stanbol.entityhub.query.clerezza.SparqlQueryUtils

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.entityhub.query.clerezza;

import static org.apache.stanbol.entityhub.servicesapi.defaults.SpecialFieldEnum.isSpecialField;

import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.util.W3CDateFormat;
import org.apache.stanbol.entityhub.model.clerezza.RdfRepresentation;
import org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory;
import org.apache.stanbol.entityhub.servicesapi.defaults.DataTypeEnum;
import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
import org.apache.stanbol.entityhub.servicesapi.defaults.SpecialFieldEnum;
import org.apache.stanbol.entityhub.servicesapi.model.Reference;
import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
import org.apache.stanbol.entityhub.servicesapi.query.Constraint;
import org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint.PatternType;
import org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint.MODE;
import org.apache.stanbol.entityhub.servicesapi.util.AdaptingIterator;
import org.apache.stanbol.entityhub.servicesapi.util.PatternUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Utility Class to create SPARQL Queries for {@link SparqlFieldQuery} instances.
* <p>
* Thanks to ogrisel for pointing me to his <a href=
* "http://hg.nuxeo.org/sandbox/scribo/raw-file/b57ada956947/scribo-annotator-recognizer-sparql-ep/src/main/java/ws/scribo/annotators/recognizer/SparqlEndpointInstanceRecognizer.java"
* > SparqlEndpointInstanceRecognizer</a> implementation for the query optimisations for Virtuoso and LARQ!
*
* @author Rupert Westenthaler
*
*/
public final class SparqlQueryUtils {

    // Class-wide SLF4J logger; used for trace output while a query is assembled and for
    // warnings about constraint types this implementation does not handle.
    private static final Logger log = LoggerFactory.getLogger(SparqlQueryUtils.class);

    public static enum EndpointTypeEnum {
        Standard,
        Virtuoso(true),
        LARQ,
        ARQ;
        boolean supportsSparql11SubSelect;

        /**
         * Default feature set (SPARQL 1.0)
         */
        EndpointTypeEnum() {
            this(false);
        }

        /**
         * Allows to enable SPARQL 1.1 features
         *
         * @param supportsSparql11SubSelect
         */
        EndpointTypeEnum(boolean supportsSparql11SubSelect) {
            this.supportsSparql11SubSelect = supportsSparql11SubSelect;
        }

        public final boolean supportsSubSelect() {
            return supportsSparql11SubSelect;
        }
    }

    // URI of the XML Schema dateTime datatype.
    private static final String XSD_DATE_TIME = "http://www.w3.org/2001/XMLSchema#dateTime";
    // Shared W3C date formatter.
    // NOTE(review): java.text.DateFormat implementations are generally not safe for concurrent use;
    // the usages are outside of this part of the file - confirm that access is synchronized.
    private static final DateFormat DATE_FORMAT = new W3CDateFormat();
    // Shared value factory instance. The identifier carries a typo ("Favtory"); it is kept because
    // it may be referenced further down in this class.
    private static final RdfValueFactory valueFavtory = RdfValueFactory.getInstance();
    /**
     * {@link UriRef} constant for {@link RdfResourceEnum#queryResult}
     *
     * @see RdfResourceEnum#queryResult
     */
    public static final UriRef FIELD_QUERY_RESULT = new UriRef(RdfResourceEnum.queryResult.getUri());
    /**
     * {@link UriRef} constant for {@link RdfResourceEnum#QueryResultSet}
     *
     * @see RdfResourceEnum#QueryResultSet
     */
    public static final UriRef FIELD_QUERY_RESULT_SET = new UriRef(RdfResourceEnum.QueryResultSet.getUri());

    // Utility class with static members only: not intended to be instantiated.
    private SparqlQueryUtils() {}

    /**
     * Creates a SPARQL CONSTRUCT query that creates triples for all the selected fields of representations in
     * the result set.
     * <p>
     * In addition the query also constructs <code>{@link RdfResourceEnum#QueryResultSet}
     * {@link RdfResourceEnum#queryResult} ?representation</code> triples that can be used to create an
     * iterator over the results of the query.
     *
     * @param query
     *            the field query
     * @param endpointType
     *            The type of the Endpoint (used to write optimized queries for endpoint type specific
     *            extensions)
     * @param additionalFields
     *            This allows to pass additional fields that are optionally selected in the data set and
     *            added to the CONSTRUCT part of the query
     * @return the SPARQL CONSTRUCT Query
     */
    public static String createSparqlConstructQuery(SparqlFieldQuery query,
                                                    EndpointTypeEnum endpointType,
                                                    String... additionalFields) {
        return createSparqlConstructQuery(query, -1, endpointType, additionalFields);
    }

    /**
     * Creates a SPARQL CONSTRUCT query that creates triples for all the selected fields of representations in
     * the result set.
     * <p>
     * In addition the query also constructs <code>{@link RdfResourceEnum#QueryResultSet}
     * {@link RdfResourceEnum#queryResult} ?representation</code> triples that can be used to create an
     * iterator over the results of the query.
     *
     * @param query
     *            the field query
     * @param limit
     *            if a value &gt; 0 is passed, then this value overrides the limit defined by the query.
     * @param endpointType
     *            The type of the Endpoint (used to write optimized queries for endpoint type specific
     *            extensions)
     * @param additionalFields
     *            This allows to pass additional fields that are optionally selected in the data set and
     *            added to the CONSTRUCT part of the query
     * @return the SPARQL CONSTRUCT Query
     */
    public static String createSparqlConstructQuery(SparqlFieldQuery query,
                                                    int limit,
                                                    EndpointTypeEnum endpointType,
                                                    String... additionalFields) {
        // 1)INIT
        final StringBuilder queryString = new StringBuilder();
        // clone the query and reconfigure the clone
        query = initLocalQuery(query, limit, additionalFields);
        final Map<String,String> selectedFields = initSelectedFieldsMap(query);
        // 2)CONSTRUCT
        createConstruct(queryString, selectedFields);
        // 3)WHERE
        queryString.append("WHERE { \n");
        addFieldConstraint(queryString, query, selectedFields, endpointType);
        queryString.append("} \n");
        // 5) Limit and Offset
        if (!isSubSelectState(endpointType, selectedFields)) {
            // 4)add Entity Ranking (if needed)
            addRankingOrder(endpointType, queryString, selectedFields.get(null), "");
            addLimit(query.getLimit() != null ? query.getLimit() : 0, queryString);
            addOffset(query, queryString);
        }
        return queryString.toString();
    }

    /**
     * Determines if the current query uses sub selects. Activated if the SPARQL endpoint supports the SPARQL
     * 1.1 sub select feature and the query selects more than the entity id.
     *
     * @param endpoint
     *            the used endpoint type
     * @param selectedFields
     *            the map with the selected fields
     * @return the state
     */
    private static boolean isSubSelectState(EndpointTypeEnum endpoint, Map<String,String> selectedFields) {
        return endpoint.supportsSubSelect() && selectedFields.size() > 1;
    }

    /**
     * Creates a clone of the parsed query and applies the parsed limit and additional fields
     *
     * @param query
     *            the query
     * @param limit
     *            the limit (if &gt; 0)
     * @param additionalFields
     *            additional fields to select
     * @return a clone of the parsed query with the set limit and added fields
     */
    private static SparqlFieldQuery initLocalQuery(SparqlFieldQuery query,
                                                   int limit,
                                                   String... additionalFields) {
        query = query.clone();
        if (limit > 0) {
            query.setLimit(limit);
        }
        // We need a copy to delete all fields that are already covered by some
        // added
        // graph pattern.
        if (additionalFields != null && additionalFields.length > 0) {
            query.addSelectedFields(Arrays.asList(additionalFields));
        }
        return query;
    }

    /**
     * Creates the CONSTRUCT part of the query including the
     * <code>{@link RdfResourceEnum#QueryResultSet} {@link RdfResourceEnum#queryResult} ?representation</code>
     * triples that are used to build the iterator over the results
     *
     * @param queryString
     *            The query to add the construct fields
     * @param selectedFields
     *            the field name to variable name mapping used by the query. This mapping MUST also contain
     *            the <code>null</code> key that is mapped to the variable name used for the representations
     *            to be selected
     */
    private static void createConstruct(final StringBuilder queryString,
                                        final Map<String,String> selectedFields) {
        queryString.append("CONSTRUCT { \n");
        String rootVar = selectedFields.get(null);// the null element has the
                                                  // root variable mapping
        for (Entry<String,String> mapping : selectedFields.entrySet()) {
            if (mapping.getKey() != null) {
                queryString.append("  ?").append(rootVar).append(" <");
                queryString.append(mapping.getKey()).append("> ?");
                queryString.append(mapping.getValue()).append(" .\n");
            }
        }
        // add the triples for the Representation type
        // add the triples that form the result set
        queryString.append("  <").append(RdfResourceEnum.QueryResultSet).append("> <");
        queryString.append(RdfResourceEnum.queryResult).append("> ?");
        queryString.append(rootVar).append(" . \n");

        queryString.append("} ");
    }

    /**
     * Creates the SPARQL representation of the parse field query.
     *
     * @param query
     *            A field query implementation that additionally supports a field to variable mapping
     * @param endpointType
     *            The type of the Endpoint (used to write optimized queries for endpoint type specific
     *            extensions
     * @return the SPARQL query as String
     */
    public static String createSparqlSelectQuery(SparqlFieldQuery query, EndpointTypeEnum endpointType) {
        return createSparqlSelectQuery(query, true, -1, endpointType);
    }

    /**
     * Creates the SPARQL representation of the passed field query.
     *
     * @param query
     *            A field query implementation that additionally supports a field to variable mapping
     * @param limit
     *            If &gt; 0, then the limit defined by the query is overridden by this value
     * @param endpointType
     *            The type of the Endpoint (used to write optimized queries for endpoint type specific
     *            extensions)
     * @return the SPARQL query as String
     */
    public static String createSparqlSelectQuery(SparqlFieldQuery query,
                                                 int limit,
                                                 EndpointTypeEnum endpointType) {
        return createSparqlSelectQuery(query, true, limit, endpointType);
    }

    /**
     * Creates the SPARQL representation of the parse field query.
     *
     * @param query
     *            A field query implementation that additionally supports a field to variable mapping
     * @param includeFields
     *            if <code>false</code> only the root is selected (selected fields are ignored)
     * @param endpointType
     *            The type of the Endpoint (used to write optimized queries for endpoint type specific
     *            extensions
     * @return the SPARQL query as String
     */
    public static String createSparqlSelectQuery(SparqlFieldQuery query,
                                                 boolean includeFields,
                                                 EndpointTypeEnum endpointType) {
        return createSparqlSelectQuery(query, includeFields, -1, endpointType);
    }

    /**
     * Creates the SPARQL representation of the parse field query.
     *
     * @param query
     *            A field query implementation that additionally supports a field to variable mapping
     * @param includeFields
     *            if <code>false</code> only the root is selected (selected fields are ignored)
     * @param limit
     *            if > 0 than the limit defined by the query is overridden by the parsed value
     * @param endpointType
     *            The type of the Endpoint (used to write optimized queries for endpoint type specific
     *            extensions
     * @return the SPARQL query as String
     */
    public static String createSparqlSelectQuery(SparqlFieldQuery query,
                                                 boolean includeFields,
                                                 int limit,
                                                 EndpointTypeEnum endpointType) {
        // 1) INIT
        final StringBuilder queryString = new StringBuilder();
        query = initLocalQuery(query, limit);
        final Map<String,String> selectedFields = initSelectedFieldsMap(query);
        // 2) SELECT
        createSelect(queryString, includeFields, selectedFields);
        // 3) WHERE
        queryString.append("WHERE { \n");
        addFieldConstraint(queryString, query, selectedFields, endpointType);

        log.trace("After adding field constrains the query string is [{}].", queryString);

        queryString.append("} \n");
        if (!isSubSelectState(endpointType, selectedFields)) {
            // 4) Add Stuff to rank results based on the "page rank" of entities
            addRankingOrder(endpointType, queryString, selectedFields.get(null), "");
            // 5) Limit and Offset
            addLimit(query.getLimit() != null ? query.getLimit() : 0, queryString);
            addOffset(query, queryString);
        }
        return queryString.toString();
    }

    /**
     * Initialise the field -&gt; variable name mappings including the root variable name by adding
     * <code>null</code> as key
     *
     * @param query
     *            the query
     * @return the mappings
     */
    private static Map<String,String> initSelectedFieldsMap(SparqlFieldQuery query) {
        // We need a copy to delete all fields that are already covered by some
        // added
        // graph pattern.
        final Map<String,String> selectedFields = new HashMap<String,String>();
        selectedFields.putAll(query.getFieldVariableMappings());
        // also add the root variable
        selectedFields.put(null, query.getRootVariableName());
        return selectedFields;
    }

    /**
     * For some {@link EndpointTypeEnum SPARQL endpoint types} we need to add an additional constraint to
     * determine the ranking information based on incoming relations to the Entities.
     * <p>
     * Currently this done for {@link EndpointTypeEnum#ARQ ARQ} and {@link EndpointTypeEnum#LARQ LARQ}.
     *
     * @param endpointType
     *            the endpoint type
     * @param queryString
     *            the SPARQL query string to add the ranking constraint
     * @param rootFieldName
     *            the variable name used to select entities
     */
    private static void addRankingConstraints(EndpointTypeEnum endpointType,
                                              final StringBuilder queryString,
                                              final String rootFieldName) {
        if (endpointType == EndpointTypeEnum.ARQ || endpointType == EndpointTypeEnum.LARQ) {
            queryString.append(String.format("  { \n    ?incoming ?p ?%s . \n  } \n", rootFieldName));
        } // else ... for Virtuoso we need not count incoming links, because it
          // has a
          // page rank like feature we can use to rank entities!
          // all others do not support sorting
    }

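    /**
     * Appends the endpoint specific ORDER BY clause used to rank the results. For
     * {@link EndpointTypeEnum#Virtuoso Virtuoso} the results are ordered by the IRI rank of the root
     * variable, for {@link EndpointTypeEnum#ARQ ARQ} and {@link EndpointTypeEnum#LARQ LARQ} by the count of
     * the <code>?incoming</code> variable. For all other endpoint types nothing is added.
     *
     * @param endpointType
     *            the endpoint type
     * @param queryString
     *            the SPARQL query string to add the ORDER BY clause
     * @param rootVarName
     *            the variable name used to select entities
     * @param intend
     *            the indent written in front of the clause (<code>null</code> is treated as empty)
     */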
    private static void addRankingOrder(EndpointTypeEnum endpointType,
                                        final StringBuilder queryString,
                                        String rootVarName,
                                        String intend) {
        if (endpointType == EndpointTypeEnum.Virtuoso) {
            // is that still SPARQL ... wondering about the syntax ^
            queryString.append(String.format("%sORDER BY DESC ( <LONG::IRI_RANK> (?%s) ) \n",
                intend != null ? intend : "", rootVarName));
        } else if (endpointType == EndpointTypeEnum.ARQ || endpointType == EndpointTypeEnum.LARQ) {
            // TODO: COUNT is not part of the SPARQL 1.0 specification!
            // see http://www.w3.org/2009/sparql/wiki/Feature:AggregateFunctions
            queryString.append(String.format("%sORDER BY DESC (COUNT (?incoming) ) \n",
                intend != null ? intend : ""));
        } // else not supported ... add nothing
    }

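    /**
     * Appends the OFFSET clause if the offset of the query is greater than zero.
     *
     * @param query
     *            the query providing the offset
     * @param queryString
     *            the SPARQL query string to add the OFFSET clause
     */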
    private static void addOffset(SparqlFieldQuery query, final StringBuilder queryString) {
        if (query.getOffset() > 0) {
            queryString.append(String.format("OFFSET %d \n", query.getOffset()));
        }
    }

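    /**
     * Appends the LIMIT clause if the passed limit is not <code>null</code> and greater than zero.
     *
     * @param limit
     *            the limit or <code>null</code> if none
     * @param queryString
     *            the SPARQL query string to add the LIMIT clause
     */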
    private static void addLimit(Integer limit, final StringBuilder queryString) {
        if (limit != null && limit > 0) {
            queryString.append(String.format("LIMIT %d \n", limit));
        }
    }

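    /**
     * Adds the SELECT part to the SPARQL query
     *
     * @param queryString
     *            the SPARQL query string to add the SELECT part
     * @param includeFields
     *            if <code>false</code> only the root variable is selected
     * @param selectedFields
     *            the field to variable name mappings including the root variable under the
     *            <code>null</code> key
     */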
    private static void createSelect(final StringBuilder queryString,
                                     boolean includeFields,
                                     final Map<String,String> selectedFields) {
        queryString.append("SELECT DISTINCT");
        // REMOVED: The root variable is already in the selected fields map!
        // queryString.append(" ?"+query.getRootVariableName()); //select the
        // representation ID

        // now the variables for the selected fields!
        if (includeFields) {
            for (String varName : selectedFields.values()) {
                queryString.append(" ?");
                queryString.append(varName);
            }
        } else {
            // else add only the root variable (stored under key null)
            queryString.append(" ?");
            queryString.append(selectedFields.get(null));
        }
        queryString.append(" \n");
    }

    /**
     * Adds the content of the WHERE clause of the SPARQL query.
     * <p>
     * If the {@link EndpointTypeEnum SPARQL endpoint} supports SPARQL 1.1 subqueries, then this also adds
     * the LIMIT and OFFSET to an inner SELECT that only selects the id.
     *
     * @param queryString
     *            the SPARQL query string to add the WHERE
     * @param query
     *            the query
     * @param selectedFields
     *            the selected fields
     * @param endpointType
     *            The type of the endpoint (used to write optimised queries for endpoint type specific
     *            extensions)
     */
    private static void addFieldConstraint(final StringBuilder queryString,
                                           SparqlFieldQuery query,
                                           Map<String,String> selectedFields,
                                           EndpointTypeEnum endpointType) {
        // we need temporary variables with unique names
        String varPrefix = "tmp";
        // single element array: a mutable counter that is shared with (and incremented by)
        // the helper methods that create temporary variables
        int[] varNum = new int[] {1};
        // used to open brackets for the select part of the constraints
        boolean first = true;
        // determine if sub-selects are supported and if we need a sub-select
        // (more than the id is selected)
        boolean subSelectState = isSubSelectState(endpointType, selectedFields);

        log.trace("add field constraints is in a sub-select-state [{}].", (subSelectState ? "yes" : "no"));
        // if we use a sub query to select the ids, we need to add the graph pattern
        // of all selected fields outside of the sub query. Therefore a copy of the
        // mappings is kept before they are consumed by the constraints
        Map<String,String> tmpSelectedFields = subSelectState ? new HashMap<String,String>(selectedFields)
                : null;
        String intend;
        if (subSelectState) {
            intend = "      "; // additional indent because of sub query (3*2)
        } else {
            intend = "    "; // normal indent (2*2)
        }
        Iterator<Entry<String,Constraint>> constraintIterator = query.iterator();
        while (constraintIterator.hasNext()) {
            Entry<String,Constraint> fieldConstraint = constraintIterator.next();

            if (first) {
                // opened lazily: the group (and the sub-select) is only written if the
                // query defines at least a single constraint
                queryString.append("  { \n");
                if (subSelectState) {
                    String rootVarName = selectedFields.get(null);
                    queryString.append("    SELECT ?").append(rootVarName).append(" \n");
                    queryString.append("    WHERE { \n");
                }
                first = false;
            }
            String field = fieldConstraint.getKey();
            Constraint constraint = fieldConstraint.getValue();

            log.trace("adding a constraint [type :: {}][field :: {}][prefix :: {}][intent :: {}].",
                new Object[]{constraint.getType(), field, varPrefix, intend});

            switch (constraint.getType()) {
                case value:
                    addValueConstraint(queryString, field, (ValueConstraint) constraint, selectedFields,
                        varPrefix, varNum, intend);
                    break;
                case text:
                    // first the graph pattern binding the variable, then the constraint on it
                    String var = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum,
                        intend);
                    addTextConstraint(queryString, var, (TextConstraint) constraint, endpointType, intend);
                    break;
                case range:
                    var = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum, intend);
                    addRangeConstriant(queryString, var, (RangeConstraint) constraint, intend);
                    break;
                default:
                    log.warn("Please update this Implementation to support the Constraint Type "
                             + fieldConstraint.getValue().getType());
                    break;
            }

            // terminates the pattern written for this constraint
            queryString.append(" . \n");
        }
        // for some endpoints we need to add additional constraints used for
        // ranking. If sub-queries are used this needs to be in the select part
        // of the query (to rank results of the inner query),
        // otherwise it is better to have it outside of the select part to only
        // rank the graph selected by the query
        if (subSelectState) {
            addRankingConstraints(endpointType, queryString, selectedFields.get(null));
        }
        if (!first) {
            if (subSelectState) {
                // closes the WHERE of the sub-select
                queryString.append("    } \n");
                // re-add all selected fields to be added as selects because in
                // the sub-query we only select the ID!
                selectedFields = tmpSelectedFields;
                // ranking needs also to be added to the sub-query (to correctly
                // process LIMIT and OFFSET)
                addRankingOrder(endpointType, queryString, selectedFields.get(null), "    ");
                // add LIMIT and OFFSET to the sub-query!
                // TODO: add link to the email
                queryString.append("    ");
                addLimit(query.getLimit(), queryString);
                queryString.append("    ");
                addOffset(query, queryString);
                queryString.append("    ");
            }
            // closes the group opened for the first constraint
            queryString.append("  } \n");
        }
        // All the following graph patterns are only processed for the parts
        // selected by the above constraints.
        // If no sub-queries are used we now need to add the ranking constraints
        if (!subSelectState) {
            addRankingConstraints(endpointType, queryString, selectedFields.get(null));
        }
        // we need to add graph patterns for selected fields that are not covered
        // by graph patterns written for the constraints.
        // Implementation Note: selectedFields contains the null key for the
        // root variable
        while (selectedFields.size() > 1) { // if this is the only left element
                                            // we are done
            Iterator<String> it = selectedFields.keySet().iterator();
            String actField; // we need to get a non null value from the map
            do {
                // the outer while ensures a non null value so we need not
                // use hasNext
                actField = it.next();
            } while (actField == null);
            queryString.append("  OPTIONAL { ");
            // NOTE the following method removes the written mapping from the
            // map (this is what lets the outer loop terminate)
            addFieldGraphPattern(queryString, actField, selectedFields, varPrefix, varNum, "");
            queryString.append(". } \n");
        }
    }

    private static void addValueConstraint(StringBuilder queryString,
                                           String field,
                                           ValueConstraint constraint,
                                           Map<String,String> selectedFields,
                                           String varPrefix,
                                           int[] varNum,
                                           String intend) {
        String rootVarName = selectedFields.get(null);
        Collection<String> dataTypes = constraint.getDataTypes();
        if (dataTypes == null) {
            // we need not to distinguish between empty and null
            // because SPARQL includes support for automatic detection of
            // dataTypes
            // see http://www.w3.org/TR/rdf-sparql-query/#QSynLiterals
            dataTypes = Collections.emptySet();
        }
        if (constraint.getValues() != null) {
            if (dataTypes.size() <= 1) {
                addDataTypeValueConstraint(queryString, rootVarName, field, dataTypes.isEmpty() ? null
                        : dataTypes.iterator().next(), constraint.getValues(), constraint.getMode(),
                    varPrefix, varNum, intend);
            } else { // we have multiple dataTypes -> need to use union!
                boolean first = true;
                for (Iterator<String> it = dataTypes.iterator(); it.hasNext();) {
                    String dataType = it.next();
                    if (first) {
                        queryString.append('{');
                        first = false;
                    } else {
                        queryString.append("} UNION {\n");
                    }
                    addDataTypeValueConstraint(queryString, rootVarName, field, dataType,
                        constraint.getValues(), constraint.getMode(), varPrefix, varNum, intend);
                }
                queryString.append('}');
            }
        } else { // no constraint for the value
            // filter all instances that define any value for the given
            // dataTypes
            // see http://www.w3.org/TR/rdf-sparql-query/#func-datatype
            // first we need to select the Variable to filter
            String var = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum, intend);
            queryString.append(". \n").append(intend);
            // now we need to write the filter
            if (dataTypes.size() == 1) {
                addDataTypeFilter(queryString, var, dataTypes.iterator().next());
            } else {
                boolean first = true;
                for (Iterator<String> it = dataTypes.iterator(); it.hasNext();) {
                    String dataType = it.next();
                    if (first) {
                        queryString.append("( \n  ").append(intend);
                        first = false;
                    } else {
                        queryString.append(" || \n  ").append(intend);
                    }
                    addDataTypeFilter(queryString, var, dataType);
                }
                queryString.append(" \n").append(intend).append(")");
            }
        }
    }

    /**
     * Adds a filter that restricts the data type of a variable
     *
     * @param queryString
     *            the query String to add the filter. MUST NOT be <code>null</code>
     * @param var
     *            the variable to add the filter. MUST NOT be <code>null</code>
     * @param dataType
     *            the data type uri for the filter. MUST NOT be <code>null</code>
     */
    private static void addDataTypeFilter(StringBuilder queryString, String var, String dataType) {
        queryString.append(String.format("FILTER(datatype(?%s) = <%s>)", var, dataType));
    }

    /**
     * Adds a value constraint for a field including the dataType
     *
     * @param queryString
     *            the query string to add the constraint. MUST NOT be <code>null</code>
     * @param rootVarName
     *            the variable name of the subject. MUST NOT be <code>null</code>
     * @param field
     *            the property name of the field. MUST NOT be <code>null</code>
     * @param dataType
     *            the dataType constraint or <code>null</code> if none
     * @param value
     *            the value. MUST NOT be <code>null</code>.
     */
    private static void addDataTypeValueConstraint(StringBuilder queryString,
                                                   String rootVarName,
                                                   String field,
                                                   String dataType,
                                                   Collection<Object> values,
                                                   MODE mode,
                                                   String varPrefix,
                                                   int[] varNum,
                                                   String intend) {
        String addIntend = intend;
        queryString.append(intend);
        if (values.size() > 1) {
            queryString.append("{ ");
            addIntend = intend + "  ";
        }
        boolean first = true;
        for (Object value : values) {
            if (first) {
                // only add bracket if multiple values are parsed (STANBOL-697)
                if (mode == MODE.any && values.size() > 1) {
                    queryString.append('{');
                }
                first = false;
            } else {
                if (mode == MODE.any) {
                    queryString.append("} UNION {\n");
                } else {
                    queryString.append(" .\n");
                }
                queryString.append(addIntend);
            }
            String fieldVar;
            if (isSpecialField(field)) {
                // in case of a special field replace the field URI with an
                // variable to allow searching all outgoing properties
                fieldVar = varPrefix + varNum[0];
                varNum[0]++;
            } else {
                fieldVar = null;
            }
            if (DataTypeEnum.Reference.getUri().equals(dataType) || value instanceof Reference) {
                if (fieldVar != null) {
                    queryString.append(String.format("?%s ?%s <%s>", rootVarName, fieldVar, value));
                } else {
                    queryString.append(String.format("?%s <%s> <%s>", rootVarName, field, value));
                }
            } else {
                if (fieldVar != null) {
                    queryString.append(String.format("?%s ?%s \"%s\"%s", rootVarName, fieldVar, value,
                        dataType != null ? String.format("^^<%s>", dataType) : ""));
                } else {
                    queryString.append(String.format("?%s <%s> \"%s\"%s", rootVarName, field, value,
                        dataType != null ? String.format("^^<%s>", dataType) : ""));
                }
            }
        }
        if (values.size() > 1) {
            if (mode == MODE.any) { // close the union
                queryString.append('}');
            }
            queryString.append(" }");
        }
    }

    /**
     * Adds an text constraint to the SPARQL query string
     *
     * @param queryString
     *            the query string to add the constraint
     * @param var
     *            the variable name to constraint
     * @param constraint
     *            the constraint
     * @param endpointType
     *            The type of the Endpoint (used to write optimized queries for endpoint type specific
     *            extensions
     */
    private static void addTextConstraint(StringBuilder queryString,
                                          String var,
                                          TextConstraint constraint,
                                          EndpointTypeEnum endpointType,
                                          String intend) {
        boolean filterAdded = false;
        boolean isTextValueConstraint = constraint.getTexts() != null && !constraint.getTexts().isEmpty();

        log.trace("Constraint is text-value constrain [{}][var :: {}][intent :: {}].",
            new Object[]{(isTextValueConstraint ? "yes" : "no"), var, intend});

        if (isTextValueConstraint) {

            if (constraint.getPatternType() == PatternType.regex) {
                queryString.append(" \n").append(intend).append("  FILTER(");
                filterAdded = true;
                addRegexFilter(queryString, var, constraint.getTexts(), constraint.isCaseSensitive());
            } else {
                // TODO: This optimised versions for Virtuoso and LARQ might not
                // respect case sensitive queries. Need more testing!
                if (EndpointTypeEnum.Virtuoso == endpointType) {
                    queryString.append(". \n  ").append(intend);
                    queryString.append(String.format("?%s bif:contains '%s'", var,
                        createFullTextQueryString(constraint.getTexts())));
                } else if (EndpointTypeEnum.LARQ == endpointType) {
                    queryString.append(". \n  ").append(intend);
                    queryString.append(String.format(
                        "?%s <http://jena.hpl.hp.com/ARQ/property#textMatch> '%s'", var,
                        createFullTextQueryString(constraint.getTexts())));
                } else {
                    queryString.append(" \n").append(intend).append("  FILTER(");
                    filterAdded = true;
                    if (constraint.getPatternType() == PatternType.none) {
                        if (constraint.isCaseSensitive()) {
                            boolean first = true;
                            for (String textConstraint : constraint.getTexts()) {
                                if (first) {
                                    first = false;
                                } else {
                                    queryString.append(" || ");
                                }
                                if (textConstraint != null && !textConstraint.isEmpty()) {
                                    queryString.append(String.format("(str(?%s) = \"%s\")", var,
                                        textConstraint));
                                }
                            }
                        } else {
                            Collection<String> regexQueryTexts = new ArrayList<String>(constraint.getTexts()
                                    .size());
                            for (String textConstraint : constraint.getTexts()) {
                                if (textConstraint != null && !textConstraint.isEmpty()) {
                                    regexQueryTexts.add(PatternUtils.value2Regex(textConstraint));
                                }
                            }
                            addRegexFilter(queryString, var, regexQueryTexts, constraint.isCaseSensitive());
                        }
                    } else if (constraint.getPatternType() == PatternType.wildcard) {
                        // parse false, because that is more in line with the
                        // expectations of users!
                        Collection<String> regexQueryTexts = new ArrayList<String>(constraint.getTexts()
                                .size());
                        for (String textConstraint : constraint.getTexts()) {
                            if (textConstraint != null && !textConstraint.isEmpty()) {
                                regexQueryTexts.add(PatternUtils.wildcardToRegex(textConstraint, false));
                            }
                        }
                        addRegexFilter(queryString, var, regexQueryTexts, constraint.isCaseSensitive());
                    } else {
                        log.warn("Unspported Patterntype "
                                 + constraint.getPatternType()
                                 + "! Change this impplementation to support this type! -> treat constaint \""
                                 + constraint.getTexts() + "\"as REGEX");
                        addRegexFilter(queryString, var, constraint.getTexts(), constraint.isCaseSensitive());
                    }
                }
            }
        } // else nothing to do
          // add language Filters
          // TODO check if FILTER ( is already written!
        if (constraint.getLanguages() != null && !constraint.getLanguages().isEmpty()) {

            log.trace("Constraint has languages [filter-added :: {}].",
                (filterAdded ? "yes" : "no"));

            if (!filterAdded) {
                queryString.append(" . \n").append(intend).append("  FILTER(");
                filterAdded = true;
                writeLanguagesFilter(queryString, constraint.getLanguages(), var, null);
            } else {
                writeLanguagesFilter(queryString, constraint.getLanguages(), var, " && ");
            }
        }
        if (filterAdded) {
            queryString.append(")"); // close the FILTER and the graph pattern
        }
    }

    /**
     * (Creates AND Text) OR (Query AND String) like queries based on the parsed TextConstraint as used by
     * {@link EndpointTypeEnum#LARQ LARQ} and {@link EndpointTypeEnum#Virtuoso VIRTUOSO} SPARQL endpoints to
     * speed up full text queries.
     *
     * @param constraints
     *            the as returned by {@link TextConstraint#getTexts()}
     * @return the full text query string
     */
    protected static String createFullTextQueryString(Collection<String> constraints) {
        StringBuilder textQuery = new StringBuilder();
        boolean firstText = true;
        for (String constraintText : constraints) {
            if (constraintText != null && !constraintText.isEmpty()) {
                if (firstText) {
                    firstText = false;
                } else {
                    textQuery.append(" OR ");
                }
                // TODO: maybe we should use a word tokenizer here
                String[] words = constraintText.split("\\s");
                if (words.length > 1) {
                    // not perfect because words might contain empty string, but
                    // it will eliminate most unnecessary brackets .
                    textQuery.append('(');
                }
                boolean firstAndWord = true;
                for (String word : words) {
                    word = word.trim();
                    boolean hasAlphaNumeric = false;
                    for (int i = 0; i < word.length() && !hasAlphaNumeric; i++) {
                        char ch = word.charAt(i);
                        if (Character.isLetter(ch) || Character.isDigit(ch)) {
                            hasAlphaNumeric = true;
                        }
                    }
                    if (hasAlphaNumeric) {
                        if (firstAndWord) {
                            firstAndWord = false;
                        } else {
                            textQuery.append(" AND ");
                        }
                        // we need to double the backslashes because of
                        // replaceAll takes a regular expression
                        // as input.
                        String escapedWord = word.replaceAll("\\\"", "\\\\\"");
                        textQuery.append('"').append(escapedWord).append('"');
                    }
                }
                if (words.length > 1) {
                    textQuery.append(')');
                }
            } // end if not null and not empty
        }
        return textQuery.toString();
    }

    /**
     * Adds a SPARQL regex filter to the parsed query string
     *
     * @param queryString
     *            the string builder to add the constraint
     * @param var
     *            the variable to constrain
     * @param regexContraints
     *            the regex encoded search strings (connected with '||' (OR))
     * @param isCasesensitive
     *            if the constraint is case sensitive or not
     */
    private static void addRegexFilter(StringBuilder queryString,
                                       String var,
                                       Collection<String> regexContraints,
                                       boolean isCasesensitive) {
        boolean first = true;
        for (String regex : regexContraints) {
            if (regex != null && !regex.isEmpty()) {
                if (first) {
                    first = false;
                } else {
                    queryString.append(" || ");
                }
                queryString.append(String.format("regex(str(?%s),\"%s\"%s)", var, regex, isCasesensitive ? ""
                        : ",\"i\""));
            }
        }
    }

    /**
     * Adds an RangeConstraint to the parsed query String
     *
     * @param queryString
     *            the query to add the constraint
     * @param var
     *            the variable to constrain
     * @param constraint
     *            the constraint
     */
    private static void addRangeConstriant(StringBuilder queryString,
                                           String var,
                                           RangeConstraint constraint,
                                           String intend) {
        queryString.append("\n").append(intend).append("FILTER "); // start the
                                                                   // FILTER
        boolean closedRange = constraint.getLowerBound() != null && constraint.getUpperBound() != null;
        if (closedRange) {
            queryString.append("(");
        }
        // write lower and upper bounds
        if (constraint.getLowerBound() != null) {
            addRangeBound(queryString, var, true, constraint.isInclusive(), constraint.getLowerBound());
        }
        if (closedRange) {
            queryString.append(" && ");
        }
        if (constraint.getUpperBound() != null) {
            addRangeBound(queryString, var, false, constraint.isInclusive(), constraint.getUpperBound());
        }
        if (closedRange) {
            queryString.append(")");
        }
    }

    /**
     * Adds a lower/upper bound constraint to the query String
     *
     * @param queryString
     *            the query string
     * @param var
     *            the variable
     * @param lowerBound
     *            <code>true</code> to add the lower bound and <code>false</code> to add the upper bound
     * @param inclusive
     *            if the bound is inclusive (>= or <=)
     * @param value
     *            the value representing the bound.
     */
    private static void addRangeBound(StringBuilder queryString,
                                      String var,
                                      boolean lowerBound,
                                      boolean inclusive,
                                      Object value) {
        String stringValue;
        if (value instanceof Date) {// for dates add the data type!
            stringValue = String.format("%s^^<%s>", DATE_FORMAT.format((Date) value), XSD_DATE_TIME);
        } else { // add additional "if" for special types if necessary
            stringValue = value.toString();
        }
        // adds (?var >/<[=] valueString)
        queryString.append(String.format("(?%s %c%s %s)", var, lowerBound ? '>' : '<', inclusive ? "=" : "",
            stringValue));
    }

    /**
     * Adds a S P O pattern to the query by using the root as subject, the parsed field as predicate and the
     * returned variable as object. This method doese not open a '{' nor close the pattern with any of '.',
     * ',' or ';'
     *
     * @param queryString
     *            the {@link StringBuilder} to add the pattern
     * @param field
     *            the field
     * @param selectedFields
     *            the map field -> var of the selected variables. If the parsed field is selected, the field
     *            is removed from the list and the mapped variable name is returned
     * @param varPrefix
     *            the default prefix for newly created variable names
     * @param varNum
     *            The first element of the array is used to get the number of the created variable. If one is
     *            created the value of the first element is increased by one
     * @return The variable name used for the object of the pattern
     */
    private static String addFieldGraphPattern(StringBuilder queryString,
                                               String field,
                                               Map<String,String> selectedFields,
                                               String varPrefix,
                                               int[] varNum,
                                               String intend) {
        String var = selectedFields.remove(field); // check if the field is
                                                   // selected
        if (var == null) { // this field is not selected
            // we need to generate a temp var
            var = varPrefix + varNum[0];
            varNum[0]++;
        }
        if (isSpecialField(field)) {
            // in case of a special field replace the field URI with an
            // variable to allow searching all outgoing properties
            String fieldVar = varPrefix + varNum[0];
            varNum[0]++;
            queryString.append(String.format("%s?%s ?%s ?%s ", intend, selectedFields.get(null), fieldVar,
                var));
        } else {
            queryString
                    .append(String.format("%s?%s <%s> ?%s ", intend, selectedFields.get(null), field, var));
        }
        return var;
    }

    /**
     * Writes the SPARQL FILTER for the parsed languages. This Method writes <code><pre>
     *      prefix ((lang(?var) = "lang1") [|| (lang(?var) = "lang2..n")])
     * </pre></code>
     *
     * @param queryString
     *            the query string to add the FILTER
     * @param languages
     *            the languages to filter for (may contain <code>null</code> as element)
     * @param var
     *            the name of the variable to filter.
     * @param prefix
     *            The prefix is written in front of the filter expression (if any is created). Typically this
     *            will be
     *            <ul>
     *            <li>FILTER if this is the only filter for an variable
     *            <li>&& if this filter is combined with AND to an other filter or
     *            <li>|| if this filter is combined wit OR to an other filter
     *            </ul>
     */
    private static void writeLanguagesFilter(StringBuilder queryString,
                                             Collection<String> languages,
                                             String var,
                                             String prefix) {

        if (null == languages || languages.isEmpty()) return;

        log.trace("Writing languages filter [var :: {}][prefix :: {}][languages :: {}].",
            new Object[]{var, prefix, languages.size()});

        if (prefix != null) {
            queryString.append(prefix);
        }
        if (languages.size() > 1) {
            queryString.append("(");
        }
        boolean first = true;
        for (String language : languages) {
            if (first) {
                first = false;
            } else {
                queryString.append(" || ");
            }
            queryString.append(String.format("(lang(?%s) = \"%s\")", var, language != null ? language : ""));
            /*
             * NOTE: the lang() returns "" for literals without an language tag. Because of that if the
             * language == null we need to parse "" as an argument
             */
        }
        if (languages.size() > 1) {
            queryString.append(")");
        }
    }

    public static void main(String[] args) {
        SparqlFieldQuery query = SparqlFieldQueryFactory.getInstance().createFieldQuery();
        // query.setConstraint("urn:field1", new
        // ReferenceConstraint("urn:testReference"));
        // query.setConstraint("urn:field1", new ReferenceConstraint(
        // Arrays.asList("urn:testReference","urn:testReference1","urn:testReference3"),MODE.any));
        // query.setConstraint(SpecialFieldEnum.references.getUri(), new
        // ReferenceConstraint(
        // Arrays.asList("urn:testReference","urn:testReference1","urn:testReference3")));
        // query.setConstraint("urn:field1a", new ValueConstraint(null,
        // Arrays.asList(
        // DataTypeEnum.Float.getUri())));
        // query.addSelectedField("urn:field1a");

        // query.setConstraint("urn:field1b", new ValueConstraint(9, Arrays.asList(
        // DataTypeEnum.Float.getUri())));
        // query.setConstraint("urn:field1b", new ValueConstraint(Arrays.asList(9,10,11), Arrays.asList(
        // DataTypeEnum.Float.getUri()),MODE.any));
        // query.setConstraint("urn:field1c", new ValueConstraint(null, Arrays.asList(
        // DataTypeEnum.Float.getUri(),DataTypeEnum.Double.getUri(),DataTypeEnum.Decimal.getUri())));
        // query.addSelectedField("urn:field1c");
        // query.setConstraint("urn:field1d", new ValueConstraint(9, Arrays.asList(
        // DataTypeEnum.Float.getUri(),DataTypeEnum.Double.getUri(),DataTypeEnum.Decimal.getUri())));
        // query.setConstraint("urn:field1d", new ValueConstraint(Arrays.asList(9,10,11), Arrays.asList(
        // DataTypeEnum.Float.getUri(),DataTypeEnum.Double.getUri(),DataTypeEnum.Decimal.getUri())));
        // query.setConstraint("urn:field2", new TextConstraint("test value"));
        // query.setConstraint("urn:field3", new TextConstraint(Arrays.asList(
        // "text value","anothertest","some more values"),true));
        // query.setConstraint(SpecialFieldEnum.fullText.getUri(), new TextConstraint(Arrays.asList(
        // "text value","anothertest","some more values"),true));
        // query.setConstraint("urn:field2a", new TextConstraint(":-]"));
        // //tests escaping of REGEX
        // query.setConstraint("urn:field3", new TextConstraint("language text","en"));
        query.setConstraint("urn:field4", new TextConstraint("multi language text", "en", "de", null));
        // query.setConstraint("urn:field5", new
        // TextConstraint("wildcar*",PatternType.wildcard,false,"en","de"));
        // query.addSelectedField("urn:field5");
        // query.setConstraint("urn:field6", new TextConstraint("^regex",PatternType.REGEX,true));
        // query.setConstraint("urn:field7", new
        // TextConstraint("par*",PatternType.WildCard,false,"en","de",null));
        // query.setConstraint("urn:field8", new TextConstraint(null,"en","de",null));
        // query.setConstraint("urn:field9", new RangeConstraint((int)5, (int)10, true));
        // query.setConstraint("urn:field10", new RangeConstraint((int)5, (int)10, false));
        // query.setConstraint("urn:field11", new RangeConstraint(null, (int)10, true));
        // query.setConstraint("urn:field12", new RangeConstraint((int)5, null, true));
        // query.setConstraint("urn:field12", new RangeConstraint(new Date(), null, true));
        // query.addSelectedField("urn:field2a");
        // query.addSelectedField("urn:field3");
        query.setLimit(5);
        query.setOffset(5);
        System.out.println(createSparqlSelectQuery(query, true, 0, EndpointTypeEnum.LARQ));
        System.out.println();
        System.out.println(createSparqlSelectQuery(query, true, 0, EndpointTypeEnum.Virtuoso));
        System.out.println();
        System.out.println(createSparqlSelectQuery(query, true, 0, EndpointTypeEnum.Standard));
        System.out.println();
        System.out.println(createSparqlConstructQuery(query, 0, EndpointTypeEnum.Virtuoso));
    }

    /**
     * @param query
     * @param resultGraph
     * @return
     */
    public static Iterator<RdfRepresentation> parseQueryResultsFromMGraph(final TripleCollection resultGraph) {
        Iterator<Triple> resultTripleIterator = resultGraph.filter(FIELD_QUERY_RESULT_SET,
            FIELD_QUERY_RESULT, null);
        Iterator<RdfRepresentation> resultIterator = new AdaptingIterator<Triple,RdfRepresentation>(
                resultTripleIterator, new AdaptingIterator.Adapter<Triple,RdfRepresentation>() {
                    /*
                     * Anonymous implementation of an Adapter that converts the filtered Triples of the
                     * resulting graph to RdfRepresentations
                     */
                    @Override
                    public RdfRepresentation adapt(Triple value, Class<RdfRepresentation> type) {
                        Resource object = value.getObject();
                        if (object == null) {
                            return null;
                        } else if (object instanceof UriRef) {
                            return valueFavtory.createRdfRepresentation((UriRef) object, resultGraph);
                        } else {
                            log.warn("Unable to create representation for FieldQueryResult " + object
                                     + " because this Resource is not of Type UriRef (type: "
                                     + object.getClass() + ") -> result gets ignored");
                            return null;
                        }
                    }
                }, RdfRepresentation.class);
        return resultIterator;
    }

}
TOP

Related Classes of org.apache.stanbol.entityhub.query.clerezza.SparqlQueryUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.