// Source code of org.apache.stanbol.entityhub.core.mapping.DefaultFieldMapperImpl

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.stanbol.entityhub.core.mapping;


import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import java.util.regex.Pattern;


import org.apache.stanbol.entityhub.servicesapi.defaults.DataTypeEnum;
import org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapper;
import org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapping;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.model.Text;
import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.Constraint.ConstraintType;
import org.apache.stanbol.entityhub.servicesapi.util.PatternUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * This is only an intermediate solution just to have the functionality.
 * This needs to be refactored! This is something similar to a semantic lifting
 * work flow that could used schema translation, reasoning ... <p>
 * The goal is to build a simple Module that supports basics things (like the
 * stuff provided by this implementation) and that allow other implementations
 * to do the advanced stuff.<p>
 * Currently I hope, that when the functionality is in place it is easier to
 * see what a good design for this part of the Entityhub would be.
 * TODO: refactoring (see above comment)
 * @author Rupert Westenthaler
 *
 */
public class DefaultFieldMapperImpl implements FieldMapper, Cloneable {
    private final Logger log = LoggerFactory.getLogger(DefaultFieldMapperImpl.class);
    private final Set<FieldMapping> mappings;
//    private final Map<String,Collection<FieldMapping>> ignoreFieldMap;
//    private final Map<Pattern,Collection<FieldMapping>> ignoreWildcardMap;
    private final Map<String,Set<FieldMapping>> fieldMap;
    private final Map<Pattern,Set<FieldMapping>> wildcardMap;
    private Collection<FieldMapping> unmodMappings;
    private ValueConverterFactory valueConverter;
    //private Map<String,FieldMapping> mappings = Collections.synchronizedMap(new HashMap<String, FieldMapping>());
    public DefaultFieldMapperImpl(ValueConverterFactory valueConverter) {
        super();
        mappings = new HashSet<FieldMapping>();
        unmodMappings = Collections.unmodifiableCollection(mappings);
        fieldMap = new HashMap<String, Set<FieldMapping>>();
        wildcardMap = new HashMap<Pattern, Set<FieldMapping>>();
        if(valueConverter == null){
            throw new IllegalArgumentException("The parsed ValueConverterFactory MUST NOT be NULL");
        }
        this.valueConverter = valueConverter;
//        ignoreFieldMap = new HashMap<String, Collection<FieldMapping>>();
//        ignoreWildcardMap = new HashMap<Pattern, Collection<FieldMapping>>();
    }
    /**
     * Internally used by clone
     * @param fieldMap
     * @param wildcardMap
     */
    private DefaultFieldMapperImpl(ValueConverterFactory valueConverter,Set<FieldMapping> mappings,Map<String,Set<FieldMapping>> fieldMap, Map<Pattern,Set<FieldMapping>> wildcardMap){
        this(valueConverter);
        this.mappings.addAll(mappings);
        this.fieldMap.putAll(fieldMap);
        this.wildcardMap.putAll(wildcardMap);
    }
    /**
     * Getter for all the defined Mappings for a given field name
     * @param field the name of the field
     * @return all the active Mappings
     */
    protected List<FieldMapping> getMappings(String field){
        final List<FieldMapping> fieldMappings = new ArrayList<FieldMapping>();
        //first search the fieldMappings
        Collection<FieldMapping> tmp = fieldMap.get(field);
        if(tmp != null){
            fieldMappings.addAll(tmp);
        }
        //now iterate over the Wildcard Mappings
        for(Entry<Pattern,Set<FieldMapping>> entry : wildcardMap.entrySet()){
            if(entry.getKey().matcher(field).find()){
                fieldMappings.addAll(entry.getValue());
            }
        }
        Collections.sort(fieldMappings, FieldMappingUtils.FIELD_MAPPING_COMPARATOR);
        return fieldMappings;
    }
    /* (non-Javadoc)
     * @see org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapper#addMapping(org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapping)
     */
    public void addMapping(FieldMapping mapping){
        if(mapping == null){
            return;
        }
        if(mappings.add(mapping)){
            if(mapping.usesWildcard()){
                Pattern fieldPattern = mapping.getRegexPattern();
                synchronized (wildcardMap) {
                    Set<FieldMapping> fieldPatternMappings = wildcardMap.get(fieldPattern);
                    if(fieldPatternMappings == null){
                        fieldPatternMappings = new HashSet<FieldMapping>();//new TreeSet<FieldMapping>(FieldMappingUtils.FIELD_MAPPING_COMPARATOR);
                        wildcardMap.put(fieldPattern, fieldPatternMappings);
                    }
                    fieldPatternMappings.add(mapping);
                }
            } else {
                String fieldName = mapping.getFieldPattern();
                synchronized (fieldMap) {
                    Set<FieldMapping> fieldPatternMappings = fieldMap.get(fieldName);
                    if(fieldPatternMappings == null){
                        fieldPatternMappings = new HashSet<FieldMapping>();//new TreeSet<FieldMapping>(FieldMappingUtils.FIELD_MAPPING_COMPARATOR);
                        fieldMap.put(fieldName, fieldPatternMappings);
                    }
                    fieldPatternMappings.add(mapping);
                }
            }
        } //else already present -> nothing todo
    }
    public Collection<FieldMapping> getMappings(){
        return unmodMappings;
    }
//    private static String getPrefix(String fieldPattern){
//        return fieldPattern.split("[\\?\\*]")[0];
//    }
    /* (non-Javadoc)
     * @see org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapper#removeFieldMapping(org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapping)
     */
    public void removeFieldMapping(FieldMapping mapping){
        if(mapping == null){
            return;
        }
        if(mappings.remove(mapping)){
            if(mapping.usesWildcard()){
                Pattern fieldPattern = mapping.getRegexPattern();
                synchronized (wildcardMap) {
                    Collection<FieldMapping> fieldPatternMappings = wildcardMap.get(fieldPattern);
                    if(fieldPatternMappings != null){
                        if(fieldPatternMappings.remove(mapping) && fieldPatternMappings.isEmpty()){
                            //clean up the prefix if last value is removed
                            wildcardMap.remove(fieldPattern);
                        }
                    }
                }
            } else {
                String fieldPattern = mapping.getFieldPattern();
                synchronized (fieldMap) {
                    Collection<FieldMapping> fieldPatternMappings = fieldMap.get(fieldPattern);
                    if(fieldPatternMappings != null){
                        if(fieldPatternMappings.remove(mapping) && fieldPatternMappings.isEmpty()){
                            //clean up the prefix if last value is removed
                            fieldMap.remove(fieldPattern);
                        }
                    }
                }
            }
        } //else nothing todo
    }
    /**
     * Removes the FieldMapping based on the fieldPattern
     * @param fieldPattern the field pattern
     */
    public void removeFieldMapping(String fieldPattern){
        if(fieldPattern == null || fieldPattern.length()<1){
            return;
        }
        if(PatternUtils.usesWildCard(fieldPattern)){
            Pattern pattern = Pattern.compile(PatternUtils.wildcardToRegex(fieldPattern,true));
            synchronized (wildcardMap) {
                wildcardMap.remove(pattern);
            }
        } else {
            synchronized (fieldMap) {
                fieldMap.remove(fieldPattern);
            }
        }
    }
    /* (non-Javadoc)
     * @see org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapper#applyMappings(org.apache.stanbol.entityhub.servicesapi.model.Representation, org.apache.stanbol.entityhub.servicesapi.model.Representation)
     */
    public Representation applyMappings(Representation source, Representation target, ValueFactory valueFactory) {
        Collection<String> fields = new HashSet<String>();
        for(Iterator<String> fieldIt = source.getFieldNames();fieldIt.hasNext();){
            fields.add(fieldIt.next());
        }
        for(String field : fields){
//            log.info("  > process field: "+field);
            //get the active Mappings
            List<FieldMapping> activeMappings = getMappings(field);
            if(!activeMappings.isEmpty()){
                //get all the values (store them in an Collection, because we need them more than once)
                Collection<Object> values = new ArrayList<Object>();
                for(Iterator<Object> valueIt = source.get(field);valueIt.hasNext();){
                    values.add(valueIt.next());
                }
                //only to be sure, that this is not changed by Filters!
                values = Collections.unmodifiableCollection(values);
                /*
                 * (1) Before working with the values first analyse the active
                 * mappings and filters. Two things
                 * a) Init Wildcard Filters:
                 *    Language filters set on namespaces are executed on all field
                 *    mappings that define no language filter
                 * b) calculate the mapped fields. Possible there are no mappings
                 *    left. Than we need not to process all the values
                 */
                Set<String> targetFields = new HashSet<String>();
                TextConstraint globalFilter = null;
                Collection<Object> globalFiltered = null;
                /*
                 * NOTE: the mappings are sorted in the way, that the most
                 *   prominent one will be at index 0. The wildcard "*" will
                 *   be always the last.
                 *   So we need to parse backwards because than more prominent
                 *   things will overwrite and win!
                 */
                for(int i=activeMappings.size()-1;i>=0;i--){
                    FieldMapping mapping = activeMappings.get(i);
                    if(mapping.usesWildcard() //if wildcard
                            && !mapping.ignoreField() && //and not ignore
                            mapping.getFilter() != null && //and a filter is present
                            mapping.getFilter().getType() == ConstraintType.text){ //and of type text
                        //set the global text filter.
                        //NOTE: the active mappings are sorted in that way, that
                        //      the most specific one is set last
                        globalFilter = (TextConstraint)mapping.getFilter();
                    }
                    for(String targetField : mapping.getMappings()){
                        if(mapping.ignoreField()){
                            targetFields.remove(targetField);
                        } else {
                            targetFields.add(targetField);
                        }
                    }
                }
//                log.info("    o targets: "+targetFields);
//                log.info("    o global text filter: "+globalFilter);
                if(globalFilter != null){
                    globalFiltered = new HashSet<Object>(values);
                    //parse false ass third argument, because we need not to filter
                    //non-Text values for wildcard filter!
                    processFilter(globalFilter, globalFiltered,false);
                }
                //now process the mappings
                for(FieldMapping mapping : activeMappings){
                    if(!mapping.ignoreField() &&
                            !Collections.disjoint(targetFields, mapping.getMappings())){
                        processMapping(mapping, valueFactory, field,  values,globalFiltered, targetFields, target);
//                    } else if(!mapping.ignoreField()) {
//                        log.info(String.format("  << ignore mapping %s ",mapping));
//                    } else {
//                        log.info(String.format("  << %s ",mapping));
                    }
                }
            }
        }
        /*
         * TODO: return a "MappingReport"
         * All mapping activities should be documented and stored with the
         * MappedEntity as MappingActivity!
         */
        return target;
    }
    /**
     *
     * @param mapping
     * @param valueFactory The value factory used to create converted values
     * @param field
     * @param values
     * @param globalFiltered
     * @param targets
     */
    private void processMapping(FieldMapping mapping, ValueFactory valueFactory,String field,  Collection<Object> values, Collection<Object> globalFiltered, Set<String> activeTargets,Representation targetRepresentation) {
        //parsed mappings are all !ignore and some mappings are active
        Collection<Object> filtered; //this collection will be modified by the filters later on
        if(globalFiltered == null || //if no global filter is present and therefore globalFiltered == null or
                //there is a more special text filter defined in this mapping
                mapping.getFilter() != null && mapping.getFilter().getType() == ConstraintType.text){
            filtered = new HashSet<Object>(values);//start with all values
        } else { //start with the values filtered by the global filter
            filtered = new HashSet<Object>(globalFiltered);
        }
        if(mapping.getFilter()!=null){
            switch (mapping.getFilter().getType()) {
            case value:
                ValueConstraint valueConstraint = (ValueConstraint)mapping.getFilter();
                processFilter(valueConstraint,filtered,valueFactory);
                break;
            case text:
                TextConstraint textConstraint = (TextConstraint)mapping.getFilter();
                //for wildcard mappings only filter TextValues. if the mapping is
                //for a specific field filter also non text values.
                processFilter(textConstraint,filtered,!mapping.usesWildcard());
                break;
            default:
                log.warn(String.format("Filter of type %s are not supported -> select all values! (Constraint=%s)",
                        mapping.getFilter().getType(),mapping.getFilter()));
                break;
            }
            /*
             * TODO: add general purpose functionality to apply Constraints.
             * Currently this is done by the specific Query Implementations :(
             *  - use the constraint to filter the values collection!
             */


        } //nothing to do
        for(String mappedField : mapping.getMappings()){
            //activeTargets still uses null for the current field
            // -> this is because wildcard filters can not know the actual field name
            if(activeTargets.contains(mappedField)){ //so use null to match
                if(mappedField == null){ //and than replace null with the field name
                    mappedField = field;
                }
//                log.info(String.format("  >> copy%s to %s &d values",
//                        mappedField.equals(field)?"":" from "+field,mappedField,filtered.size()));
                targetRepresentation.add(mappedField, filtered);
//            } else {
//                log.info(String.format("  << ignore%s %s",
//                        mappedField.equals(field)?"":"mapping from "+field+"to",mappedField));
            }
        }


    }
    /**
     * This method filters the parsed {@link Text} values based on the languages
     * parsed in the {@link TextConstraint}.
     * This method modifies the parsed collection by using the
     * {@link Iterator#remove()} method.
     * @param textConstraint the text constraint containing the active languages
     * @param values the values to filter. This method modifies this collection
     * @return the modified collection to allow nested calls
     */
    private Collection<Object> processFilter(TextConstraint textConstraint, Collection<Object> values,boolean filterNonTextValues) {
        if(textConstraint.getTexts() != null){
            log.warn("Filtering based on values is not implemented");
        }
        /*
         * TODO: If filterNonTextValues=true and acceptDefaultLanguate=true
         *       we could also try to convert non-Text values to Text (by using
         *       the valueConverter.
         */
        Set<String> langs = textConstraint.getLanguages();
        boolean acceptDefaultLanguage = textConstraint.getLanguages().contains(null);
        for(Iterator<Object> it = values.iterator();it.hasNext();){
            Object value = it.next();
            if(value instanceof Text){
                if(!langs.contains(((Text)value).getLanguage())){
                    it.remove();
//                    log.info(String.format("   - value %s(type:%s) rejected by text filter",value,value.getClass()));
//                } else {
//                    log.info(String.format("   + value %s(type:%s) accepted by text filter",value,value.getClass()));
                }
            } else if(filterNonTextValues && value instanceof String){
                //Strings only if the default language is enabled
                if(!acceptDefaultLanguage){
                    it.remove();
//                    log.info(String.format("   - value %s(type:%s) rejected by text filter",value,value.getClass()));
//                } else {
//                    log.info(String.format("   + value %s(type:%s) accepted by text filter",value,value.getClass()));
                }
            } else if(filterNonTextValues){
                it.remove();
//                log.info(String.format("   - value %s(type:%s) rejected by text filter",value,value.getClass()));
            } //else non text value and filterNonTextValues=false -> nothing to do
        }
        return values;
    }
    /**
     * This method converts - or if not possible filters the parsed values based
     * on the parsed constraint
     * @param valueConstraint
     * @param values
     * @return
     */
    private Collection<Object> processFilter(ValueConstraint valueConstraint, Collection<Object> values,ValueFactory valueFactory) {
        if(valueConstraint.getValues() != null){
            log.warn("Filtering based on values is not yet implemented");
        }
        //1) collect all active dataTypes
        //first a EnumSet for really fast containsAll ... operations
        Set<DataTypeEnum> activeDataTypes = EnumSet.noneOf(DataTypeEnum.class);
        //second a List to keep track of the ordering of the dataTypes in the
        //constraint for later conversions!
        List<DataTypeEnum> sortedActiveDataTypes = new ArrayList<DataTypeEnum>(valueConstraint.getDataTypes().size());
        //NOTE: using a LinkedHashSet would slow down this code, because EnumSet
        //  gives constant processing time even for bulk operations!
        for(String dataTypeUri : valueConstraint.getDataTypes()){
            DataTypeEnum dataType = DataTypeEnum.getDataType(dataTypeUri);
            if(dataType == null){
                log.warn(String.format("DataType %s not supported"));
            } else {
                if(activeDataTypes.add(dataType)){
                    //only of set has changed to avoid duplicates in the list
                    sortedActiveDataTypes.add(dataType);
                }
            }
        }
        //2) now process the values
//        log.info(" --- Filter values ---");
        //calculating acceptable and not acceptable types needs some processing time
        //and usually values will be only of very less different types.
        //Therefore it makes sense to cache accepted and rejected types!
        Set<Class<?>> accepted = new HashSet<Class<?>>();
        Set<Class<?>> rejected = new HashSet<Class<?>>();
        //Set that stores rejected values. Such will be converted later on!
        Set<Object> needConversion = new HashSet<Object>();
        for(Iterator<Object> it = values.iterator();it.hasNext();){
            Object value = it.next();
//            if(accepted.contains(value.getClass())){
//                log.info(String.format("   + value %s(type:%s) accepted by value filter",value,value.getClass()));
                //nothing to do
//            } else 
            if(rejected.contains(value.getClass())){
                it.remove(); //remove also the current value of that type
                needConversion.add(value); //save as value that need to be converted
//                log.info(String.format("   - value %s(type:%s) rejected by value filter",value,value.getClass()));
            } else { //new class ... calculate
                Set<DataTypeEnum> valueTypes = DataTypeEnum.getAllDataTypes(value.getClass());
                if(valueTypes.removeAll(activeDataTypes)){
                    accepted.add(value.getClass());
//                    log.info(String.format("   + value %s(type:%s) accepted by value filter",value,value.getClass()));
                } else {
                    rejected.add(getClass());
                    it.remove(); //remove the Item
                    needConversion.add(value); //save as value that need to be converted
//                    log.info(String.format("   - value %s(type:%s) rejected by value filter",value,value.getClass()));
                }
            }
        }
        //3) try to convert values to the active dataTypes
//        log.info(" --- Try to Convert rejected values ---");
        for(Object value : needConversion){
            Object converted = null;
            DataTypeEnum convertedTo = null;
            for(Iterator<DataTypeEnum> dataTypes = sortedActiveDataTypes.iterator(); //iterate over all active dataTypes
                converted == null && dataTypes.hasNext();){ //while converted still null and more dataTypes to try
                convertedTo = dataTypes.next();
                converted = valueConverter.convert(value, convertedTo.getUri(),valueFactory); //try the conversion
            }
            if(converted != null){
//                log.info(String.format("   + value %s(javaType=%s) successfully converted to %s(datatype=%s)",
//                        value,value.getClass().getSimpleName(),converted,convertedTo.getShortName()));
                values.add(converted);
//            } else {
//                log.info(String.format("   - value %s(javaType=%s) could not be converted"),
//                        value,value.getClass().getSimpleName());
            }
        }
        return values;
    }
    @Override
    public DefaultFieldMapperImpl clone() {
        return new DefaultFieldMapperImpl(this.valueConverter,this.mappings,this.fieldMap, this.wildcardMap);
    }
    @Override
    public int hashCode() {
        return mappings.hashCode();
    }
    @Override
    public boolean equals(Object o) {
        return o instanceof DefaultFieldMapperImpl &&
            ((DefaultFieldMapperImpl)o).mappings.equals(mappings);
    }
}
// Source code of org.apache.stanbol.entityhub.core.mapping.DefaultFieldMapperImpl

// Related classes of org.apache.stanbol.entityhub.core.mapping.DefaultFieldMapperImpl