Package com.linkedin.data.schema.validation

Source Code of com.linkedin.data.schema.validation.ValidateDataAgainstSchema$State$Context

/*
   Copyright (c) 2012 LinkedIn Corp.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/

package com.linkedin.data.schema.validation;


import java.math.BigDecimal;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.linkedin.data.ByteString;
import com.linkedin.data.Data;
import com.linkedin.data.DataList;
import com.linkedin.data.DataMap;
import com.linkedin.data.Null;
import com.linkedin.data.element.DataElement;
import com.linkedin.data.element.MutableDataElement;
import com.linkedin.data.element.SimpleDataElement;
import com.linkedin.data.it.IterationOrder;
import com.linkedin.data.it.ObjectIterator;
import com.linkedin.data.message.Message;
import com.linkedin.data.message.MessageList;
import com.linkedin.data.schema.ArrayDataSchema;
import com.linkedin.data.schema.BytesDataSchema;
import com.linkedin.data.schema.DataSchema;
import com.linkedin.data.schema.DataSchemaConstants;
import com.linkedin.data.schema.EnumDataSchema;
import com.linkedin.data.schema.FixedDataSchema;
import com.linkedin.data.schema.MapDataSchema;
import com.linkedin.data.schema.RecordDataSchema;
import com.linkedin.data.schema.TyperefDataSchema;
import com.linkedin.data.schema.UnionDataSchema;
import com.linkedin.data.schema.validator.Validator;
import com.linkedin.data.schema.validator.ValidatorContext;
import com.linkedin.data.template.DataTemplate;



/**
* A utility class to validate and fixup data objects according to the objects' {@link DataSchema}.
*/
public final class ValidateDataAgainstSchema
{
  // Private as the class is not to be instantiated.
  private ValidateDataAgainstSchema()
  {
  }

  @SuppressWarnings("serial")
  private static final HashMap<DataSchema.Type, Class<?>> _primitiveTypeToClassMap = new HashMap<DataSchema.Type, Class<?>>()
  {
    {
      put(DataSchema.Type.INT, Integer.class);
      put(DataSchema.Type.LONG, Long.class);
      put(DataSchema.Type.FLOAT, Float.class);
      put(DataSchema.Type.DOUBLE, Double.class);
      put(DataSchema.Type.STRING, String.class);
      put(DataSchema.Type.BOOLEAN, Boolean.class);
      put(DataSchema.Type.NULL, Null.class);
    }
  };

  public static ValidationResult validate(DataTemplate<?> dataTemplate, ValidationOptions options)
  {
    return validate(dataTemplate, options, null);
  }

  public static ValidationResult validate(DataTemplate<?> dataTemplate, ValidationOptions options, Validator validator)
  {
    if(dataTemplate.schema() == null)
    {
      return new State(options, validator); // return an empty validation response if no schema is available
    }

    return validate(dataTemplate.data(), dataTemplate.schema(), options, validator);
  }

  public static ValidationResult validate(Object object, DataSchema schema, ValidationOptions options)
  {
    return validate(object, schema, options, null);
  }

  public static ValidationResult validate(Object object, DataSchema schema, ValidationOptions options, Validator validator)
  {
    return validate(new SimpleDataElement(object, schema), options, validator);
  }

  public static ValidationResult validate(DataElement element, ValidationOptions options)
  {
    return validate(element, options, null);
  }

  public static ValidationResult validate(DataElement element, ValidationOptions options, Validator validator)
  {
    State state = new State(options, validator);
    state.validate(element);
    return state;
  }

  private static class State implements ValidationResult
  {
    private boolean _recursive;
    private final ValidationOptions _options;
    private final Validator _validator;
    private boolean _hasFix = false;
    private boolean _hasFixupReadOnlyError = false;
    private Object _fixed = null;
    private boolean _valid = true;
    private final Context _context;

    private State(ValidationOptions options, Validator validator)
    {
      _options = options;
      _validator = validator;
      _context = (validator == null ? null : new Context());
    }

    protected void validate(DataElement element)
    {
      if (_options.isAvroUnionMode())
      {
        validateRecursive(element);
      }
      else
      {
        validateIterative(element);
      }
    }

    protected void validateRecursive(DataElement element)
    {
      _recursive = true;
      _fixed = validate(element, element.getSchema(), element.getValue());
    }

    protected void validateIterative(DataElement element)
    {
      _recursive = false;
      _fixed = element.getValue();
      ObjectIterator it = new ObjectIterator(element, IterationOrder.POST_ORDER);
      DataElement nextElement;
      while ((nextElement = it.next()) != null)
      {
        DataSchema nextElementSchema = nextElement.getSchema();
        if (nextElementSchema != null)
        {
          validate(nextElement, nextElementSchema, nextElement.getValue());
        }
      }
    }

    protected Object validate(DataElement element, DataSchema schema, Object object)
    {
      Object fixed;
      switch (schema.getType())
      {
        case ARRAY:
          fixed = validateArray(element, (ArrayDataSchema) schema, object);
          break;
        case BYTES:
          fixed = validateBytes(element, (BytesDataSchema) schema, object);
          break;
        case ENUM:
          fixed = validateEnum(element, (EnumDataSchema) schema, object);
          break;
        case FIXED:
          fixed = validateFixed(element, (FixedDataSchema) schema, object);
          break;
        case MAP:
          fixed = validateMap(element, (MapDataSchema) schema, object);
          break;
        case RECORD:
          fixed = validateRecord(element, (RecordDataSchema) schema, object);
          break;
        case TYPEREF:
          fixed = validateTyperef(element, (TyperefDataSchema) schema, object);
          break;
        case UNION:
          fixed = validateUnion(element, (UnionDataSchema) schema, object);
          break;
        default:
          fixed = validatePrimitive(element, schema, object);
          break;
      }
      if (fixed != object)
      {
        fixValue(element, fixed);
      }
      if (_validator != null && element.getSchema() == schema)
      {
        DataElement validatorElement;
        if (fixed == object)
        {
          validatorElement = element;
        }
        else if (element instanceof MutableDataElement)
        {
          ((MutableDataElement) element).setValue(fixed);
          validatorElement = element;
        }
        else
        {
           validatorElement = new SimpleDataElement(fixed, element.getName(), schema, element.getParent());
        }
        _context._dataElement = validatorElement;
        _validator.validate(_context);
      }
      return fixed;
    }

    protected void fixValue(DataElement element, Object fixed)
    {
      assert(_options.getCoercionMode() != CoercionMode.OFF);
      _hasFix = true;
      DataElement parentElement = element.getParent();
      if (parentElement == null)
      {
        _fixed = fixed;
      }
      else
      {
        Object parent = parentElement.getValue();
        if (parent.getClass() == DataMap.class)
        {
          DataMap map = (DataMap) parent;
          if (map.isReadOnly())
          {
            _hasFixupReadOnlyError = true;
            addMessage(element, "cannot be fixed because DataMap backing %1$s type is read-only", parentElement.getSchema().getUnionMemberKey());
          }
          else
          {
            map.put((String) element.getName(), fixed);
          }
        }
        else if (parent.getClass() == DataList.class)
        {
          DataList list = (DataList) parent;
          if (list.isReadOnly())
          {
            _hasFixupReadOnlyError = true;
            addMessage(element, "cannot be fixed because DataList backing an array type is read-only");
          }
          else
          {
            list.set((Integer) element.getName(), fixed);
          }
        }
      }
    }

    protected Object validateTyperef(DataElement element, TyperefDataSchema schema, Object object)
    {
      return validate(element, schema.getRef(), object);
    }

    protected void recurseRecord(DataElement element, RecordDataSchema schema, DataMap map)
    {
      MutableDataElement childElement = new MutableDataElement(element);
      for (Map.Entry<String, Object> entry : map.entrySet())
      {
        String key = entry.getKey();
        RecordDataSchema.Field field = schema.getField(key);
        if (field != null)
        {
          Object value = entry.getValue();
          DataSchema childSchema = field.getType();
          childElement.setValueNameSchema(value, key, childSchema);
          validate(childElement, childSchema, value);
        }
      }
    }

    protected Object validateRecord(DataElement element, RecordDataSchema schema, Object object)
    {
      if (object instanceof DataMap)
      {
        DataMap map = (DataMap) object;
        if (_recursive)
        {
          recurseRecord(element, schema, map);
        }
        RequiredMode requiredMode = _options.getRequiredMode();
        if (requiredMode != RequiredMode.IGNORE)
        {
          for (RecordDataSchema.Field field : schema.getFields())
          {
            if (field.getOptional() == false && map.containsKey(field.getName()) == false)
            {
              switch (requiredMode)
              {
                case MUST_BE_PRESENT:
                  addIsRequiredMessage(
                    element, field,
                    "field is required but not found"
                  );
                  break;
                case CAN_BE_ABSENT_IF_HAS_DEFAULT:
                  if (field.getDefault() == null)
                  {
                    addIsRequiredMessage(
                      element, field,
                      "field is required but not found and has no default value"
                    );
                  }
                  break;
                case FIXUP_ABSENT_WITH_DEFAULT:
                  Object defaultValue = field.getDefault();
                  if (defaultValue == null)
                  {
                    addIsRequiredMessage(
                      element, field,
                      "field is required but not found and has no default value"
                    );
                  }
                  else if (map.isReadOnly())
                  {
                    _hasFix = true;
                    _hasFixupReadOnlyError = true;
                    addIsRequiredMessage(
                      element, field,
                      "field is required and has default value but not found and cannot be fixed because DataMap of record is read-only"
                    );
                  }
                  else
                  {
                    _hasFix = true;
                    map.put(field.getName(), defaultValue);
                  }
                  break;
              }
            }
          }
        }
      }
      else
      {
        addMessage(element, "record type is not backed by a DataMap");
      }
      return object;
    }

    protected Object validateUnion(DataElement element, UnionDataSchema schema, Object object)
    {
      if (object == Data.NULL)
      {
        if (schema.getType(DataSchemaConstants.NULL_TYPE) == null)
        {
          addMessage(element, "null is not a member type of union %1$s", schema);
        }
      }
      else if (_options.isAvroUnionMode())
      {
        // Avro union default value does not include member type discriminator
        List<DataSchema> memberTypes = schema.getTypes();
        if (memberTypes.isEmpty())
        {
          addMessage(element, "value %1$s is not valid for empty union", object.toString());
        }
        else
        {
          DataSchema memberSchema = memberTypes.get(0);
          assert(_recursive);
          validate(element, memberSchema, object);
        }
      }
      else if (object instanceof DataMap)
      {
        // Pegasus mode
        DataMap map = (DataMap) object;
        if (map.size() != 1)
        {
          addMessage(element, "DataMap should have exactly one entry for a union type");
        }
        else
        {
          Map.Entry<String, Object> entry = map.entrySet().iterator().next();
          String key = entry.getKey();
          DataSchema memberSchema = schema.getType(key);
          if (memberSchema == null)
          {
            addMessage(element, "\"%1$s\" is not a member type of union %2$s", key, schema);
          }
          else if (_recursive)
          {
            Object value = entry.getValue();
            MutableDataElement memberElement = new MutableDataElement(value, key, memberSchema, element);
            validate(memberElement, memberSchema, value);
          }
        }
      }
      else
      {
        addMessage(element, "union type is not backed by a DataMap or null");
      }
      return object;
    }

    protected Object validateEnum(DataElement element, EnumDataSchema schema, Object object)
    {
      Object fixed = object;
      if (object instanceof String)
      {
        String value = (String) object;
        if (schema.contains(value) == false)
        {
          addMessage(element, "\"%1$s\" is not an enum symbol", value);
        }
      }
      else
      {
        addMessage(element, "enum type is not backed by a String");
      }
      return fixed;
    }

    protected void recurseArray(DataElement element, ArrayDataSchema schema, Object object)
    {
      DataList list = (DataList) object;
      DataSchema childSchema = schema.getItems();
      int index = 0;
      MutableDataElement childElement = new MutableDataElement(element);
      for (Object value : list)
      {
        childElement.setValueNameSchema(value, index, childSchema);
        validate(childElement, childSchema, value);
        index++;
      }
    }

    protected Object validateArray(DataElement element, ArrayDataSchema schema, Object object)
    {
      if (object instanceof DataList)
      {
        if (_recursive)
        {
          recurseArray(element, schema, object);
        }
      }
      else
      {
        addMessage(element, "array type is not backed by a DataList");
      }
      return object;
    }

    protected void recurseMap(DataElement element, MapDataSchema schema, Object object)
    {
      DataMap map = (DataMap) object;
      DataSchema childSchema = schema.getValues();
      MutableDataElement childElement = new MutableDataElement(element);
      for (Map.Entry<String, Object> entry : map.entrySet())
      {
        String key = entry.getKey();
        Object value = entry.getValue();
        childElement.setValueNameSchema(value, key, childSchema);
        validate(childElement, childSchema, value);
      }
    }

    protected Object validateMap(DataElement element, MapDataSchema schema, Object object)
    {
      if (object instanceof DataMap)
      {
        if (_recursive)
        {
          recurseMap(element, schema, object);
        }
      }
      else
      {
        addMessage(element, "map type is not backed by a DataMap");
      }
      return object;
    }

    protected Object validateFixed(DataElement element, FixedDataSchema schema, Object object)
    {
      Object fixed = object;
      Class<?> clazz = object.getClass();
      int size = schema.getSize();
      if (clazz == String.class)
      {
        String str = (String) object;
        boolean error = false;
        if (str.length() != size)
        {
          addMessage(element,
                     "\"%1$s\" length (%2$d) is inconsistent with expected fixed size of %3$d",
                     str,
                     str.length(),
                     size);
        }
        else
        {
          if (_options.getCoercionMode() != CoercionMode.OFF)
          {
            ByteString bytes = ByteString.copyAvroString(str, true);
            if (bytes != null)
            {
              _hasFix = true;
              fixed = bytes;
            }
            else
            {
              error = true;
            }
          }
          else
          {
            error = ! Data.validStringAsBytes(str);
          }
        }
        if (error)
        {
          addMessage(element, "\"%1$s\" is not a valid string representation of bytes", str);
        }
      }
      else if (clazz == ByteString.class)
      {
        ByteString bytes = (ByteString) object;
        if (bytes.length() != size)
        {
          addMessage(element, "\"%1$s\" length (%2$d) is inconsistent with expected fixed size of %3$d", bytes, bytes.length(), size);
        }
      }
      else
      {
        addMessage(element, "fixed type is not backed by a String or ByteString");
      }
      return fixed;
    }

    protected Object validateBytes(DataElement element, BytesDataSchema schema, Object object)
    {
      Object fixed = object;
      Class<?> clazz = object.getClass();
      if (clazz == String.class)
      {
        String str = (String) object;
        boolean error = false;
        if (_options.getCoercionMode() != CoercionMode.OFF)
        {
          ByteString bytes = ByteString.copyAvroString(str, true);
          if (bytes != null)
          {
            _hasFix = true;
            fixed = bytes;
          }
          else
          {
            error = true;
          }
        }
        else
        {
          error = ! Data.validStringAsBytes(str);
        }
        if (error)
        {
          addMessage(element, "\"%1$s\" is not a valid string representation of bytes", str);
        }
      }
      else if (clazz != ByteString.class)
      {
        addMessage(element, "bytes type is not backed by a String or ByteString");
      }
      return fixed;
    }

    protected Object validatePrimitive(DataElement element, DataSchema schema, Object object)
    {
      Class<?> primitiveClass = _primitiveTypeToClassMap.get(schema.getType());

      Object fixed = object;
      if (object.getClass() != primitiveClass)
      {
        if (_options.getCoercionMode() != CoercionMode.OFF)
        {
          fixed = fixupPrimitive(schema, object);
          if (fixed == object)
          {
            addMessage(element, "%1$s cannot be coerced to %2$s", String.valueOf(object), primitiveClass.getSimpleName());
          }
        }
        else
        {
          addMessage(element, "%1$s is not backed by a %2$s", String.valueOf(object), primitiveClass.getSimpleName());
        }
      }
      return fixed;
    }

    protected Object fixupPrimitive(DataSchema schema, Object object)
    {
      DataSchema.Type schemaType = schema.getType();

      try
      {
        switch (schemaType)
        {
          case INT:
            return
              (object instanceof Number) ?
                (((Number) object).intValue()) :
                (object.getClass() == String.class && _options.getCoercionMode() == CoercionMode.STRING_TO_PRIMITIVE) ?
                  (new BigDecimal((String) object)).intValue() :
                  object;
          case LONG:
            return
              (object instanceof Number) ?
                (((Number) object).longValue()) :
                (object.getClass() == String.class && _options.getCoercionMode() == CoercionMode.STRING_TO_PRIMITIVE) ?
                  (new BigDecimal((String) object)).longValue() :
                  object;
          case FLOAT:
            return
              (object instanceof Number) ?
                (((Number) object).floatValue()) :
                (object.getClass() == String.class && _options.getCoercionMode() == CoercionMode.STRING_TO_PRIMITIVE) ?
                  (new BigDecimal((String) object)).floatValue() :
                  object;
          case DOUBLE:
            return
              (object instanceof Number) ?
                (((Number) object).doubleValue()) :
                (object.getClass() == String.class && _options.getCoercionMode() == CoercionMode.STRING_TO_PRIMITIVE) ?
                  (new BigDecimal((String) object)).doubleValue() :
                  object;
          case BOOLEAN:
            if (object.getClass() == String.class && _options.getCoercionMode() == CoercionMode.STRING_TO_PRIMITIVE)
            {
              String string = (String) object;
              if ("true".equalsIgnoreCase(string))
              {
                return Boolean.TRUE;
              }
              if ("false".equalsIgnoreCase(string))
              {
                return Boolean.FALSE;
              }
            }
            return object;
          case STRING:
          case NULL:
          default:
            return object;
        }
      }
      catch (NumberFormatException exc)
      {
        return object;
      }
    }
   
    protected void addMessage(DataElement element, String format, Object... args)
    {
      _messages.add(new Message(element.path(), format, args));
      _valid = false;
    }

    protected void addIsRequiredMessage(DataElement element, RecordDataSchema.Field field, String msg)
    {
      _messages.add(new Message(element.path(field.getName()), msg));
      _valid = false;
    }

    private MessageList<Message> _messages = new MessageList<Message>();

    @Override
    public boolean hasFix()
    {
      return _hasFix;
    }

    @Override
    public boolean hasFixupReadOnlyError()
    {
      return _hasFixupReadOnlyError;
    }

    @Override
    public Object getFixed()
    {
      return _fixed;
    }

    @Override
    public boolean isValid()
    {
      return _valid;
    }

    @Override
    public Collection<Message> getMessages()
    {
      return Collections.unmodifiableList(_messages);
    }

    @Override
    public String toString()
    {
      StringBuilder sb = new StringBuilder();
      sb.append("hasFix=").append(_hasFix)
        .append(", hasFixupReadOnlyError=").append(_hasFixupReadOnlyError)
        .append(", valid=").append(_valid).append("\n")
        .append("fixed=").append(_fixed).append("\n");
      _messages.appendTo(sb);
      return sb.toString();
    }

    private class Context implements ValidatorContext
    {
      private DataElement _dataElement;

      @Override
      public DataElement dataElement()
      {
        return _dataElement;
      }

      @Override
      public void addResult(Message message)
      {
        _messages.add(message);
        if (message.isError())
        {
          _valid = false;
        }
      }

      @Override
      public void setHasFix(boolean value)
      {
        _hasFix = value;
      }

      @Override
      public void setHasFixupReadOnlyError(boolean value)
      {
        _hasFixupReadOnlyError = value;
      }

      @Override
      public ValidationOptions validationOptions()
      {
        return _options;
      }
    }
  }
}
TOP

Related Classes of com.linkedin.data.schema.validation.ValidateDataAgainstSchema$State$Context

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.