# Source code for factorytx.validation

"""This module contains the basic functions and types used for cleaning and
validating configurations. "Cleaning" a configuration may include modifying it,
e.g. to adapt it to a new configuration structure or to inject default values.

Concrete validations are implemented by other modules. For example, the
`factorytx.config` module cleans and validates entire configuration files while
individual components (e.g. transforms) implement the logic to clean and
validate their sub-configurations.

"""
import collections
import copy
import enum
from typing import Any, Dict, Generator, List, NamedTuple, Tuple, Type, Union

import jsonschema

from factorytx.utils import JsonDict

__all__ = ['ConfigPath', 'Level', 'ValidationMessage', 'ValidationError', 'ValidationWarning',
           'clean_with_json_schema', 'format_pretty_path', 'has_errors']


@enum.unique
class Level(enum.Enum):
    """Severity attached to a validation message: a warning or an error."""
    # @enum.unique makes class creation fail if two members share a value.
    WARNING = 'warning'
    ERROR = 'error'


# Type alias: a variable-length tuple whose items are ints (list offsets) or
# strs (dict keys). The alias is an object rather than a class statement, so
# its documentation is attached by assigning `__doc__` directly below.
# NOTE(review): the examples in the text below are written with list brackets
# although the alias itself is a tuple type -- confirm whether callers are
# expected to pass tuples only.
ConfigPath = Tuple[Union[int, str], ...]
ConfigPath.__doc__ = """\
A ConfigPath represents a path to an element in a hierarchy of dicts
and lists. An integer represents a 0-based offset into a list, while a string
represents a key in a dictionary. For example, given the structure

    {"a": [{"b": 1, "c": 2}, {"d": 3}], "e": 4}

the element at the ConfigPath ["a", 0, "c"] is 2, while the element at the
path ["e"] is 4.

"""


class ValidationMessage(NamedTuple):
    """Represents a message returned when validating a configuration.

    :ivar level: either Level.WARNING or Level.ERROR.
    :ivar path: ConfigPath indicating the object that this message applies to.
        For example, if the "asset" property of the first stream in a document
        had an issue then the path would be `["streams", 0, "asset"]`.
    :ivar message: message to present to the user.

    """
    level: Level
    path: ConfigPath
    message: str


def ValidationWarning(path: ConfigPath, message: str) -> ValidationMessage:
    """Creates a ValidationMessage whose level is Level.WARNING.

    :param path: ConfigPath indicating the object that the warning applies to.
    :param message: message to present to the user.
    :returns: a ValidationMessage carrying the given path and message.

    """
    return ValidationMessage(level=Level.WARNING, path=path, message=message)


def ValidationError(path: ConfigPath, message: str) -> ValidationMessage:
    """Creates a ValidationMessage whose level is Level.ERROR.

    :param path: ConfigPath indicating the object that the error applies to.
    :param message: message to present to the user.
    :returns: a ValidationMessage carrying the given path and message.

    """
    return ValidationMessage(level=Level.ERROR, path=path, message=message)


def has_errors(validation_results: List[ValidationMessage]) -> bool:
    """Returns True if a collection of ValidationMessages contains at least one
    error, or False otherwise.

    :param validation_results: the messages to inspect; an empty collection
        yields False.

    """
    # Enum members are singletons, so an identity comparison is sufficient.
    return any(v.level is Level.ERROR for v in validation_results)


def format_pretty_path(path: ConfigPath) -> str:
    """Converts a ConfigPath into a more readable format.

    Integer components are rendered as `[n]`; string components are rendered
    verbatim and separated from whatever precedes them by a `.`.

    >>> format_pretty_path(["a key", 3, "data"])
    'a key[3].data'

    :param path: the ConfigPath to format.
    :returns: the formatted path; an empty path yields an empty string.
    :raises AssertionError: if a component is neither an int nor a str.

    """
    result_parts: List[str] = []
    for path_part in path:
        if isinstance(path_part, int):
            result_parts.append(f'[{path_part}]')
        elif isinstance(path_part, str):
            # Only emit the separator when something precedes this key.
            if result_parts:
                result_parts.append('.')
            result_parts.append(path_part)
        else:
            # Raised explicitly (rather than `assert False`) so that invalid
            # components are still rejected when Python runs with -O.
            raise AssertionError(
                f'unexpected path component {path_part!r} in {path!r}')
    return ''.join(result_parts)


def _extend_with_default(validator_class: Type) -> Type:
    """Extends a jsonschema validator class to automatically inject defaults
    into documents as they are validated.

    :param validator_class: the jsonschema validator class to extend.
    :returns: a new validator class whose "properties" keyword first fills in
        missing properties from each subschema's "default" and then runs the
        original "properties" validation.

    """
    # http://python-jsonschema.readthedocs.io/en/latest/faq/#why-doesn-t-my-schema-that-has-a-default-property-actually-set-the-default-on-my-instance
    validate_properties = validator_class.VALIDATORS["properties"]

    def set_defaults(validator: Any, properties: JsonDict, instance: JsonDict,
                     schema: JsonDict) -> Generator[jsonschema.ValidationError, None, None]:
        # Defaults can only be injected into dict instances; anything else is
        # passed straight to the wrapped validator.
        if isinstance(instance, dict):
            for prop_name, subschema in properties.items():
                if not isinstance(subschema, dict) or "default" not in subschema:
                    continue
                if prop_name in instance:
                    # An explicit value always wins; skip the copy entirely.
                    continue
                default_value = subschema["default"]
                # Copy mutable defaults so that documents never share (and
                # cannot mutate) objects owned by the schema.
                if not isinstance(default_value, (int, float, bool, str)):
                    default_value = copy.deepcopy(default_value)
                instance[prop_name] = default_value

        yield from validate_properties(validator, properties, instance, schema)

    return jsonschema.validators.extend(
        validator_class, {"properties": set_defaults},
    )


# Draft 4 validator that also injects schema defaults into validated documents.
DefaultDraft4Validator = _extend_with_default(jsonschema.Draft4Validator)


def clean_with_json_schema(schema: Dict[str, Any], instance: Dict[str, Any]) -> List[ValidationMessage]:
    """Validates a document against a JSON schema and modifies it by injecting
    default values. The document must correspond to a JSON document, i.e. it
    must be a tree of dicts, lists, strings, ints, bools, and numbers.

    >>> schema = {
    ...     'type': 'object',
    ...     'properties': {
    ...         'a': {'type': 'integer', 'default': 2},
    ...         'b': {'type': 'string', 'minLength': 1}},
    ...     'required': ['b']}

    >>> doc = {'b': 'hello'}
    >>> clean_with_json_schema(schema, doc)
    []
    >>> doc
    {'b': 'hello', 'a': 2}

    >>> doc = {'a': 'nope'}
    >>> clean_with_json_schema(schema, doc)  # doctest: +NORMALIZE_WHITESPACE
    [ValidationMessage(level=<Level.ERROR: 'error'>, path=('a',),
                       message="'nope' is not of type 'integer'"),
     ValidationMessage(level=<Level.ERROR: 'error'>, path=(),
                       message="'b' is a required property")]

    :param schema: the JSON schema to validate against.
    :param instance: the document to validate; modified in place when defaults
        are injected.
    :returns: one error-level ValidationMessage per schema violation, or an
        empty list if the document is valid.

    """
    validation_errors: List[ValidationMessage] = []
    validator = DefaultDraft4Validator(schema)

    for schema_error in validator.iter_errors(instance):
        # TODO: add custom error message handling. (e.g: regex pattern errors are sometimes not easy to read)
        assert isinstance(schema_error.absolute_path, collections.deque)
        validation_errors.append(ValidationError(
            path=tuple(schema_error.absolute_path),
            message=schema_error.message,
        ))

    return validation_errors