""" Register Hypothesis strategies for Pydantic custom types. This enables fully-automatic generation of test data for most Pydantic classes. Note that this module has *no* runtime impact on Pydantic itself; instead it is registered as a setuptools entry point and Hypothesis will import it if Pydantic is installed. See also: https://hypothesis.readthedocs.io/en/latest/strategies.html#registering-strategies-via-setuptools-entry-points https://hypothesis.readthedocs.io/en/latest/data.html#hypothesis.strategies.register_type_strategy https://hypothesis.readthedocs.io/en/latest/strategies.html#interaction-with-pytest-cov https://pydantic-docs.helpmanual.io/usage/types/#pydantic-types Note that because our motivation is to *improve user experience*, the strategies are always sound (never generate invalid data) but sacrifice completeness for maintainability (ie may be unable to generate some tricky but valid data). Finally, this module makes liberal use of `# type: ignore[]` pragmas. This is because Hypothesis annotates `register_type_strategy()` with `(T, SearchStrategy[T])`, but in most cases we register e.g. `ConstrainedInt` to generate instances of the builtin `int` type which match the constraints. """ import contextlib import ipaddress import json import math from fractions import Fraction from typing import Callable, Dict, Type, Union, cast, overload import hypothesis.strategies as st import pydantic import pydantic.color import pydantic.types # FilePath and DirectoryPath are explicitly unsupported, as we'd have to create # them on-disk, and that's unsafe in general without being told *where* to do so. # # URLs are unsupported because it's easy for users to define their own strategy for # "normal" URLs, and hard for us to define a general strategy which includes "weird" # URLs but doesn't also have unpredictable performance problems. 
#
# conlist() and conset() are unsupported for now, because the workarounds for
# Cython and Hypothesis to handle parametrized generic types are incompatible.
# Once Cython can support 'normal' generics we'll revisit this.

# Emails - only registered when the optional `email_validator` package imports.
try:
    import email_validator
except ImportError:  # pragma: no cover
    pass
else:

    def is_valid_email(s: str) -> bool:
        """Return True when email-validator accepts `s` (deliverability is not checked)."""
        # Hypothesis' st.emails() occasionally generates emails like 0@A0--0.ac
        # that are invalid according to email-validator, so we filter those out.
        try:
            email_validator.validate_email(s, check_deliverability=False)
        except email_validator.EmailNotValidError:  # pragma: no cover
            return False
        else:
            return True

    # Note that these strategies deliberately stay away from any tricky Unicode
    # or other encoding issues; we're just trying to generate *something* valid.
    st.register_type_strategy(
        pydantic.EmailStr,
        st.emails().filter(is_valid_email),  # type: ignore[arg-type]
    )
    # NameEmail values are rendered as "<one to six words> <address>".
    st.register_type_strategy(
        pydantic.NameEmail,
        st.builds(
            '{} <{}>'.format,  # type: ignore[arg-type]
            st.from_regex('[A-Za-z0-9_]+( [A-Za-z0-9_]+){0,5}', fullmatch=True),
            st.emails().filter(is_valid_email),
        ),
    )

# PyObject - dotted names, in this case taken from the math module.
st.register_type_strategy(
    pydantic.PyObject,  # type: ignore[arg-type]
    st.sampled_from(
        [cast(pydantic.PyObject, f'math.{name}') for name in sorted(vars(math)) if not name.startswith('_')]
    ),
)

# CSS3 Colors; as name, hex, rgb(a) tuples or strings, or hsl strings
_color_regexes = (
    '|'.join(
        (
            pydantic.color.r_hex_short,
            pydantic.color.r_hex_long,
            pydantic.color.r_rgb,
            pydantic.color.r_rgba,
            pydantic.color.r_hsl,
            pydantic.color.r_hsla,
        )
    )
    # Use more precise regex patterns to avoid value-out-of-range errors
    .replace(pydantic.color._r_sl, r'(?:(\d\d?(?:\.\d+)?|100(?:\.0+)?)%)')
    .replace(pydantic.color._r_alpha, r'(?:(0(?:\.\d+)?|1(?:\.0+)?|\.\d+|\d{1,2}%))')
    .replace(pydantic.color._r_255, r'(?:((?:\d|\d\d|[01]\d\d|2[0-4]\d|25[0-4])(?:\.\d+)?|255(?:\.0+)?))')
)
st.register_type_strategy(
    pydantic.color.Color,
    st.one_of(
        st.sampled_from(sorted(pydantic.color.COLORS_BY_NAME)),
        st.tuples(
            st.integers(0, 255),
            st.integers(0, 255),
            st.integers(0, 255),
            st.none() | st.floats(0, 1) | st.floats(0, 100).map('{}%'.format),
        ),
        st.from_regex(_color_regexes, fullmatch=True),
    ),
)


# Card numbers, valid according to the Luhn algorithm


def add_luhn_digit(card_number: str) -> str:
    """Return `card_number` with the one trailing digit that pydantic's Luhn check accepts.

    Each candidate digit is tried in turn; any exception raised by the
    validator is treated as "wrong digit" and the next candidate is tried.
    """
    # See https://en.wikipedia.org/wiki/Luhn_algorithm
    for digit in '0123456789':
        with contextlib.suppress(Exception):
            pydantic.PaymentCardNumber.validate_luhn_check_digit(card_number + digit)
            return card_number + digit
    raise AssertionError('Unreachable')  # pragma: no cover


card_patterns = (
    # Note that these patterns omit the Luhn check digit; that's added by the function above
    '4[0-9]{14}',  # Visa
    '5[12345][0-9]{13}',  # Mastercard
    '3[47][0-9]{12}',  # American Express
    '[0-26-9][0-9]{10,17}',  # other (incomplete to avoid overlap)
)
st.register_type_strategy(
    pydantic.PaymentCardNumber,
    st.from_regex('|'.join(card_patterns), fullmatch=True).map(add_luhn_digit),  # type: ignore[arg-type]
)

# UUIDs
st.register_type_strategy(pydantic.UUID1, st.uuids(version=1))
st.register_type_strategy(pydantic.UUID3, st.uuids(version=3))
st.register_type_strategy(pydantic.UUID4, st.uuids(version=4))
st.register_type_strategy(pydantic.UUID5, st.uuids(version=5))

# Secrets
st.register_type_strategy(pydantic.SecretBytes, st.binary().map(pydantic.SecretBytes))
st.register_type_strategy(pydantic.SecretStr, st.text().map(pydantic.SecretStr))

# IP addresses, networks, and interfaces
st.register_type_strategy(pydantic.IPvAnyAddress, st.ip_addresses())
st.register_type_strategy(
    pydantic.IPvAnyInterface,
    st.from_type(ipaddress.IPv4Interface) | st.from_type(ipaddress.IPv6Interface),  # type: ignore[arg-type]
)
st.register_type_strategy(
    pydantic.IPvAnyNetwork,
    st.from_type(ipaddress.IPv4Network) | st.from_type(ipaddress.IPv6Network),  # type: ignore[arg-type]
)

# We hook into the con***() functions and the ConstrainedNumberMeta metaclass,
# so here we only have to register subclasses for other constrained types which
# don't go via those mechanisms.  Then there are the registration hooks below.
st.register_type_strategy(pydantic.StrictBool, st.booleans())
st.register_type_strategy(pydantic.StrictStr, st.text())


# Constrained-type resolver functions
#
# For these ones, we actually want to inspect the type in order to work out a
# satisfying strategy.  First up, the machinery for tracking resolver functions:

# Maps a constrained base type to the function that builds a strategy for any
# of its subclasses; filled in by the `@resolves(...)` decorator below.
RESOLVERS: Dict[type, Callable[[type], st.SearchStrategy]] = {}  # type: ignore[type-arg]


@overload
def _registered(typ: Type[pydantic.types.T]) -> Type[pydantic.types.T]:
    pass


@overload
def _registered(typ: pydantic.types.ConstrainedNumberMeta) -> pydantic.types.ConstrainedNumberMeta:
    pass


def _registered(
    typ: Union[Type[pydantic.types.T], pydantic.types.ConstrainedNumberMeta]
) -> Union[Type[pydantic.types.T], pydantic.types.ConstrainedNumberMeta]:
    """Record `typ` as a defined type and register a Hypothesis strategy for it.

    Returns `typ` unchanged so the function can wrap a type definition.
    Raises NotImplementedError when no entry in RESOLVERS is a superclass of `typ`.
    """
    # This function replaces the version in `pydantic.types`, in order to
    # effect the registration of new constrained types so that Hypothesis
    # can generate valid examples.
    pydantic.types._DEFINED_TYPES.add(typ)
    for supertype, resolver in RESOLVERS.items():
        if issubclass(typ, supertype):
            st.register_type_strategy(typ, resolver(typ))  # type: ignore
            return typ
    raise NotImplementedError(f'Unknown type {typ!r} has no resolver to register')  # pragma: no cover


def resolves(
    typ: Union[type, pydantic.types.ConstrainedNumberMeta]
) -> Callable[[Callable[..., st.SearchStrategy]], Callable[..., st.SearchStrategy]]:  # type: ignore[type-arg]
    """Decorator factory: store the decorated function in RESOLVERS as the resolver for `typ`."""

    def inner(f):  # type: ignore
        # RESOLVERS is keyed by type, so the duplicate check must look at `typ`;
        # checking the function object against the keys could never fail.
        assert typ not in RESOLVERS
        RESOLVERS[typ] = f
        return f

    return inner


def _stripped_pattern(min_size: int, max_size: Union[int, None]) -> str:
    """Build a regex for text that neither starts nor ends with whitespace.

    The pattern is meant to be used with `fullmatch=True` and matches only
    text whose length lies between `min_size` and `max_size` (unbounded when
    `max_size` is None).  `\\S` is used for the end characters because `\\W`
    (non-word) also matches whitespace.  Assumes `min_size <= max_size`; with
    contradictory bounds the pattern stays well-formed but cannot honour both.
    """
    if max_size is not None and max_size < 2:
        # Too short for the "first char, middle, last char" shape used below.
        if max_size < 1:
            return ''
        return r'\S' if min_size >= 1 else r'\S?'
    lower = max(min_size - 2, 0)
    upper = '' if max_size is None else max(max_size - 2, lower)
    repeats = '{{{},{}}}'.format(lower, upper)
    if min_size >= 2:
        return rf'\S.{repeats}\S'
    if min_size == 1:
        return rf'\S(.{repeats}\S)?'
    return rf'(\S(.{repeats}\S)?)?'


# Type-to-strategy resolver functions


@resolves(pydantic.JsonWrapper)
def resolve_json(cls):  # type: ignore[no-untyped-def]
    """Strategy producing JSON text for `cls.inner_type` (or `null` when it is None)."""
    try:
        inner = st.none() if cls.inner_type is None else st.from_type(cls.inner_type)
    except Exception:  # pragma: no cover
        # Fall back to arbitrary JSON-compatible values if the inner type can't be resolved.
        finite = st.floats(allow_infinity=False, allow_nan=False)
        inner = st.recursive(
            base=st.one_of(st.none(), st.booleans(), st.integers(), finite, st.text()),
            extend=lambda x: st.lists(x) | st.dictionaries(st.text(), x),  # type: ignore
        )
    return st.builds(
        json.dumps,
        inner,
        ensure_ascii=st.booleans(),
        indent=st.none() | st.integers(0, 16),
        sort_keys=st.booleans(),
    )


@resolves(pydantic.ConstrainedBytes)
def resolve_conbytes(cls):  # type: ignore[no-untyped-def]  # pragma: no cover
    """Strategy for bytes honouring `min_length`, `max_length` and `strip_whitespace`."""
    min_size = cls.min_length or 0
    max_size = cls.max_length
    if not cls.strip_whitespace:
        return st.binary(min_size=min_size, max_size=max_size)
    # Fun with regex to ensure we neither start nor end with whitespace, so the
    # value still satisfies the length bounds after it has been stripped.
    pattern = _stripped_pattern(min_size, max_size)
    return st.from_regex(pattern.encode(), fullmatch=True).filter(lambda b: b == b.strip())


@resolves(pydantic.ConstrainedDecimal)
def resolve_condecimal(cls):  # type: ignore[no-untyped-def]
    """Strategy for finite decimals within the `gt`/`ge`/`lt`/`le` bounds of `cls`."""
    min_value = cls.ge
    max_value = cls.le
    if cls.gt is not None:
        assert min_value is None, 'Set `gt` or `ge`, but not both'
        min_value = cls.gt
    if cls.lt is not None:
        assert max_value is None, 'Set `lt` or `le`, but not both'
        max_value = cls.lt
    # Infinities are excluded explicitly: Hypothesis would otherwise allow them
    # on any unbounded side, and pydantic rejects non-finite decimals.
    s = st.decimals(min_value, max_value, allow_nan=False, allow_infinity=False)
    # st.decimals() bounds are inclusive, so exclusive bounds are filtered out.
    if cls.lt is not None:
        s = s.filter(lambda d: d < cls.lt)
    if cls.gt is not None:
        s = s.filter(lambda d: cls.gt < d)
    return s


@resolves(pydantic.ConstrainedFloat)
def resolve_confloat(cls):  # type: ignore[no-untyped-def]
    """Strategy for floats within the bounds of `cls`, as multiples of `multiple_of` if set."""
    min_value = cls.ge
    max_value = cls.le
    exclude_min = False
    exclude_max = False

    if cls.gt is not None:
        assert min_value is None, 'Set `gt` or `ge`, but not both'
        min_value = cls.gt
        exclude_min = True
    if cls.lt is not None:
        assert max_value is None, 'Set `lt` or `le`, but not both'
        max_value = cls.lt
        exclude_max = True

    if cls.multiple_of is None:
        return st.floats(min_value, max_value, exclude_min=exclude_min, exclude_max=exclude_max, allow_nan=False)

    # With `multiple_of`, draw an integer multiplier k and return k * multiple_of.
    # For an exclusive bound we need the nearest integer strictly inside it:
    # floor(q) + 1 above a lower bound, ceil(q) - 1 below an upper bound.
    # (ceil(q) + 1 / floor(q) - 1 would skip a valid multiple whenever the
    # bound is not itself a multiple, and could leave an empty range.)
    if min_value is not None:
        quotient = min_value / cls.multiple_of
        min_value = math.floor(quotient) + 1 if exclude_min else math.ceil(quotient)
    if max_value is not None:
        assert max_value >= cls.multiple_of, 'Cannot build model with max value smaller than multiple of'
        quotient = max_value / cls.multiple_of
        max_value = math.ceil(quotient) - 1 if exclude_max else math.floor(quotient)

    return st.integers(min_value, max_value).map(lambda x: x * cls.multiple_of)


@resolves(pydantic.ConstrainedInt)
def resolve_conint(cls):  # type: ignore[no-untyped-def]
    """Strategy for ints within the bounds of `cls`, as multiples of `multiple_of` if set."""
    min_value = cls.ge
    max_value = cls.le
    if cls.gt is not None:
        assert min_value is None, 'Set `gt` or `ge`, but not both'
        min_value = cls.gt + 1
    if cls.lt is not None:
        assert max_value is None, 'Set `lt` or `le`, but not both'
        max_value = cls.lt - 1

    if cls.multiple_of is None or cls.multiple_of == 1:
        return st.integers(min_value, max_value)

    # Convert the (already inclusive) bounds into bounds on the multiplier;
    # Fraction keeps the division exact.  The .map turns each multiplier back
    # into a multiple of `multiple_of`.
    if min_value is not None:
        min_value = math.ceil(Fraction(min_value) / Fraction(cls.multiple_of))
    if max_value is not None:
        max_value = math.floor(Fraction(max_value) / Fraction(cls.multiple_of))
    return st.integers(min_value, max_value).map(lambda x: x * cls.multiple_of)


@resolves(pydantic.ConstrainedStr)
def resolve_constr(cls):  # type: ignore[no-untyped-def]  # pragma: no cover
    """Strategy for strings honouring `regex`, `strip_whitespace` and the length bounds of `cls`."""
    min_size = cls.min_length or 0
    max_size = cls.max_length

    if cls.regex is None and not cls.strip_whitespace:
        return st.text(min_size=min_size, max_size=max_size)

    if cls.regex is not None:
        strategy = st.from_regex(cls.regex)
    else:
        # Only reachable with strip_whitespace set.  fullmatch=True stops
        # Hypothesis from padding the match with arbitrary surrounding text.
        strategy = st.from_regex(_stripped_pattern(min_size, max_size), fullmatch=True)
    if cls.strip_whitespace:
        # Reject anything stripping would alter, so the length checks below
        # see the same text that ends up being validated.
        strategy = strategy.filter(lambda s: s == s.strip())

    if min_size == 0 and max_size is None:
        return strategy
    elif max_size is None:
        return strategy.filter(lambda s: min_size <= len(s))
    return strategy.filter(lambda s: min_size <= len(s) <= max_size)


# Finally, register all previously-defined types, and patch in our new function
for typ in pydantic.types._DEFINED_TYPES:
    _registered(typ)
pydantic.types._registered = _registered
st.register_type_strategy(pydantic.Json, resolve_json)