""" These test the method maybe_promote from core/dtypes/cast.py """ import datetime import numpy as np import pytest from pandas._libs.tslibs import NaT from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.common import ( is_complex_dtype, is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_float_dtype, is_integer_dtype, is_object_dtype, is_scalar, is_timedelta64_dtype, ) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna import pandas as pd @pytest.fixture( params=[ bool, "uint8", "int32", "uint64", "float32", "float64", "complex64", "complex128", "M8[ns]", "m8[ns]", str, bytes, object, ] ) def any_numpy_dtype_reduced(request): """ Parameterized fixture for numpy dtypes, reduced from any_numpy_dtype. * bool * 'int32' * 'uint64' * 'float32' * 'float64' * 'complex64' * 'complex128' * 'M8[ns]' * 'M8[ns]' * str * bytes * object """ return request.param def _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar=None): """ Auxiliary function to unify testing of scalar/array promotion. Parameters ---------- dtype : dtype The value to pass on as the first argument to maybe_promote. fill_value : scalar The value to pass on as the second argument to maybe_promote as a scalar. expected_dtype : dtype The expected dtype returned by maybe_promote (by design this is the same regardless of whether fill_value was passed as a scalar or in an array!). exp_val_for_scalar : scalar The expected value for the (potentially upcast) fill_value returned by maybe_promote. """ assert is_scalar(fill_value) # here, we pass on fill_value as a scalar directly; the expected value # returned from maybe_promote is fill_value, potentially upcast to the # returned dtype. result_dtype, result_fill_value = maybe_promote(dtype, fill_value) expected_fill_value = exp_val_for_scalar assert result_dtype == expected_dtype _assert_match(result_fill_value, expected_fill_value) def _assert_match(result_fill_value, expected_fill_value): # GH#23982/25425 require the same type in addition to equality/NA-ness res_type = type(result_fill_value) ex_type = type(expected_fill_value) if hasattr(result_fill_value, "dtype"): # Compare types in a way that is robust to platform-specific # idiosyncracies where e.g. sometimes we get "ulonglong" as an alias # for "uint64" or "intc" as an alias for "int32" assert result_fill_value.dtype.kind == expected_fill_value.dtype.kind assert result_fill_value.dtype.itemsize == expected_fill_value.dtype.itemsize else: # On some builds, type comparison fails, e.g. np.int32 != np.int32 assert res_type == ex_type or res_type.__name__ == ex_type.__name__ match_value = result_fill_value == expected_fill_value # Note: type check above ensures that we have the _same_ NA value # for missing values, None == None (which is checked # through match_value above), but np.nan != np.nan and pd.NaT != pd.NaT match_missing = isna(result_fill_value) and isna(expected_fill_value) assert match_value or match_missing @pytest.mark.parametrize( "dtype, fill_value, expected_dtype", [ # size 8 ("int8", 1, "int8"), ("int8", np.iinfo("int8").max + 1, "int16"), ("int8", np.iinfo("int16").max + 1, "int32"), ("int8", np.iinfo("int32").max + 1, "int64"), ("int8", np.iinfo("int64").max + 1, "object"), ("int8", -1, "int8"), ("int8", np.iinfo("int8").min - 1, "int16"), ("int8", np.iinfo("int16").min - 1, "int32"), ("int8", np.iinfo("int32").min - 1, "int64"), ("int8", np.iinfo("int64").min - 1, "object"), # keep signed-ness as long as possible ("uint8", 1, "uint8"), ("uint8", np.iinfo("int8").max + 1, "uint8"), ("uint8", np.iinfo("uint8").max + 1, "uint16"), ("uint8", np.iinfo("int16").max + 1, "uint16"), ("uint8", np.iinfo("uint16").max + 1, "uint32"), ("uint8", np.iinfo("int32").max + 1, "uint32"), ("uint8", np.iinfo("uint32").max + 1, "uint64"), ("uint8", np.iinfo("int64").max + 1, "uint64"), ("uint8", np.iinfo("uint64").max + 1, "object"), # max of uint8 cannot be contained in int8 ("uint8", -1, "int16"), ("uint8", np.iinfo("int8").min - 1, "int16"), ("uint8", np.iinfo("int16").min - 1, "int32"), ("uint8", np.iinfo("int32").min - 1, "int64"), ("uint8", np.iinfo("int64").min - 1, "object"), # size 16 ("int16", 1, "int16"), ("int16", np.iinfo("int8").max + 1, "int16"), ("int16", np.iinfo("int16").max + 1, "int32"), ("int16", np.iinfo("int32").max + 1, "int64"), ("int16", np.iinfo("int64").max + 1, "object"), ("int16", -1, "int16"), ("int16", np.iinfo("int8").min - 1, "int16"), ("int16", np.iinfo("int16").min - 1, "int32"), ("int16", np.iinfo("int32").min - 1, "int64"), ("int16", np.iinfo("int64").min - 1, "object"), ("uint16", 1, "uint16"), ("uint16", np.iinfo("int8").max + 1, "uint16"), ("uint16", np.iinfo("uint8").max + 1, "uint16"), ("uint16", np.iinfo("int16").max + 1, "uint16"), ("uint16", np.iinfo("uint16").max + 1, "uint32"), ("uint16", np.iinfo("int32").max + 1, "uint32"), ("uint16", np.iinfo("uint32").max + 1, "uint64"), ("uint16", np.iinfo("int64").max + 1, "uint64"), ("uint16", np.iinfo("uint64").max + 1, "object"), ("uint16", -1, "int32"), ("uint16", np.iinfo("int8").min - 1, "int32"), ("uint16", np.iinfo("int16").min - 1, "int32"), ("uint16", np.iinfo("int32").min - 1, "int64"), ("uint16", np.iinfo("int64").min - 1, "object"), # size 32 ("int32", 1, "int32"), ("int32", np.iinfo("int8").max + 1, "int32"), ("int32", np.iinfo("int16").max + 1, "int32"), ("int32", np.iinfo("int32").max + 1, "int64"), ("int32", np.iinfo("int64").max + 1, "object"), ("int32", -1, "int32"), ("int32", np.iinfo("int8").min - 1, "int32"), ("int32", np.iinfo("int16").min - 1, "int32"), ("int32", np.iinfo("int32").min - 1, "int64"), ("int32", np.iinfo("int64").min - 1, "object"), ("uint32", 1, "uint32"), ("uint32", np.iinfo("int8").max + 1, "uint32"), ("uint32", np.iinfo("uint8").max + 1, "uint32"), ("uint32", np.iinfo("int16").max + 1, "uint32"), ("uint32", np.iinfo("uint16").max + 1, "uint32"), ("uint32", np.iinfo("int32").max + 1, "uint32"), ("uint32", np.iinfo("uint32").max + 1, "uint64"), ("uint32", np.iinfo("int64").max + 1, "uint64"), ("uint32", np.iinfo("uint64").max + 1, "object"), ("uint32", -1, "int64"), ("uint32", np.iinfo("int8").min - 1, "int64"), ("uint32", np.iinfo("int16").min - 1, "int64"), ("uint32", np.iinfo("int32").min - 1, "int64"), ("uint32", np.iinfo("int64").min - 1, "object"), # size 64 ("int64", 1, "int64"), ("int64", np.iinfo("int8").max + 1, "int64"), ("int64", np.iinfo("int16").max + 1, "int64"), ("int64", np.iinfo("int32").max + 1, "int64"), ("int64", np.iinfo("int64").max + 1, "object"), ("int64", -1, "int64"), ("int64", np.iinfo("int8").min - 1, "int64"), ("int64", np.iinfo("int16").min - 1, "int64"), ("int64", np.iinfo("int32").min - 1, "int64"), ("int64", np.iinfo("int64").min - 1, "object"), ("uint64", 1, "uint64"), ("uint64", np.iinfo("int8").max + 1, "uint64"), ("uint64", np.iinfo("uint8").max + 1, "uint64"), ("uint64", np.iinfo("int16").max + 1, "uint64"), ("uint64", np.iinfo("uint16").max + 1, "uint64"), ("uint64", np.iinfo("int32").max + 1, "uint64"), ("uint64", np.iinfo("uint32").max + 1, "uint64"), ("uint64", np.iinfo("int64").max + 1, "uint64"), ("uint64", np.iinfo("uint64").max + 1, "object"), ("uint64", -1, "object"), ("uint64", np.iinfo("int8").min - 1, "object"), ("uint64", np.iinfo("int16").min - 1, "object"), ("uint64", np.iinfo("int32").min - 1, "object"), ("uint64", np.iinfo("int64").min - 1, "object"), ], ) def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype): dtype = np.dtype(dtype) expected_dtype = np.dtype(expected_dtype) # output is not a generic int, but corresponds to expected_dtype exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) def test_maybe_promote_int_with_float(any_int_dtype, float_dtype): dtype = np.dtype(any_int_dtype) fill_dtype = np.dtype(float_dtype) # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] # filling int with float always upcasts to float64 expected_dtype = np.float64 # fill_value can be different float type exp_val_for_scalar = np.float64(fill_value) _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) def test_maybe_promote_float_with_int(float_dtype, any_int_dtype): dtype = np.dtype(float_dtype) fill_dtype = np.dtype(any_int_dtype) # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] # filling float with int always keeps float dtype # because: np.finfo('float32').max > np.iinfo('uint64').max expected_dtype = dtype # output is not a generic float, but corresponds to expected_dtype exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) @pytest.mark.parametrize( "dtype, fill_value, expected_dtype", [ # float filled with float ("float32", 1, "float32"), ("float32", np.finfo("float32").max * 1.1, "float64"), ("float64", 1, "float64"), ("float64", np.finfo("float32").max * 1.1, "float64"), # complex filled with float ("complex64", 1, "complex64"), ("complex64", np.finfo("float32").max * 1.1, "complex128"), ("complex128", 1, "complex128"), ("complex128", np.finfo("float32").max * 1.1, "complex128"), # float filled with complex ("float32", 1 + 1j, "complex64"), ("float32", np.finfo("float32").max * (1.1 + 1j), "complex128"), ("float64", 1 + 1j, "complex128"), ("float64", np.finfo("float32").max * (1.1 + 1j), "complex128"), # complex filled with complex ("complex64", 1 + 1j, "complex64"), ("complex64", np.finfo("float32").max * (1.1 + 1j), "complex128"), ("complex128", 1 + 1j, "complex128"), ("complex128", np.finfo("float32").max * (1.1 + 1j), "complex128"), ], ) def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype): dtype = np.dtype(dtype) expected_dtype = np.dtype(expected_dtype) # output is not a generic float, but corresponds to expected_dtype exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) def test_maybe_promote_bool_with_any(any_numpy_dtype_reduced): dtype = np.dtype(bool) fill_dtype = np.dtype(any_numpy_dtype_reduced) # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] # filling bool with anything but bool casts to object expected_dtype = np.dtype(object) if fill_dtype != bool else fill_dtype exp_val_for_scalar = fill_value _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) def test_maybe_promote_any_with_bool(any_numpy_dtype_reduced): dtype = np.dtype(any_numpy_dtype_reduced) fill_value = True # filling anything but bool with bool casts to object expected_dtype = np.dtype(object) if dtype != bool else dtype # output is not a generic bool, but corresponds to expected_dtype exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) def test_maybe_promote_bytes_with_any(bytes_dtype, any_numpy_dtype_reduced): dtype = np.dtype(bytes_dtype) fill_dtype = np.dtype(any_numpy_dtype_reduced) # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] # we never use bytes dtype internally, always promote to object expected_dtype = np.dtype(np.object_) exp_val_for_scalar = fill_value _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) def test_maybe_promote_any_with_bytes(any_numpy_dtype_reduced, bytes_dtype): dtype = np.dtype(any_numpy_dtype_reduced) # create array of given dtype fill_value = b"abc" # we never use bytes dtype internally, always promote to object expected_dtype = np.dtype(np.object_) # output is not a generic bytes, but corresponds to expected_dtype exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) def test_maybe_promote_datetime64_with_any(datetime64_dtype, any_numpy_dtype_reduced): dtype = np.dtype(datetime64_dtype) fill_dtype = np.dtype(any_numpy_dtype_reduced) # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] # filling datetime with anything but datetime casts to object if is_datetime64_dtype(fill_dtype): expected_dtype = dtype # for datetime dtypes, scalar values get cast to to_datetime64 exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64() else: expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) @pytest.mark.parametrize( "fill_value", [ pd.Timestamp("now"), np.datetime64("now"), datetime.datetime.now(), datetime.date.today(), ], ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"], ) def test_maybe_promote_any_with_datetime64( any_numpy_dtype_reduced, datetime64_dtype, fill_value ): dtype = np.dtype(any_numpy_dtype_reduced) # filling datetime with anything but datetime casts to object if is_datetime64_dtype(dtype): expected_dtype = dtype # for datetime dtypes, scalar values get cast to pd.Timestamp.value exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64() else: expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) def test_maybe_promote_datetimetz_with_any_numpy_dtype( tz_aware_fixture, any_numpy_dtype_reduced ): dtype = DatetimeTZDtype(tz=tz_aware_fixture) fill_dtype = np.dtype(any_numpy_dtype_reduced) # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] # filling datetimetz with any numpy dtype casts to object expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) def test_maybe_promote_datetimetz_with_datetimetz(tz_aware_fixture, tz_aware_fixture2): dtype = DatetimeTZDtype(tz=tz_aware_fixture) fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture2) # create array of given dtype; casts "1" to correct dtype fill_value = pd.Series([10 ** 9], dtype=fill_dtype)[0] # filling datetimetz with datetimetz casts to object, unless tz matches exp_val_for_scalar = fill_value if dtype.tz == fill_dtype.tz: expected_dtype = dtype else: expected_dtype = np.dtype(object) _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) @pytest.mark.parametrize("fill_value", [None, np.nan, NaT]) def test_maybe_promote_datetimetz_with_na(tz_aware_fixture, fill_value): dtype = DatetimeTZDtype(tz=tz_aware_fixture) expected_dtype = dtype exp_val_for_scalar = NaT _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) @pytest.mark.parametrize( "fill_value", [ pd.Timestamp("now"), np.datetime64("now"), datetime.datetime.now(), datetime.date.today(), ], ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"], ) def test_maybe_promote_any_numpy_dtype_with_datetimetz( any_numpy_dtype_reduced, tz_aware_fixture, fill_value ): dtype = np.dtype(any_numpy_dtype_reduced) fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture) fill_value = pd.Series([fill_value], dtype=fill_dtype)[0] # filling any numpy dtype with datetimetz casts to object expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype_reduced): dtype = np.dtype(timedelta64_dtype) fill_dtype = np.dtype(any_numpy_dtype_reduced) # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] # filling timedelta with anything but timedelta casts to object if is_timedelta64_dtype(fill_dtype): expected_dtype = dtype # for timedelta dtypes, scalar values get cast to pd.Timedelta.value exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64() else: expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) @pytest.mark.parametrize( "fill_value", [pd.Timedelta(days=1), np.timedelta64(24, "h"), datetime.timedelta(1)], ids=["pd.Timedelta", "np.timedelta64", "datetime.timedelta"], ) def test_maybe_promote_any_with_timedelta64( any_numpy_dtype_reduced, timedelta64_dtype, fill_value ): dtype = np.dtype(any_numpy_dtype_reduced) # filling anything but timedelta with timedelta casts to object if is_timedelta64_dtype(dtype): expected_dtype = dtype # for timedelta dtypes, scalar values get cast to pd.Timedelta.value exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64() else: expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) def test_maybe_promote_string_with_any(string_dtype, any_numpy_dtype_reduced): dtype = np.dtype(string_dtype) fill_dtype = np.dtype(any_numpy_dtype_reduced) # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] # filling string with anything casts to object expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) def test_maybe_promote_any_with_string(any_numpy_dtype_reduced, string_dtype): dtype = np.dtype(any_numpy_dtype_reduced) # create array of given dtype fill_value = "abc" # filling anything with a string casts to object expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) def test_maybe_promote_object_with_any(object_dtype, any_numpy_dtype_reduced): dtype = np.dtype(object_dtype) fill_dtype = np.dtype(any_numpy_dtype_reduced) # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] # filling object with anything stays object expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) def test_maybe_promote_any_with_object(any_numpy_dtype_reduced, object_dtype): dtype = np.dtype(any_numpy_dtype_reduced) # create array of object dtype from a scalar value (i.e. passing # dtypes.common.is_scalar), which can however not be cast to int/float etc. fill_value = pd.DateOffset(1) # filling object with anything stays object expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) @pytest.mark.parametrize("fill_value", [None, np.nan, NaT]) def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype_reduced, fill_value): dtype = np.dtype(any_numpy_dtype_reduced) if is_integer_dtype(dtype) and fill_value is not NaT: # integer + other missing value (np.nan / None) casts to float expected_dtype = np.float64 exp_val_for_scalar = np.nan elif is_object_dtype(dtype) and fill_value is NaT: # inserting into object does not cast the value # but *does* cast None to np.nan expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value elif is_datetime_or_timedelta_dtype(dtype): # datetime / timedelta cast all missing values to dtyped-NaT expected_dtype = dtype exp_val_for_scalar = dtype.type("NaT", "ns") elif fill_value is NaT: # NaT upcasts everything that's not datetime/timedelta to object expected_dtype = np.dtype(object) exp_val_for_scalar = NaT elif is_float_dtype(dtype) or is_complex_dtype(dtype): # float / complex + missing value (!= NaT) stays the same expected_dtype = dtype exp_val_for_scalar = np.nan else: # all other cases cast to object, and use np.nan as missing value expected_dtype = np.dtype(object) exp_val_for_scalar = np.nan _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) @pytest.mark.parametrize("dim", [0, 2, 3]) def test_maybe_promote_dimensions(any_numpy_dtype_reduced, dim): dtype = np.dtype(any_numpy_dtype_reduced) # create 0-dim array of given dtype; casts "1" to correct dtype fill_array = np.array(1, dtype=dtype) # expand to desired dimension: for _ in range(dim): fill_array = np.expand_dims(fill_array, 0) if dtype != object: # test against 1-dimensional case with pytest.raises(ValueError, match="fill_value must be a scalar"): maybe_promote(dtype, np.array([1], dtype=dtype)) with pytest.raises(ValueError, match="fill_value must be a scalar"): maybe_promote(dtype, fill_array) else: expected_dtype, expected_missing_value = maybe_promote( dtype, np.array([1], dtype=dtype) ) result_dtype, result_missing_value = maybe_promote(dtype, fill_array) assert result_dtype == expected_dtype _assert_match(result_missing_value, expected_missing_value)