# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Base classes for probability distributions.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import abc import contextlib import types import numpy as np import six from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import kullback_leibler from tensorflow.python.ops.distributions import util from tensorflow.python.util import deprecation from tensorflow.python.util import tf_inspect from tensorflow.python.util.tf_export import tf_export __all__ = [ "ReparameterizationType", "FULLY_REPARAMETERIZED", "NOT_REPARAMETERIZED", "Distribution", ] _DISTRIBUTION_PUBLIC_METHOD_WRAPPERS = [ "batch_shape", "batch_shape_tensor", "cdf", "covariance", "cross_entropy", "entropy", "event_shape", "event_shape_tensor", "kl_divergence", "log_cdf", "log_prob", "log_survival_function", "mean", "mode", "prob", "sample", "stddev", "survival_function", "variance", ] @six.add_metaclass(abc.ABCMeta) class _BaseDistribution(object): """Abstract base class needed for resolving subclass hierarchy.""" pass def _copy_fn(fn): """Create a deep copy of fn. Args: fn: a callable Returns: A `FunctionType`: a deep copy of fn. Raises: TypeError: if `fn` is not a callable. """ if not callable(fn): raise TypeError("fn is not callable: %s" % fn) # The blessed way to copy a function. copy.deepcopy fails to create a # non-reference copy. Since: # types.FunctionType == type(lambda: None), # and the docstring for the function type states: # # function(code, globals[, name[, argdefs[, closure]]]) # # Create a function object from a code object and a dictionary. # ... # # Here we can use this to create a new function with the old function's # code, globals, closure, etc. return types.FunctionType( code=fn.__code__, globals=fn.__globals__, name=fn.__name__, argdefs=fn.__defaults__, closure=fn.__closure__) def _update_docstring(old_str, append_str): """Update old_str by inserting append_str just before the "Args:" section.""" old_str = old_str or "" old_str_lines = old_str.split("\n") # Step 0: Prepend spaces to all lines of append_str. This is # necessary for correct markdown generation. append_str = "\n".join(" %s" % line for line in append_str.split("\n")) # Step 1: Find mention of "Args": has_args_ix = [ ix for ix, line in enumerate(old_str_lines) if line.strip().lower() == "args:"] if has_args_ix: final_args_ix = has_args_ix[-1] return ("\n".join(old_str_lines[:final_args_ix]) + "\n\n" + append_str + "\n\n" + "\n".join(old_str_lines[final_args_ix:])) else: return old_str + "\n\n" + append_str def _convert_to_tensor(value, name=None, preferred_dtype=None): """Converts to tensor avoiding an eager bug that loses float precision.""" # TODO(b/116672045): Remove this function. if (context.executing_eagerly() and preferred_dtype is not None and (preferred_dtype.is_integer or preferred_dtype.is_bool)): v = ops.convert_to_tensor(value, name=name) if v.dtype.is_floating: return v return ops.convert_to_tensor( value, name=name, preferred_dtype=preferred_dtype) class _DistributionMeta(abc.ABCMeta): def __new__(mcs, classname, baseclasses, attrs): """Control the creation of subclasses of the Distribution class. The main purpose of this method is to properly propagate docstrings from private Distribution methods, like `_log_prob`, into their public wrappers as inherited by the Distribution base class (e.g. `log_prob`). Args: classname: The name of the subclass being created. baseclasses: A tuple of parent classes. attrs: A dict mapping new attributes to their values. Returns: The class object. Raises: TypeError: If `Distribution` is not a subclass of `BaseDistribution`, or the new class is derived via multiple inheritance and the first parent class is not a subclass of `BaseDistribution`. AttributeError: If `Distribution` does not implement e.g. `log_prob`. ValueError: If a `Distribution` public method lacks a docstring. """ if not baseclasses: # Nothing to be done for Distribution raise TypeError("Expected non-empty baseclass. Does Distribution " "not subclass _BaseDistribution?") which_base = [ base for base in baseclasses if base == _BaseDistribution or issubclass(base, Distribution)] base = which_base[0] if base == _BaseDistribution: # Nothing to be done for Distribution return abc.ABCMeta.__new__(mcs, classname, baseclasses, attrs) if not issubclass(base, Distribution): raise TypeError("First parent class declared for %s must be " "Distribution, but saw '%s'" % (classname, base.__name__)) for attr in _DISTRIBUTION_PUBLIC_METHOD_WRAPPERS: special_attr = "_%s" % attr class_attr_value = attrs.get(attr, None) if attr in attrs: # The method is being overridden, do not update its docstring continue base_attr_value = getattr(base, attr, None) if not base_attr_value: raise AttributeError( "Internal error: expected base class '%s' to implement method '%s'" % (base.__name__, attr)) class_special_attr_value = attrs.get(special_attr, None) if class_special_attr_value is None: # No _special method available, no need to update the docstring. continue class_special_attr_docstring = tf_inspect.getdoc(class_special_attr_value) if not class_special_attr_docstring: # No docstring to append. continue class_attr_value = _copy_fn(base_attr_value) class_attr_docstring = tf_inspect.getdoc(base_attr_value) if class_attr_docstring is None: raise ValueError( "Expected base class fn to contain a docstring: %s.%s" % (base.__name__, attr)) class_attr_value.__doc__ = _update_docstring( class_attr_value.__doc__, ("Additional documentation from `%s`:\n\n%s" % (classname, class_special_attr_docstring))) attrs[attr] = class_attr_value return abc.ABCMeta.__new__(mcs, classname, baseclasses, attrs) @tf_export(v1=["distributions.ReparameterizationType"]) class ReparameterizationType(object): """Instances of this class represent how sampling is reparameterized. Two static instances exist in the distributions library, signifying one of two possible properties for samples from a distribution: `FULLY_REPARAMETERIZED`: Samples from the distribution are fully reparameterized, and straight-through gradients are supported. `NOT_REPARAMETERIZED`: Samples from the distribution are not fully reparameterized, and straight-through gradients are either partially unsupported or are not supported at all. In this case, for purposes of e.g. RL or variational inference, it is generally safest to wrap the sample results in a `stop_gradients` call and use policy gradients / surrogate loss instead. """ @deprecation.deprecated( "2019-01-01", "The TensorFlow Distributions library has moved to " "TensorFlow Probability " "(https://github.com/tensorflow/probability). You " "should update all references to use `tfp.distributions` " "instead of `tf.distributions`.", warn_once=True) def __init__(self, rep_type): self._rep_type = rep_type def __repr__(self): return "" % self._rep_type def __eq__(self, other): """Determine if this `ReparameterizationType` is equal to another. Since ReparameterizationType instances are constant static global instances, equality checks if two instances' id() values are equal. Args: other: Object to compare against. Returns: `self is other`. """ return self is other # Fully reparameterized distribution: samples from a fully # reparameterized distribution support straight-through gradients with # respect to all parameters. FULLY_REPARAMETERIZED = ReparameterizationType("FULLY_REPARAMETERIZED") tf_export(v1=["distributions.FULLY_REPARAMETERIZED"]).export_constant( __name__, "FULLY_REPARAMETERIZED") # Not reparameterized distribution: samples from a non- # reparameterized distribution do not support straight-through gradients for # at least some of the parameters. NOT_REPARAMETERIZED = ReparameterizationType("NOT_REPARAMETERIZED") tf_export(v1=["distributions.NOT_REPARAMETERIZED"]).export_constant( __name__, "NOT_REPARAMETERIZED") @six.add_metaclass(_DistributionMeta) @tf_export(v1=["distributions.Distribution"]) class Distribution(_BaseDistribution): """A generic probability distribution base class. `Distribution` is a base class for constructing and organizing properties (e.g., mean, variance) of random variables (e.g, Bernoulli, Gaussian). #### Subclassing Subclasses are expected to implement a leading-underscore version of the same-named function. The argument signature should be identical except for the omission of `name="..."`. For example, to enable `log_prob(value, name="log_prob")` a subclass should implement `_log_prob(value)`. Subclasses can append to public-level docstrings by providing docstrings for their method specializations. For example: ```python @util.AppendDocstring("Some other details.") def _log_prob(self, value): ... ``` would add the string "Some other details." to the `log_prob` function docstring. This is implemented as a simple decorator to avoid python linter complaining about missing Args/Returns/Raises sections in the partial docstrings. #### Broadcasting, batching, and shapes All distributions support batches of independent distributions of that type. The batch shape is determined by broadcasting together the parameters. The shape of arguments to `__init__`, `cdf`, `log_cdf`, `prob`, and `log_prob` reflect this broadcasting, as does the return value of `sample` and `sample_n`. `sample_n_shape = [n] + batch_shape + event_shape`, where `sample_n_shape` is the shape of the `Tensor` returned from `sample_n`, `n` is the number of samples, `batch_shape` defines how many independent distributions there are, and `event_shape` defines the shape of samples from each of those independent distributions. Samples are independent along the `batch_shape` dimensions, but not necessarily so along the `event_shape` dimensions (depending on the particulars of the underlying distribution). Using the `Uniform` distribution as an example: ```python minval = 3.0 maxval = [[4.0, 6.0], [10.0, 12.0]] # Broadcasting: # This instance represents 4 Uniform distributions. Each has a lower bound at # 3.0 as the `minval` parameter was broadcasted to match `maxval`'s shape. u = Uniform(minval, maxval) # `event_shape` is `TensorShape([])`. event_shape = u.event_shape # `event_shape_t` is a `Tensor` which will evaluate to []. event_shape_t = u.event_shape_tensor() # Sampling returns a sample per distribution. `samples` has shape # [5, 2, 2], which is [n] + batch_shape + event_shape, where n=5, # batch_shape=[2, 2], and event_shape=[]. samples = u.sample_n(5) # The broadcasting holds across methods. Here we use `cdf` as an example. The # same holds for `log_cdf` and the likelihood functions. # `cum_prob` has shape [2, 2] as the `value` argument was broadcasted to the # shape of the `Uniform` instance. cum_prob_broadcast = u.cdf(4.0) # `cum_prob`'s shape is [2, 2], one per distribution. No broadcasting # occurred. cum_prob_per_dist = u.cdf([[4.0, 5.0], [6.0, 7.0]]) # INVALID as the `value` argument is not broadcastable to the distribution's # shape. cum_prob_invalid = u.cdf([4.0, 5.0, 6.0]) ``` #### Shapes There are three important concepts associated with TensorFlow Distributions shapes: - Event shape describes the shape of a single draw from the distribution; it may be dependent across dimensions. For scalar distributions, the event shape is `[]`. For a 5-dimensional MultivariateNormal, the event shape is `[5]`. - Batch shape describes independent, not identically distributed draws, aka a "collection" or "bunch" of distributions. - Sample shape describes independent, identically distributed draws of batches from the distribution family. The event shape and the batch shape are properties of a Distribution object, whereas the sample shape is associated with a specific call to `sample` or `log_prob`. For detailed usage examples of TensorFlow Distributions shapes, see [this tutorial]( https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/jupyter_notebooks/Understanding_TensorFlow_Distributions_Shapes.ipynb) #### Parameter values leading to undefined statistics or distributions. Some distributions do not have well-defined statistics for all initialization parameter values. For example, the beta distribution is parameterized by positive real numbers `concentration1` and `concentration0`, and does not have well-defined mode if `concentration1 < 1` or `concentration0 < 1`. The user is given the option of raising an exception or returning `NaN`. ```python a = tf.exp(tf.matmul(logits, weights_a)) b = tf.exp(tf.matmul(logits, weights_b)) # Will raise exception if ANY batch member has a < 1 or b < 1. dist = distributions.beta(a, b, allow_nan_stats=False) mode = dist.mode().eval() # Will return NaN for batch members with either a < 1 or b < 1. dist = distributions.beta(a, b, allow_nan_stats=True) # Default behavior mode = dist.mode().eval() ``` In all cases, an exception is raised if *invalid* parameters are passed, e.g. ```python # Will raise an exception if any Op is run. negative_a = -1.0 * a # beta distribution by definition has a > 0. dist = distributions.beta(negative_a, b, allow_nan_stats=True) dist.mean().eval() ``` """ @deprecation.deprecated( "2019-01-01", "The TensorFlow Distributions library has moved to " "TensorFlow Probability " "(https://github.com/tensorflow/probability). You " "should update all references to use `tfp.distributions` " "instead of `tf.distributions`.", warn_once=True) def __init__(self, dtype, reparameterization_type, validate_args, allow_nan_stats, parameters=None, graph_parents=None, name=None): """Constructs the `Distribution`. **This is a private method for subclass use.** Args: dtype: The type of the event samples. `None` implies no type-enforcement. reparameterization_type: Instance of `ReparameterizationType`. If `distributions.FULLY_REPARAMETERIZED`, this `Distribution` can be reparameterized in terms of some standard distribution with a function whose Jacobian is constant for the support of the standard distribution. If `distributions.NOT_REPARAMETERIZED`, then no such reparameterization is available. validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. parameters: Python `dict` of parameters used to instantiate this `Distribution`. graph_parents: Python `list` of graph prerequisites of this `Distribution`. name: Python `str` name prefixed to Ops created by this class. Default: subclass name. Raises: ValueError: if any member of graph_parents is `None` or not a `Tensor`. """ graph_parents = [] if graph_parents is None else graph_parents for i, t in enumerate(graph_parents): if t is None or not tensor_util.is_tensor(t): raise ValueError("Graph parent item %d is not a Tensor; %s." % (i, t)) if not name or name[-1] != "/": # `name` is not a name scope non_unique_name = name or type(self).__name__ with ops.name_scope(non_unique_name) as name: pass self._dtype = dtype self._reparameterization_type = reparameterization_type self._allow_nan_stats = allow_nan_stats self._validate_args = validate_args self._parameters = parameters or {} self._graph_parents = graph_parents self._name = name @property def _parameters(self): return self._parameter_dict @_parameters.setter def _parameters(self, value): """Intercept assignments to self._parameters to avoid reference cycles. Parameters are often created using locals(), so we need to clean out any references to `self` before assigning it to an attribute. Args: value: A dictionary of parameters to assign to the `_parameters` property. """ if "self" in value: del value["self"] self._parameter_dict = value @classmethod def param_shapes(cls, sample_shape, name="DistributionParamShapes"): """Shapes of parameters given the desired shape of a call to `sample()`. This is a class method that describes what key/value arguments are required to instantiate the given `Distribution` so that a particular shape is returned for that instance's call to `sample()`. Subclasses should override class method `_param_shapes`. Args: sample_shape: `Tensor` or python list/tuple. Desired shape of a call to `sample()`. name: name to prepend ops with. Returns: `dict` of parameter name to `Tensor` shapes. """ with ops.name_scope(name, values=[sample_shape]): return cls._param_shapes(sample_shape) @classmethod def param_static_shapes(cls, sample_shape): """param_shapes with static (i.e. `TensorShape`) shapes. This is a class method that describes what key/value arguments are required to instantiate the given `Distribution` so that a particular shape is returned for that instance's call to `sample()`. Assumes that the sample's shape is known statically. Subclasses should override class method `_param_shapes` to return constant-valued tensors when constant values are fed. Args: sample_shape: `TensorShape` or python list/tuple. Desired shape of a call to `sample()`. Returns: `dict` of parameter name to `TensorShape`. Raises: ValueError: if `sample_shape` is a `TensorShape` and is not fully defined. """ if isinstance(sample_shape, tensor_shape.TensorShape): if not sample_shape.is_fully_defined(): raise ValueError("TensorShape sample_shape must be fully defined") sample_shape = sample_shape.as_list() params = cls.param_shapes(sample_shape) static_params = {} for name, shape in params.items(): static_shape = tensor_util.constant_value(shape) if static_shape is None: raise ValueError( "sample_shape must be a fully-defined TensorShape or list/tuple") static_params[name] = tensor_shape.TensorShape(static_shape) return static_params @staticmethod def _param_shapes(sample_shape): raise NotImplementedError("_param_shapes not implemented") @property def name(self): """Name prepended to all ops created by this `Distribution`.""" return self._name @property def dtype(self): """The `DType` of `Tensor`s handled by this `Distribution`.""" return self._dtype @property def parameters(self): """Dictionary of parameters used to instantiate this `Distribution`.""" # Remove "self", "__class__", or other special variables. These can appear # if the subclass used: # `parameters = dict(locals())`. return {k: v for k, v in self._parameters.items() if not k.startswith("__") and k != "self"} @property def reparameterization_type(self): """Describes how samples from the distribution are reparameterized. Currently this is one of the static instances `distributions.FULLY_REPARAMETERIZED` or `distributions.NOT_REPARAMETERIZED`. Returns: An instance of `ReparameterizationType`. """ return self._reparameterization_type @property def allow_nan_stats(self): """Python `bool` describing behavior when a stat is undefined. Stats return +/- infinity when it makes sense. E.g., the variance of a Cauchy distribution is infinity. However, sometimes the statistic is undefined, e.g., if a distribution's pdf does not achieve a maximum within the support of the distribution, the mode is undefined. If the mean is undefined, then by definition the variance is undefined. E.g. the mean for Student's T for df = 1 is undefined (no clear way to say it is either + or - infinity), so the variance = E[(X - mean)**2] is also undefined. Returns: allow_nan_stats: Python `bool`. """ return self._allow_nan_stats @property def validate_args(self): """Python `bool` indicating possibly expensive checks are enabled.""" return self._validate_args def copy(self, **override_parameters_kwargs): """Creates a deep copy of the distribution. Note: the copy distribution may continue to depend on the original initialization arguments. Args: **override_parameters_kwargs: String/value dictionary of initialization arguments to override with new values. Returns: distribution: A new instance of `type(self)` initialized from the union of self.parameters and override_parameters_kwargs, i.e., `dict(self.parameters, **override_parameters_kwargs)`. """ parameters = dict(self.parameters, **override_parameters_kwargs) return type(self)(**parameters) def _batch_shape_tensor(self): raise NotImplementedError( "batch_shape_tensor is not implemented: {}".format(type(self).__name__)) def batch_shape_tensor(self, name="batch_shape_tensor"): """Shape of a single sample from a single event index as a 1-D `Tensor`. The batch dimensions are indexes into independent, non-identical parameterizations of this distribution. Args: name: name to give to the op Returns: batch_shape: `Tensor`. """ with self._name_scope(name): if self.batch_shape.is_fully_defined(): return ops.convert_to_tensor(self.batch_shape.as_list(), dtype=dtypes.int32, name="batch_shape") return self._batch_shape_tensor() def _batch_shape(self): return tensor_shape.TensorShape(None) @property def batch_shape(self): """Shape of a single sample from a single event index as a `TensorShape`. May be partially defined or unknown. The batch dimensions are indexes into independent, non-identical parameterizations of this distribution. Returns: batch_shape: `TensorShape`, possibly unknown. """ return tensor_shape.as_shape(self._batch_shape()) def _event_shape_tensor(self): raise NotImplementedError( "event_shape_tensor is not implemented: {}".format(type(self).__name__)) def event_shape_tensor(self, name="event_shape_tensor"): """Shape of a single sample from a single batch as a 1-D int32 `Tensor`. Args: name: name to give to the op Returns: event_shape: `Tensor`. """ with self._name_scope(name): if self.event_shape.is_fully_defined(): return ops.convert_to_tensor(self.event_shape.as_list(), dtype=dtypes.int32, name="event_shape") return self._event_shape_tensor() def _event_shape(self): return tensor_shape.TensorShape(None) @property def event_shape(self): """Shape of a single sample from a single batch as a `TensorShape`. May be partially defined or unknown. Returns: event_shape: `TensorShape`, possibly unknown. """ return tensor_shape.as_shape(self._event_shape()) def is_scalar_event(self, name="is_scalar_event"): """Indicates that `event_shape == []`. Args: name: Python `str` prepended to names of ops created by this function. Returns: is_scalar_event: `bool` scalar `Tensor`. """ with self._name_scope(name): return ops.convert_to_tensor( self._is_scalar_helper(self.event_shape, self.event_shape_tensor), name="is_scalar_event") def is_scalar_batch(self, name="is_scalar_batch"): """Indicates that `batch_shape == []`. Args: name: Python `str` prepended to names of ops created by this function. Returns: is_scalar_batch: `bool` scalar `Tensor`. """ with self._name_scope(name): return ops.convert_to_tensor( self._is_scalar_helper(self.batch_shape, self.batch_shape_tensor), name="is_scalar_batch") def _sample_n(self, n, seed=None): raise NotImplementedError("sample_n is not implemented: {}".format( type(self).__name__)) def _call_sample_n(self, sample_shape, seed, name, **kwargs): with self._name_scope(name, values=[sample_shape]): sample_shape = ops.convert_to_tensor( sample_shape, dtype=dtypes.int32, name="sample_shape") sample_shape, n = self._expand_sample_shape_to_vector( sample_shape, "sample_shape") samples = self._sample_n(n, seed, **kwargs) batch_event_shape = array_ops.shape(samples)[1:] final_shape = array_ops.concat([sample_shape, batch_event_shape], 0) samples = array_ops.reshape(samples, final_shape) samples = self._set_sample_static_shape(samples, sample_shape) return samples def sample(self, sample_shape=(), seed=None, name="sample"): """Generate samples of the specified shape. Note that a call to `sample()` without arguments will generate a single sample. Args: sample_shape: 0D or 1D `int32` `Tensor`. Shape of the generated samples. seed: Python integer seed for RNG name: name to give to the op. Returns: samples: a `Tensor` with prepended dimensions `sample_shape`. """ return self._call_sample_n(sample_shape, seed, name) def _log_prob(self, value): raise NotImplementedError("log_prob is not implemented: {}".format( type(self).__name__)) def _call_log_prob(self, value, name, **kwargs): with self._name_scope(name, values=[value]): value = _convert_to_tensor( value, name="value", preferred_dtype=self.dtype) try: return self._log_prob(value, **kwargs) except NotImplementedError as original_exception: try: return math_ops.log(self._prob(value, **kwargs)) except NotImplementedError: raise original_exception def log_prob(self, value, name="log_prob"): """Log probability density/mass function. Args: value: `float` or `double` `Tensor`. name: Python `str` prepended to names of ops created by this function. Returns: log_prob: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type `self.dtype`. """ return self._call_log_prob(value, name) def _prob(self, value): raise NotImplementedError("prob is not implemented: {}".format( type(self).__name__)) def _call_prob(self, value, name, **kwargs): with self._name_scope(name, values=[value]): value = _convert_to_tensor( value, name="value", preferred_dtype=self.dtype) try: return self._prob(value, **kwargs) except NotImplementedError as original_exception: try: return math_ops.exp(self._log_prob(value, **kwargs)) except NotImplementedError: raise original_exception def prob(self, value, name="prob"): """Probability density/mass function. Args: value: `float` or `double` `Tensor`. name: Python `str` prepended to names of ops created by this function. Returns: prob: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type `self.dtype`. """ return self._call_prob(value, name) def _log_cdf(self, value): raise NotImplementedError("log_cdf is not implemented: {}".format( type(self).__name__)) def _call_log_cdf(self, value, name, **kwargs): with self._name_scope(name, values=[value]): value = _convert_to_tensor( value, name="value", preferred_dtype=self.dtype) try: return self._log_cdf(value, **kwargs) except NotImplementedError as original_exception: try: return math_ops.log(self._cdf(value, **kwargs)) except NotImplementedError: raise original_exception def log_cdf(self, value, name="log_cdf"): """Log cumulative distribution function. Given random variable `X`, the cumulative distribution function `cdf` is: ```none log_cdf(x) := Log[ P[X <= x] ] ``` Often, a numerical approximation can be used for `log_cdf(x)` that yields a more accurate answer than simply taking the logarithm of the `cdf` when `x << -1`. Args: value: `float` or `double` `Tensor`. name: Python `str` prepended to names of ops created by this function. Returns: logcdf: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type `self.dtype`. """ return self._call_log_cdf(value, name) def _cdf(self, value): raise NotImplementedError("cdf is not implemented: {}".format( type(self).__name__)) def _call_cdf(self, value, name, **kwargs): with self._name_scope(name, values=[value]): value = _convert_to_tensor( value, name="value", preferred_dtype=self.dtype) try: return self._cdf(value, **kwargs) except NotImplementedError as original_exception: try: return math_ops.exp(self._log_cdf(value, **kwargs)) except NotImplementedError: raise original_exception def cdf(self, value, name="cdf"): """Cumulative distribution function. Given random variable `X`, the cumulative distribution function `cdf` is: ```none cdf(x) := P[X <= x] ``` Args: value: `float` or `double` `Tensor`. name: Python `str` prepended to names of ops created by this function. Returns: cdf: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type `self.dtype`. """ return self._call_cdf(value, name) def _log_survival_function(self, value): raise NotImplementedError( "log_survival_function is not implemented: {}".format( type(self).__name__)) def _call_log_survival_function(self, value, name, **kwargs): with self._name_scope(name, values=[value]): value = _convert_to_tensor( value, name="value", preferred_dtype=self.dtype) try: return self._log_survival_function(value, **kwargs) except NotImplementedError as original_exception: try: return math_ops.log1p(-self.cdf(value, **kwargs)) except NotImplementedError: raise original_exception def log_survival_function(self, value, name="log_survival_function"): """Log survival function. Given random variable `X`, the survival function is defined: ```none log_survival_function(x) = Log[ P[X > x] ] = Log[ 1 - P[X <= x] ] = Log[ 1 - cdf(x) ] ``` Typically, different numerical approximations can be used for the log survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. Args: value: `float` or `double` `Tensor`. name: Python `str` prepended to names of ops created by this function. Returns: `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type `self.dtype`. """ return self._call_log_survival_function(value, name) def _survival_function(self, value): raise NotImplementedError("survival_function is not implemented: {}".format( type(self).__name__)) def _call_survival_function(self, value, name, **kwargs): with self._name_scope(name, values=[value]): value = _convert_to_tensor( value, name="value", preferred_dtype=self.dtype) try: return self._survival_function(value, **kwargs) except NotImplementedError as original_exception: try: return 1. - self.cdf(value, **kwargs) except NotImplementedError: raise original_exception def survival_function(self, value, name="survival_function"): """Survival function. Given random variable `X`, the survival function is defined: ```none survival_function(x) = P[X > x] = 1 - P[X <= x] = 1 - cdf(x). ``` Args: value: `float` or `double` `Tensor`. name: Python `str` prepended to names of ops created by this function. Returns: `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type `self.dtype`. """ return self._call_survival_function(value, name) def _entropy(self): raise NotImplementedError("entropy is not implemented: {}".format( type(self).__name__)) def entropy(self, name="entropy"): """Shannon entropy in nats.""" with self._name_scope(name): return self._entropy() def _mean(self): raise NotImplementedError("mean is not implemented: {}".format( type(self).__name__)) def mean(self, name="mean"): """Mean.""" with self._name_scope(name): return self._mean() def _quantile(self, value): raise NotImplementedError("quantile is not implemented: {}".format( type(self).__name__)) def _call_quantile(self, value, name, **kwargs): with self._name_scope(name, values=[value]): value = _convert_to_tensor( value, name="value", preferred_dtype=self.dtype) return self._quantile(value, **kwargs) def quantile(self, value, name="quantile"): """Quantile function. Aka "inverse cdf" or "percent point function". Given random variable `X` and `p in [0, 1]`, the `quantile` is: ```none quantile(p) := x such that P[X <= x] == p ``` Args: value: `float` or `double` `Tensor`. name: Python `str` prepended to names of ops created by this function. Returns: quantile: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type `self.dtype`. """ return self._call_quantile(value, name) def _variance(self): raise NotImplementedError("variance is not implemented: {}".format( type(self).__name__)) def variance(self, name="variance"): """Variance. Variance is defined as, ```none Var = E[(X - E[X])**2] ``` where `X` is the random variable associated with this distribution, `E` denotes expectation, and `Var.shape = batch_shape + event_shape`. Args: name: Python `str` prepended to names of ops created by this function. Returns: variance: Floating-point `Tensor` with shape identical to `batch_shape + event_shape`, i.e., the same shape as `self.mean()`. """ with self._name_scope(name): try: return self._variance() except NotImplementedError as original_exception: try: return math_ops.square(self._stddev()) except NotImplementedError: raise original_exception def _stddev(self): raise NotImplementedError("stddev is not implemented: {}".format( type(self).__name__)) def stddev(self, name="stddev"): """Standard deviation. Standard deviation is defined as, ```none stddev = E[(X - E[X])**2]**0.5 ``` where `X` is the random variable associated with this distribution, `E` denotes expectation, and `stddev.shape = batch_shape + event_shape`. Args: name: Python `str` prepended to names of ops created by this function. Returns: stddev: Floating-point `Tensor` with shape identical to `batch_shape + event_shape`, i.e., the same shape as `self.mean()`. """ with self._name_scope(name): try: return self._stddev() except NotImplementedError as original_exception: try: return math_ops.sqrt(self._variance()) except NotImplementedError: raise original_exception def _covariance(self): raise NotImplementedError("covariance is not implemented: {}".format( type(self).__name__)) def covariance(self, name="covariance"): """Covariance. Covariance is (possibly) defined only for non-scalar-event distributions. For example, for a length-`k`, vector-valued distribution, it is calculated as, ```none Cov[i, j] = Covariance(X_i, X_j) = E[(X_i - E[X_i]) (X_j - E[X_j])] ``` where `Cov` is a (batch of) `k x k` matrix, `0 <= (i, j) < k`, and `E` denotes expectation. Alternatively, for non-vector, multivariate distributions (e.g., matrix-valued, Wishart), `Covariance` shall return a (batch of) matrices under some vectorization of the events, i.e., ```none Cov[i, j] = Covariance(Vec(X)_i, Vec(X)_j) = [as above] ``` where `Cov` is a (batch of) `k' x k'` matrices, `0 <= (i, j) < k' = reduce_prod(event_shape)`, and `Vec` is some function mapping indices of this distribution's event dimensions to indices of a length-`k'` vector. Args: name: Python `str` prepended to names of ops created by this function. Returns: covariance: Floating-point `Tensor` with shape `[B1, ..., Bn, k', k']` where the first `n` dimensions are batch coordinates and `k' = reduce_prod(self.event_shape)`. """ with self._name_scope(name): return self._covariance() def _mode(self): raise NotImplementedError("mode is not implemented: {}".format( type(self).__name__)) def mode(self, name="mode"): """Mode.""" with self._name_scope(name): return self._mode() def _cross_entropy(self, other): return kullback_leibler.cross_entropy( self, other, allow_nan_stats=self.allow_nan_stats) def cross_entropy(self, other, name="cross_entropy"): """Computes the (Shannon) cross entropy. Denote this distribution (`self`) by `P` and the `other` distribution by `Q`. Assuming `P, Q` are absolutely continuous with respect to one another and permit densities `p(x) dr(x)` and `q(x) dr(x)`, (Shanon) cross entropy is defined as: ```none H[P, Q] = E_p[-log q(X)] = -int_F p(x) log q(x) dr(x) ``` where `F` denotes the support of the random variable `X ~ P`. Args: other: `tfp.distributions.Distribution` instance. name: Python `str` prepended to names of ops created by this function. Returns: cross_entropy: `self.dtype` `Tensor` with shape `[B1, ..., Bn]` representing `n` different calculations of (Shanon) cross entropy. """ with self._name_scope(name): return self._cross_entropy(other) def _kl_divergence(self, other): return kullback_leibler.kl_divergence( self, other, allow_nan_stats=self.allow_nan_stats) def kl_divergence(self, other, name="kl_divergence"): """Computes the Kullback--Leibler divergence. Denote this distribution (`self`) by `p` and the `other` distribution by `q`. Assuming `p, q` are absolutely continuous with respect to reference measure `r`, the KL divergence is defined as: ```none KL[p, q] = E_p[log(p(X)/q(X))] = -int_F p(x) log q(x) dr(x) + int_F p(x) log p(x) dr(x) = H[p, q] - H[p] ``` where `F` denotes the support of the random variable `X ~ p`, `H[., .]` denotes (Shanon) cross entropy, and `H[.]` denotes (Shanon) entropy. Args: other: `tfp.distributions.Distribution` instance. name: Python `str` prepended to names of ops created by this function. Returns: kl_divergence: `self.dtype` `Tensor` with shape `[B1, ..., Bn]` representing `n` different calculations of the Kullback-Leibler divergence. """ with self._name_scope(name): return self._kl_divergence(other) def __str__(self): return ("tfp.distributions.{type_name}(" "\"{self_name}\"" "{maybe_batch_shape}" "{maybe_event_shape}" ", dtype={dtype})".format( type_name=type(self).__name__, self_name=self.name, maybe_batch_shape=(", batch_shape={}".format(self.batch_shape) if self.batch_shape.ndims is not None else ""), maybe_event_shape=(", event_shape={}".format(self.event_shape) if self.event_shape.ndims is not None else ""), dtype=self.dtype.name)) def __repr__(self): return ("".format( type_name=type(self).__name__, self_name=self.name, batch_shape=self.batch_shape, event_shape=self.event_shape, dtype=self.dtype.name)) @contextlib.contextmanager def _name_scope(self, name=None, values=None): """Helper function to standardize op scope.""" with ops.name_scope(self.name): with ops.name_scope(name, values=( ([] if values is None else values) + self._graph_parents)) as scope: yield scope def _expand_sample_shape_to_vector(self, x, name): """Helper to `sample` which ensures input is 1D.""" x_static_val = tensor_util.constant_value(x) if x_static_val is None: prod = math_ops.reduce_prod(x) else: prod = np.prod(x_static_val, dtype=x.dtype.as_numpy_dtype()) ndims = x.get_shape().ndims # != sample_ndims if ndims is None: # Maybe expand_dims. ndims = array_ops.rank(x) expanded_shape = util.pick_vector( math_ops.equal(ndims, 0), np.array([1], dtype=np.int32), array_ops.shape(x)) x = array_ops.reshape(x, expanded_shape) elif ndims == 0: # Definitely expand_dims. if x_static_val is not None: x = ops.convert_to_tensor( np.array([x_static_val], dtype=x.dtype.as_numpy_dtype()), name=name) else: x = array_ops.reshape(x, [1]) elif ndims != 1: raise ValueError("Input is neither scalar nor vector.") return x, prod def _set_sample_static_shape(self, x, sample_shape): """Helper to `sample`; sets static shape info.""" # Set shape hints. sample_shape = tensor_shape.TensorShape( tensor_util.constant_value(sample_shape)) ndims = x.get_shape().ndims sample_ndims = sample_shape.ndims batch_ndims = self.batch_shape.ndims event_ndims = self.event_shape.ndims # Infer rank(x). if (ndims is None and sample_ndims is not None and batch_ndims is not None and event_ndims is not None): ndims = sample_ndims + batch_ndims + event_ndims x.set_shape([None] * ndims) # Infer sample shape. if ndims is not None and sample_ndims is not None: shape = sample_shape.concatenate([None]*(ndims - sample_ndims)) x.set_shape(x.get_shape().merge_with(shape)) # Infer event shape. if ndims is not None and event_ndims is not None: shape = tensor_shape.TensorShape( [None]*(ndims - event_ndims)).concatenate(self.event_shape) x.set_shape(x.get_shape().merge_with(shape)) # Infer batch shape. if batch_ndims is not None: if ndims is not None: if sample_ndims is None and event_ndims is not None: sample_ndims = ndims - batch_ndims - event_ndims elif event_ndims is None and sample_ndims is not None: event_ndims = ndims - batch_ndims - sample_ndims if sample_ndims is not None and event_ndims is not None: shape = tensor_shape.TensorShape([None]*sample_ndims).concatenate( self.batch_shape).concatenate([None]*event_ndims) x.set_shape(x.get_shape().merge_with(shape)) return x def _is_scalar_helper(self, static_shape, dynamic_shape_fn): """Implementation for `is_scalar_batch` and `is_scalar_event`.""" if static_shape.ndims is not None: return static_shape.ndims == 0 shape = dynamic_shape_fn() if (shape.get_shape().ndims is not None and shape.get_shape().dims[0].value is not None): # If the static_shape is correctly written then we should never execute # this branch. We keep it just in case there's some unimagined corner # case. return shape.get_shape().as_list() == [0] return math_ops.equal(array_ops.shape(shape)[0], 0)