# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Gradient checker for functions.

The gradient checker verifies numerically that a function properly
computes its gradients.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import tf_export


def _product(t):
  if isinstance(t, int):
    return t
  else:
    y = 1
    for x in t:
      y *= x
    return y


def _eval_indexed_slices(a):
  """Converts IndexedSlices to IndexedSlicesValue with numpy indices/values.

  When eager execution is enabled, converts IndexedSlices to
  IndexedSlicesValue with numpy indices/values.

  Args:
    a: any value.

  Returns:
    If a is IndexedSlices and eager execution is enabled, calls numpy() on a's
    fields. Otherwise returns a unchanged.
  """
  if isinstance(a, ops.IndexedSlices) and context.executing_eagerly():
    return ops.IndexedSlicesValue(
        indices=[x.numpy() for x in a.indices],
        values=[x.numpy() for x in a.values],
        dense_shape=a.dense_shape)
  return a


def _to_numpy(a):
  """Converts Tensors, EagerTensors, and IndexedSlicesValue to numpy arrays.

  Args:
    a: any value.

  Returns:
    If a is EagerTensor or Tensor, returns the evaluation of a by calling
    numpy() or run(). If a is IndexedSlicesValue, constructs the corresponding
    dense numpy array. Otherwise returns a unchanged.
  """
  if isinstance(a, ops.EagerTensor):
    return a.numpy()
  if isinstance(a, ops.Tensor):
    sess = ops.get_default_session()
    return sess.run(a)
  if isinstance(a, ops.IndexedSlicesValue):
    arr = np.zeros(a.dense_shape)
    assert len(a.values) == len(a.indices), (
        "IndexedSlicesValue has %d value slices but %d indices\n%s" %
        (len(a.values), len(a.indices), a))
    for values_slice, index in zip(a.values, a.indices):
      assert 0 <= index < len(arr), (
          "IndexedSlicesValue has invalid index %s\n%s" % (index, a))
      arr[index] += values_slice
    return arr
  return a
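

# A hedged sketch (not part of the original module) of how `_to_numpy`
# densifies an `IndexedSlicesValue`; the values below are hypothetical,
# assuming a dense shape of [4, 2]:
#
#   sparse = ops.IndexedSlicesValue(
#       indices=[0, 2, 2], values=np.ones((3, 2)), dense_shape=[4, 2])
#   _to_numpy(sparse)  # => [[1, 1], [0, 0], [2, 2], [0, 0]]
#
# Rows named in `indices` receive the matching `values` slices, and a repeated
# index accumulates, which is why row 2 sums to [2, 2].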


def _prepare(f, xs_dtypes, xs_shapes):
  """Return a function that executes 'f'.

  In TF 2.x, this is the same as `f`.
  In TF 1.x, returns a Python function that executes the graph defined by `f`
  in a Session.

  Args:
    f: the function.
    xs_dtypes: dtypes of f's arguments.
    xs_shapes: shapes of f's arguments.

  Returns:
    A function that accepts the same arguments as `f` and evaluates it, either
    eagerly (TF 2.x) or in the default Session (TF 1.x).
  """
  if context.executing_eagerly():

    def decorated_eager(*xs_data):
      return f(*map(ops.convert_to_tensor, xs_data))

    return decorated_eager
  xs = [
      array_ops.placeholder(x_dtype, shape=x_shape)
      for x_dtype, x_shape in zip(xs_dtypes, xs_shapes)
  ]
  y = f(*xs)
  sess = ops.get_default_session()

  def decorated_graph(*xs_data):
    xs_data = [_to_numpy(a) for a in xs_data]
    return sess.run(y, feed_dict=dict(zip(xs, xs_data)))

  return decorated_graph


def _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param):
  """Computes the theoretical Jacobian for f with respect to xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_shape: the shape of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.

  Raises:
    ValueError: If result is empty but the gradient is nonzero.
  """
  x = xs[param]
  # Complex vectors are treated as vectors of twice as many reals.
  x_shape = tuple(x.shape) + (2,) if x.dtype.is_complex else x.shape
  y_factor = 2 if y_dtype.is_complex else 1

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape)
  x_val_size = _product(x_shape[1:])  # This is used for sparse gradients.
  y_size = _product(y_shape) * y_factor

  # Allocate 2-D Jacobian, with y dimensions smashed into the first
  # dimension and x dimensions smashed into the second.
  jacobian = np.zeros((y_size, x_size),
                      dtype=x.dtype.real_dtype.as_numpy_dtype)

  # For each entry of dy, we set it to 1 and everything else to 0, then
  # compute the gradients -- this gives us one row of the Jacobian matrix.
  dy_data = np.zeros(y_shape, dtype=y_dtype.as_numpy_dtype)
  dy_data_flat = dy_data.ravel().view(y_dtype.real_dtype.as_numpy_dtype)
  grad_fn_unprep = backprop.gradients_function(f, [param])
  grad_fn = _prepare(lambda dy, *xs: grad_fn_unprep(*xs, dy=dy),
                     [y_dtype] + [z.dtype for z in xs],
                     [None] + [z.shape for z in xs])
  for row in range(y_size):
    dy_data_flat[row] = 1
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    grad = _eval_indexed_slices(grad)
    if isinstance(grad, ops.IndexedSlicesValue):
      for i, v in zip(grad.indices, grad.values):
        c_begin = i * x_val_size
        c_end = c_begin + x_val_size
        jacobian[row, c_begin:c_end] += v.flat
    elif grad is not None:
      jacobian[row, :] = grad.ravel().view(jacobian.dtype)
    # This reset of `dy_data_flat` needs to happen after `grad` is copied to
    # `jacobian` because `grad` and `dy_data_flat` may share memory.
    dy_data_flat[row] = 0

  # If the output is empty, run the gradients at least once and make sure
  # they produce zeros.
  if y_size == 0:  # don't use 'not y_size', because y_size may not be an int
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    if grad.shape != x.shape:
      raise ValueError("Empty gradient has wrong shape: expected %s, got %s" %
                       (x.shape, grad.shape))
    if np.any(grad):
      raise ValueError("Empty tensor with nonzero gradients")

  logging.vlog(1, "Theoretical Jacobian =\n%s", jacobian)
  return jacobian
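

# A hedged worked example (not part of the original module) for the central
# difference scheme used by `_compute_numeric_jacobian` below: for
# f(x) = x * x at x = 1.0 with delta = 1e-3, one Jacobian column is
#
#   (f(1.001) - f(0.999)) / (2 * 1e-3)
#     = (1.002001 - 0.998001) / 0.002
#     = 2.0,
#
# which matches the analytic derivative 2x up to float32 rounding (hence the
# 2.000004 seen in the `compute_gradient` docstring example further down).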


def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta):
  """Computes the numeric Jacobian for f with respect to xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_size: the number of elements of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.
    delta: the amount of perturbation we give to the input.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.
  """
  x_shape = xs[param].shape
  x_dtype = xs[param].dtype

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape) * (2 if x_dtype.is_complex else 1)
  y_size = y_size * (2 if y_dtype.is_complex else 1)
  x_dtype = x_dtype.real_dtype.as_numpy_dtype
  y_dtype = y_dtype.real_dtype.as_numpy_dtype

  xs_dtypes = [x.dtype for x in xs]
  xs_shapes = [x.shape for x in xs]
  # Converts xs to numpy arrays to do in-place perturbation.
  # Calls asarray() to avoid copying in ravel() later.
  xs = [np.asarray(_to_numpy(x)) for x in xs]
  x = xs[param]

  # Make sure we have the right types.
  scale = np.asarray(2 * delta, dtype=y_dtype)[()]

  jacobian = np.zeros((y_size, x_size), dtype=x_dtype)

  # For each entry of x, we slightly perturb it by adding and subtracting a
  # delta, then compute the difference between the outputs. This gives us one
  # column of the Jacobian matrix.
  f = _prepare(f, xs_dtypes, xs_shapes)
  for col in range(x_size):
    original = x.ravel().view(x_dtype)[col]
    x.ravel().view(x_dtype)[col] += delta
    y_pos = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[col] = original
    x.ravel().view(x_dtype)[col] -= delta
    y_neg = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[col] = original
    diff = (y_pos - y_neg) / scale
    jacobian[:, col] = diff.ravel().view(y_dtype)

  logging.vlog(1, "Numeric Jacobian =\n%s", jacobian)
  return jacobian


def _compute_gradient(f, y_shape, y_dtype, xs, param, delta):
  """Computes the theoretical and numerical Jacobian."""
  x = xs[param]
  t = x.dtype
  allowed_types = [
      dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64,
      dtypes.complex64, dtypes.complex128
  ]
  assert t.base_dtype in allowed_types, ("Cannot compute gradient for "
                                         "unsupported type %s of argument %s" %
                                         (t.name, param))
  t2 = y_dtype
  assert t2.base_dtype in allowed_types, ("Cannot compute gradient for "
                                          "unsupported type %s of y" % t2.name)
  y_size = _product(y_shape)
  jacob_t = _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param)
  jacob_n = _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta)
  return jacob_t, jacob_n


def _compute_gradient_list(f, xs, delta):
  """Computes gradients for a list of x values."""
  # Convert xs to tensors so that dtype and shape have uniform types.
  xs = [ops.convert_to_tensor(x) for x in xs]
  # Run the function to get info of the result.
  xs_dtypes = [x.dtype for x in xs]
  xs_shapes = [x.shape for x in xs]
  f_temp = _prepare(f, xs_dtypes, xs_shapes)
  y = f_temp(*xs)
  return tuple(
      zip(*[
          _compute_gradient(f, y.shape, dtypes.as_dtype(y.dtype), xs, i, delta)
          for i in range(len(xs))
      ]))
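

# A hedged note (not part of the original module) on the `tuple(zip(*...))`
# above: `_compute_gradient` returns a (jacob_t, jacob_n) pair per argument,
# so for two arguments the per-argument list
#
#   [(t0, n0), (t1, n1)]
#
# is transposed into ((t0, t1), (n0, n1)) -- one tuple of theoretical
# Jacobians followed by one tuple of numeric Jacobians, which is exactly the
# shape of `compute_gradient`'s return value below.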


@tf_export("test.compute_gradient", v1=[])
def compute_gradient(f, x, delta=1e-3):
  """Computes the theoretical and numeric Jacobian of `f`.

  With y = f(x), computes the theoretical and numeric Jacobian dy/dx.

  Args:
    f: the function.
    x: the arguments for the function as a list or tuple of values convertible
      to a Tensor.
    delta: (optional) perturbation used to compute numeric Jacobian.

  Returns:
    A pair of lists, where the first is a list of 2-d numpy arrays representing
    the theoretical Jacobians for each argument, and the second list is the
    numerical ones. Each 2-d array has "y_size" rows and "x_size" columns
    where "x_size" is the number of elements in the corresponding argument and
    "y_size" is the number of elements in f(x).

  Raises:
    ValueError: If result is empty but the gradient is nonzero.
    ValueError: If x is not a list or a tuple.

  Example:
  ```python
  @tf.function
  def test_func(x):
    return x*x

  theoretical, numerical = tf.test.compute_gradient(test_func, [1.0])
  theoretical, numerical
  # ((array([[2.]], dtype=float32),), (array([[2.000004]], dtype=float32),))
  ```
  """
  if not isinstance(x, (list, tuple)):
    raise ValueError(
        "`x` must be a list or tuple of values convertible to a Tensor "
        "(arguments to `f`), not a %s" % type(x))
  return _compute_gradient_list(f, x, delta)


def max_error(grad1, grad2):
  """Computes maximum elementwise gap.

  Computes the maximum elementwise gap between two lists of tensors of the same
  shape.

  Args:
    grad1: a list of tensors.
    grad2: a list of tensors with the same shape as grad1.

  Returns:
    The maximum elementwise gap between the two.
  """
  error = 0
  for j_t, j_n in zip(grad1, grad2):
    if j_t.size or j_n.size:  # Handle zero size tensors correctly.
      error = np.maximum(error, np.fabs(j_t - j_n).max())
  return error
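

# A minimal, hedged self-check sketch (not part of the original module). It
# assumes eager execution (TF 2.x), where `compute_gradient` can call `f`
# directly; the function and argument value below are illustrative only.
if __name__ == "__main__":

  def _square(x):
    return x * x

  theoretical, numerical = compute_gradient(_square, [1.0])
  # For y = x * x at x = 1.0, both Jacobians should be close to [[2.]], so the
  # reported gap should be tiny (on the order of the float32 rounding error in
  # the central difference).
  print("max error:", max_error(theoretical, numerical))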