# Copyright 2015 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Initializers for TF 2.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import math import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_linalg_ops from tensorflow.python.ops import linalg_ops_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import stateless_random_ops from tensorflow.python.ops.init_ops import _compute_fans from tensorflow.python.util.tf_export import tf_export _PARTITION_SHAPE = "partition_shape" _PARTITION_OFFSET = "partition_offset" class Initializer(object): """Initializer base class: all initializers inherit from this class. Initializers should implement a `__call__` method with the following signature: ```python def __call__(self, shape, dtype=None, **kwargs): # returns a tensor of shape `shape` and dtype `dtype` # containing values drawn from a distribution of your choice. ``` """ def __call__(self, shape, dtype=None, **kwargs): """Returns a tensor object initialized as specified by the initializer. Args: shape: Shape of the tensor. dtype: Optional dtype of the tensor. If not provided will return tensor of `tf.float32`. **kwargs: Additional keyword arguments. Accepted values: `partition_shape` and `partition_offset`. Used when creating a single partition in a partitioned variable. `partition_shape` is the shape of the partition (i.e. the shape of the returned tensor) and `partition_offset` is a tuple of `int` specifying the offset of this partition w.r.t each axis. For example, a tensor of shape `(30, 100)` can be partitioned into two partitions: `p0` of shape `(10, 100)` and `p1` of shape `(20, 100)`; if the initializer is called with `partition_shape=(20, 100)` and `partition_offset=(10, 0)`, it should return the value for `p1`. """ raise NotImplementedError def get_config(self): """Returns the configuration of the initializer as a JSON-serializable dict. Returns: A JSON-serializable Python dict. """ return {} @classmethod def from_config(cls, config): """Instantiates an initializer from a configuration dictionary. Example: ```python initializer = RandomUniform(-1, 1) config = initializer.get_config() initializer = RandomUniform.from_config(config) ``` Args: config: A Python dictionary. It will typically be the output of `get_config`. Returns: An Initializer instance. """ config.pop("dtype", None) return cls(**config) def _validate_kwargs(self, kwargs, support_partition=True): for kwarg in kwargs: if kwarg not in [_PARTITION_SHAPE, _PARTITION_OFFSET]: raise TypeError("Unknown keyword arguments: %s" % kwarg) elif not support_partition: raise ValueError("%s initializer doesn't support partition-related" " arguments" % self.__class__.__name__) @tf_export("zeros_initializer", v1=[]) class Zeros(Initializer): """Initializer that generates tensors initialized to 0. Initializers allow you to pre-specify an initialization strategy, encoded in the Initializer object, without knowing the shape and dtype of the variable being initialized. Examples: >>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.zeros_initializer()) >>> v1 >>> v2 >>> make_variables(4, tf.random_uniform_initializer(minval=-1., maxval=1.)) (, >> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.ones_initializer()) >>> v1 >>> v2 >>> make_variables(4, tf.random_uniform_initializer(minval=-1., maxval=1.)) (, >> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.constant_initializer(2.)) >>> v1 >>> v2 >>> make_variables(4, tf.random_uniform_initializer(minval=-1., maxval=1.)) (, >> value = [0, 1, 2, 3, 4, 5, 6, 7] >>> init = tf.constant_initializer(value) >>> # Fitting shape >>> tf.Variable(init(shape=[2, 4], dtype=tf.float32)) >>> # Larger shape >>> tf.Variable(init(shape=[3, 4], dtype=tf.float32)) Traceback (most recent call last): ... TypeError: ...value has 8 elements, shape is (3, 4) with 12 elements... >>> # Smaller shape >>> tf.Variable(init(shape=[2, 3], dtype=tf.float32)) Traceback (most recent call last): ... TypeError: ...value has 8 elements, shape is (2, 3) with 6 elements... Args: value: A Python scalar, list or tuple of values, or a N-dimensional numpy array. All elements of the initialized variable will be set to the corresponding value in the `value` argument. Raises: TypeError: If the input `value` is not one of the expected types. """ def __init__(self, value=0): if not (np.isscalar(value) or isinstance(value, (list, tuple, np.ndarray))): raise TypeError( "Invalid type for initial value: %s (expected Python scalar, list or " "tuple of values, or numpy.ndarray)." % type(value)) self.value = value def __call__(self, shape, dtype=None, **kwargs): """Returns a tensor object initialized as specified by the initializer. Args: shape: Shape of the tensor. dtype: Optional dtype of the tensor. If not provided the dtype of the tensor created will be the type of the inital value. **kwargs: Additional keyword arguments. Raises: TypeError: If the initializer cannot create a tensor of the requested dtype. """ self._validate_kwargs(kwargs, support_partition=False) if dtype is not None: dtype = dtypes.as_dtype(dtype) return constant_op.constant(self.value, dtype=dtype, shape=shape) def get_config(self): return {"value": self.value} @tf_export("random_uniform_initializer", v1=[]) class RandomUniform(Initializer): """Initializer that generates tensors with a uniform distribution. Initializers allow you to pre-specify an initialization strategy, encoded in the Initializer object, without knowing the shape and dtype of the variable being initialized. Examples: >>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.ones_initializer()) >>> v1 >>> v2 >>> make_variables(4, tf.random_uniform_initializer(minval=-1., maxval=1.)) (, >> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, ... tf.random_normal_initializer(mean=1., stddev=2.)) >>> v1 >>> v2 >> make_variables(4, tf.random_uniform_initializer(minval=-1., maxval=1.)) (, >> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k], dtype=tf.float32))) >>> v1, v2 = make_variables( ... 3, tf.initializers.TruncatedNormal(mean=1., stddev=2.)) >>> v1 >>> v2 >> make_variables(4, tf.initializers.RandomUniform(minval=-1., maxval=1.)) (, >> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.VarianceScaling(scale=1.)) >>> v1 >>> v2 >> make_variables(4, tf.initializers.VarianceScaling(distribution='uniform')) (, >> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.Orthogonal()) >>> v1 >> v2 >> make_variables(4, tf.initializers.Orthogonal(gain=0.5)) (>> def make_variable(k, initializer): ... return tf.Variable(initializer(shape=[k, k], dtype=tf.float32)) >>> make_variable(2, tf.initializers.Identity()) >>> make_variable(3, tf.initializers.Identity(gain=0.5)) Args: gain: Multiplicative factor to apply to the identity matrix. """ def __init__(self, gain=1.0): self.gain = gain def __call__(self, shape, dtype=dtypes.float32, **kwargs): """Returns a tensor object initialized as specified by the initializer. Args: shape: Shape of the tensor. dtype: Optional dtype of the tensor. Only floating point types are supported. **kwargs: Additional keyword arguments. Raises: ValueError: If the dtype is not floating point ValueError: If the requested shape does not have exactly two axes. """ self._validate_kwargs(kwargs, support_partition=False) dtype = _assert_float_dtype(dtype) if len(shape) != 2: raise ValueError( "Identity matrix initializer can only be used for 2D matrices.") initializer = linalg_ops_impl.eye(*shape, dtype=dtype) return self.gain * initializer def get_config(self): return {"gain": self.gain} class GlorotUniform(VarianceScaling): """The Glorot uniform initializer, also called Xavier uniform initializer. Initializers allow you to pre-specify an initialization strategy, encoded in the Initializer object, without knowing the shape and dtype of the variable being initialized. Draws samples from a uniform distribution within [-limit, limit] where `limit` is `sqrt(6 / (fan_in + fan_out))` where `fan_in` is the number of input units in the weight tensor and `fan_out` is the number of output units in the weight tensor. Examples: >>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.GlorotUniform()) >>> v1 >> v2 >> make_variables(4, tf.initializers.RandomNormal()) (>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.GlorotNormal()) >>> v1 >> v2 >> make_variables(4, tf.initializers.RandomNormal()) (>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.lecun_normal()) >>> v1 >> v2 >> make_variables(4, tf.initializers.RandomNormal()) (>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.lecun_uniform()) >>> v1 >> v2 >> make_variables(4, tf.initializers.RandomNormal()) (>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.he_normal()) >>> v1 >> v2 >> make_variables(4, tf.initializers.RandomNormal()) (>> def make_variables(k, initializer): ... return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)), ... tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32))) >>> v1, v2 = make_variables(3, tf.initializers.he_uniform()) >>> v1 >> v2 >> make_variables(4, tf.initializers.RandomNormal()) (