Home » Python » python – I can't reply Adam in tensorflow 2.1.0 with using colab TPU-Exceptionshub

python – I can't reply Adam in tensorflow 2.1.0 with using colab TPU-Exceptionshub

Posted by: admin February 24, 2020 Leave a comment

Questions:

I’m trying to train simple cnn in tensorflow 2.1.0 with using colab TPU.
Here is my code.

[1]
%tensorflow_version 2.x

[2]
import tensorflow as tf
import os

tpu_grpc_url = "grpc://" + os.environ["COLAB_TPU_ADDR"]
tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu_grpc_url)
tf.config.experimental_connect_to_cluster(tpu_cluster_resolver)
tf.tpu.experimental.initialize_tpu_system(tpu_cluster_resolver)
strategy = tf.distribute.experimental.TPUStrategy(tpu_cluster_resolver)

[3]
from tensorflow.keras.layers import *

def conv_block(x, ch):
    x = Conv2D(ch, 3, padding='same')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    return x

def create_net():
    inputs = Input((32, 32, 3))
    x = conv_block(inputs, 32)
    x = AveragePooling2D()(x)
    x = conv_block(x, 64)
    x = AveragePooling2D()(x)
    x = conv_block(x, 128)
    x = AveragePooling2D()(x)
    x = conv_block(x, 256)
    x = GlobalAveragePooling2D()(x)
    x = Dense(10)(x)
    out = Softmax()(x)
    return tf.keras.models.Model(inputs, out)

[4]
import random
import numpy as np

net = create_net()
loss_func = tf.keras.losses.CategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
opt = tf.keras.optimizers.Adam()

temp_x = tf.random.normal((8, 32, 32, 3))
temp_y = np.array([random.randint(1, 9) for i in range(8)])
temp_y = tf.keras.utils.to_categorical(temp_y, num_classes=10)
temp_y = tf.convert_to_tensor(temp_y)

def train_on_batch(x, y):
    with tf.GradientTape() as tape:
        p = net(x)
        loss = tf.reduce_sum(loss_func(y, p))
    grad = tape.gradient(loss, net.trainable_variables)
    opt.apply_gradients(zip(grad, net.trainable_variables))
    return loss

loss = train_on_batch(temp_x, temp_y)

And I got a error in below.

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-21-cd67a4cb373f> in <module>()
     19     return loss
     20 
---> 21 loss = train_on_batch(temp_x, temp_y)

12 frames
<ipython-input-21-cd67a4cb373f> in train_on_batch(x, y)
     16         loss = tf.reduce_sum(loss_func(y, p))
     17     grad = tape.gradient(loss, net.trainable_variables)
---> 18     opt.apply_gradients(zip(grad, net.trainable_variables))
     19     return loss
     20 

/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py in apply_gradients(self, grads_and_vars, name)
    442           functools.partial(self._distributed_apply, apply_state=apply_state),
    443           args=(grads_and_vars,),
--> 444           kwargs={"name": name})
    445 
    446   def _distributed_apply(self, distribution, grads_and_vars, name, apply_state):

/tensorflow-2.1.0/python3.6/tensorflow_core/python/distribute/distribute_lib.py in merge_call(self, merge_fn, args, kwargs)
   1947     if kwargs is None:
   1948       kwargs = {}
-> 1949     return self._merge_call(merge_fn, args, kwargs)
   1950 
   1951   def _merge_call(self, merge_fn, args, kwargs):

/tensorflow-2.1.0/python3.6/tensorflow_core/python/distribute/distribute_lib.py in _merge_call(self, merge_fn, args, kwargs)
   1954         distribution_strategy_context._CrossReplicaThreadMode(self._strategy))  # pylint: disable=protected-access
   1955     try:
-> 1956       return merge_fn(self._strategy, *args, **kwargs)
   1957     finally:
   1958       _pop_per_thread_mode()

/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py in _distributed_apply(self, distribution, grads_and_vars, name, apply_state)
    486           update_ops.extend(
    487               distribution.extended.update(
--> 488                   var, apply_grad_to_update_var, args=(grad,), group=False))
    489 
    490       any_symbolic = any(isinstance(i, ops.Operation) or

/tensorflow-2.1.0/python3.6/tensorflow_core/python/distribute/distribute_lib.py in update(self, var, fn, args, kwargs, group)
   1541       kwargs = {}
   1542     with self._container_strategy().scope():
-> 1543       return self._update(var, fn, args, kwargs, group)
   1544 
   1545   def _update(self, var, fn, args, kwargs, group):

/tensorflow-2.1.0/python3.6/tensorflow_core/python/distribute/distribute_lib.py in _update(self, var, fn, args, kwargs, group)
   2172     # The implementations of _update() and _update_non_slot() are identical
   2173     # except _update() passes `var` as the first argument to `fn()`.
-> 2174     return self._update_non_slot(var, fn, (var,) + tuple(args), kwargs, group)
   2175 
   2176   def _update_non_slot(self, colocate_with, fn, args, kwargs, should_group):

/tensorflow-2.1.0/python3.6/tensorflow_core/python/distribute/distribute_lib.py in _update_non_slot(self, colocate_with, fn, args, kwargs, should_group)
   2178     # once that value is used for something.
   2179     with UpdateContext(colocate_with):
-> 2180       result = fn(*args, **kwargs)
   2181       if should_group:
   2182         return result

/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py in apply_grad_to_update_var(var, grad)
    468       if "apply_state" in self._dense_apply_args:
    469         apply_kwargs["apply_state"] = apply_state
--> 470       update_op = self._resource_apply_dense(grad, var, **apply_kwargs)
    471       if var.constraint is not None:
    472         with ops.control_dependencies([update_op]):

/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/optimizer_v2/adam.py in _resource_apply_dense(self, grad, var, apply_state)
    205           coefficients['epsilon'],
    206           grad,
--> 207           use_locking=self._use_locking)
    208     else:
    209       vhat = self.get_slot(var, 'vhat')

/tensorflow-2.1.0/python3.6/tensorflow_core/python/training/gen_training_ops.py in resource_apply_adam(var, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, use_locking, use_nesterov, name)
   1421         pass  # Add nodes to the TensorFlow graph.
   1422     except _core._NotOkStatusException as e:
-> 1423       _ops.raise_from_not_ok_status(e, name)
   1424   # Add nodes to the TensorFlow graph.
   1425   if use_locking is None:

/tensorflow-2.1.0/python3.6/tensorflow_core/python/framework/ops.py in raise_from_not_ok_status(e, name)
   6604   message = e.message + (" name: " + name if name is not None else "")
   6605   # pylint: disable=protected-access
-> 6606   six.raise_from(core._status_to_exception(e.code, message), None)
   6607   # pylint: enable=protected-access
   6608 

/usr/local/lib/python3.6/dist-packages/six.py in raise_from(value, from_value)

InvalidArgumentError: Shapes must be equal rank, but are 0 and 4 for 'ResourceApplyAdam' (op: 'ResourceApplyAdam') with input shapes: [], [], [], [], [], [], [], [], [], [3,3,3,32]. [Op:ResourceApplyAdam]

I also run this code

[5]
with tf.GradientTape() as tape:
    p = net(temp_x)
    loss = tf.reduce_sum(loss_func(temp_y, p))
grad = tape.gradient(loss, net.trainable_variables)
for i in range(len(grad)):
    print(grad[i].shape, net.trainable_variables[i].shape)

then out is below, and it seems that the shapes of gradients and parameters is equal.

(3, 3, 3, 32) (3, 3, 3, 32)
(32,) (32,)
(32,) (32,)
(32,) (32,)
(3, 3, 32, 64) (3, 3, 32, 64)
(64,) (64,)
(64,) (64,)
(64,) (64,)
(3, 3, 64, 128) (3, 3, 64, 128)
(128,) (128,)
(128,) (128,)
(128,) (128,)
(3, 3, 128, 256) (3, 3, 128, 256)
(256,) (256,)
(256,) (256,)
(256,) (256,)
(256, 10) (256, 10)
(10,) (10,)

I tried to chage optimizers and found that RMSprop was able to work.
But, I want to use Adam and custom_training because I want to run GAN network.
Please tell me what can I do to solve this problem.

How to&Answers: