I’m trying to train a simple CNN in TensorFlow 2.1.0 using a Colab TPU.
Here is my code.
[1]
%tensorflow_version 2.x
[2]
import tensorflow as tf
import os
tpu_grpc_url = "grpc://" + os.environ["COLAB_TPU_ADDR"]
tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu_grpc_url)
tf.config.experimental_connect_to_cluster(tpu_cluster_resolver)
tf.tpu.experimental.initialize_tpu_system(tpu_cluster_resolver)
strategy = tf.distribute.experimental.TPUStrategy(tpu_cluster_resolver)
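(As a sanity check, the TPU devices can be listed right after this cell; a minimal sketch using the experimental config API shipped with 2.1, assuming the imports above:)
# Sketch: confirm the TPU cores are visible after initialization.
# A Colab TPU typically reports eight logical TPU devices here.
tpu_devices = tf.config.experimental.list_logical_devices("TPU")
print(len(tpu_devices), "TPU devices found")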
[3]
from tensorflow.keras.layers import *
def conv_block(x, ch):
    x = Conv2D(ch, 3, padding='same')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    return x

def create_net():
    inputs = Input((32, 32, 3))
    x = conv_block(inputs, 32)
    x = AveragePooling2D()(x)
    x = conv_block(x, 64)
    x = AveragePooling2D()(x)
    x = conv_block(x, 128)
    x = AveragePooling2D()(x)
    x = conv_block(x, 256)
    x = GlobalAveragePooling2D()(x)
    x = Dense(10)(x)
    out = Softmax()(x)
    return tf.keras.models.Model(inputs, out)
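(For what it's worth, building the model and running a plain forward pass works on its own; a minimal sketch, assuming the imports from cell [2]:)
# Sketch: the model constructs and a forward pass on a random batch succeeds;
# only the optimizer update in cell [4] below raises the error.
check_net = create_net()
print(check_net(tf.random.normal((4, 32, 32, 3))).shape)  # expected: (4, 10)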
[4]
import random
import numpy as np
net = create_net()
loss_func = tf.keras.losses.CategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
opt = tf.keras.optimizers.Adam()
temp_x = tf.random.normal((8, 32, 32, 3))
temp_y = np.array([random.randint(1, 9) for i in range(8)])
temp_y = tf.keras.utils.to_categorical(temp_y, num_classes=10)
temp_y = tf.convert_to_tensor(temp_y)
def train_on_batch(x, y):
    with tf.GradientTape() as tape:
        p = net(x)
        loss = tf.reduce_sum(loss_func(y, p))
    grad = tape.gradient(loss, net.trainable_variables)
    opt.apply_gradients(zip(grad, net.trainable_variables))
    return loss
loss = train_on_batch(temp_x, temp_y)
And I got the error below.
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-21-cd67a4cb373f> in <module>()
19 return loss
20
---> 21 loss = train_on_batch(temp_x, temp_y)
12 frames
<ipython-input-21-cd67a4cb373f> in train_on_batch(x, y)
16 loss = tf.reduce_sum(loss_func(y, p))
17 grad = tape.gradient(loss, net.trainable_variables)
---> 18 opt.apply_gradients(zip(grad, net.trainable_variables))
19 return loss
20
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py in apply_gradients(self, grads_and_vars, name)
442 functools.partial(self._distributed_apply, apply_state=apply_state),
443 args=(grads_and_vars,),
--> 444 kwargs={"name": name})
445
446 def _distributed_apply(self, distribution, grads_and_vars, name, apply_state):
/tensorflow-2.1.0/python3.6/tensorflow_core/python/distribute/distribute_lib.py in merge_call(self, merge_fn, args, kwargs)
1947 if kwargs is None:
1948 kwargs = {}
-> 1949 return self._merge_call(merge_fn, args, kwargs)
1950
1951 def _merge_call(self, merge_fn, args, kwargs):
/tensorflow-2.1.0/python3.6/tensorflow_core/python/distribute/distribute_lib.py in _merge_call(self, merge_fn, args, kwargs)
1954 distribution_strategy_context._CrossReplicaThreadMode(self._strategy)) # pylint: disable=protected-access
1955 try:
-> 1956 return merge_fn(self._strategy, *args, **kwargs)
1957 finally:
1958 _pop_per_thread_mode()
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py in _distributed_apply(self, distribution, grads_and_vars, name, apply_state)
486 update_ops.extend(
487 distribution.extended.update(
--> 488 var, apply_grad_to_update_var, args=(grad,), group=False))
489
490 any_symbolic = any(isinstance(i, ops.Operation) or
/tensorflow-2.1.0/python3.6/tensorflow_core/python/distribute/distribute_lib.py in update(self, var, fn, args, kwargs, group)
1541 kwargs = {}
1542 with self._container_strategy().scope():
-> 1543 return self._update(var, fn, args, kwargs, group)
1544
1545 def _update(self, var, fn, args, kwargs, group):
/tensorflow-2.1.0/python3.6/tensorflow_core/python/distribute/distribute_lib.py in _update(self, var, fn, args, kwargs, group)
2172 # The implementations of _update() and _update_non_slot() are identical
2173 # except _update() passes `var` as the first argument to `fn()`.
-> 2174 return self._update_non_slot(var, fn, (var,) + tuple(args), kwargs, group)
2175
2176 def _update_non_slot(self, colocate_with, fn, args, kwargs, should_group):
/tensorflow-2.1.0/python3.6/tensorflow_core/python/distribute/distribute_lib.py in _update_non_slot(self, colocate_with, fn, args, kwargs, should_group)
2178 # once that value is used for something.
2179 with UpdateContext(colocate_with):
-> 2180 result = fn(*args, **kwargs)
2181 if should_group:
2182 return result
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py in apply_grad_to_update_var(var, grad)
468 if "apply_state" in self._dense_apply_args:
469 apply_kwargs["apply_state"] = apply_state
--> 470 update_op = self._resource_apply_dense(grad, var, **apply_kwargs)
471 if var.constraint is not None:
472 with ops.control_dependencies([update_op]):
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/optimizer_v2/adam.py in _resource_apply_dense(self, grad, var, apply_state)
205 coefficients['epsilon'],
206 grad,
--> 207 use_locking=self._use_locking)
208 else:
209 vhat = self.get_slot(var, 'vhat')
/tensorflow-2.1.0/python3.6/tensorflow_core/python/training/gen_training_ops.py in resource_apply_adam(var, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, use_locking, use_nesterov, name)
1421 pass # Add nodes to the TensorFlow graph.
1422 except _core._NotOkStatusException as e:
-> 1423 _ops.raise_from_not_ok_status(e, name)
1424 # Add nodes to the TensorFlow graph.
1425 if use_locking is None:
/tensorflow-2.1.0/python3.6/tensorflow_core/python/framework/ops.py in raise_from_not_ok_status(e, name)
6604 message = e.message + (" name: " + name if name is not None else "")
6605 # pylint: disable=protected-access
-> 6606 six.raise_from(core._status_to_exception(e.code, message), None)
6607 # pylint: enable=protected-access
6608
/usr/local/lib/python3.6/dist-packages/six.py in raise_from(value, from_value)
InvalidArgumentError: Shapes must be equal rank, but are 0 and 4 for 'ResourceApplyAdam' (op: 'ResourceApplyAdam') with input shapes: [], [], [], [], [], [], [], [], [], [3,3,3,32]. [Op:ResourceApplyAdam]
I also ran this code:
[5]
with tf.GradientTape() as tape:
    p = net(temp_x)
    loss = tf.reduce_sum(loss_func(temp_y, p))
grad = tape.gradient(loss, net.trainable_variables)
for i in range(len(grad)):
    print(grad[i].shape, net.trainable_variables[i].shape)
The output is below, and it seems that the shapes of the gradients and parameters match.
(3, 3, 3, 32) (3, 3, 3, 32)
(32,) (32,)
(32,) (32,)
(32,) (32,)
(3, 3, 32, 64) (3, 3, 32, 64)
(64,) (64,)
(64,) (64,)
(64,) (64,)
(3, 3, 64, 128) (3, 3, 64, 128)
(128,) (128,)
(128,) (128,)
(128,) (128,)
(3, 3, 128, 256) (3, 3, 128, 256)
(256,) (256,)
(256,) (256,)
(256,) (256,)
(256, 10) (256, 10)
(10,) (10,)
I tried changing optimizers and found that RMSprop was able to work; the sketch below shows the only change. But I want to use Adam with a custom training loop, because I want to train a GAN.
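For reference, the working variant differs from cell [4] only in the optimizer line (a minimal sketch; everything else, including train_on_batch, is unchanged):
# Only change relative to cell [4]: swap Adam for RMSprop.
opt = tf.keras.optimizers.RMSprop()
loss = train_on_batch(temp_x, temp_y)  # runs without the rank error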
Please tell me what I can do to solve this problem.
Tags: exception, python, tensorflow