18.2 Debugging Models in TensorFlow
import os
import numpy as np
import tensorflow as tf
tf.set_random_seed(123)
print("TensorFlow:{}".format(tf.__version__))
TensorFlow:1.4.1
DATASETSLIB_HOME = '../datasetslib'
import sys
if DATASETSLIB_HOME not in sys.path:
    sys.path.append(DATASETSLIB_HOME)
%reload_ext autoreload
%autoreload 2
import datasetslib
datasetslib.datasets_root = os.path.join(os.path.expanduser('~'),'datasets')
MNIST Dataset
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(os.path.join(datasetslib.datasets_root,'mnist'), one_hot=True)
x_train = mnist.train.images
x_test = mnist.test.images
y_train = mnist.train.labels
y_test = mnist.test.labels
n_y = 10
n_x = 784
Extracting /home/armando/datasets/mnist/train-images-idx3-ubyte.gz
Extracting /home/armando/datasets/mnist/train-labels-idx1-ubyte.gz
Extracting /home/armando/datasets/mnist/t10k-images-idx3-ubyte.gz
Extracting /home/armando/datasets/mnist/t10k-labels-idx1-ubyte.gz
tf.Print()
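tf.Print() is an identity op: it passes its input tensor through unchanged and, as a side effect, prints the tensors listed in data (prefixed with message) each time the op executes, for at most first_n executions and at most summarize elements per tensor. It writes to standard error, so in a Jupyter setup the printed lines often appear in the console that launched the notebook server rather than in the cell output. Before applying it to the MNIST model below, here is a minimal sketch of the pattern; the tensor names are illustrative, not part of the original example:

# minimal sketch of the tf.Print() pattern (illustrative names)
tf.reset_default_graph()
t = tf.constant([1.0, 2.0, 3.0], name="t")
t = tf.Print(input_=t,                    # tensor passed through unchanged
             data=[t, tf.reduce_sum(t)],  # tensors to print as a side effect
             message='t and sum(t)=',     # prefix for the printed line
             summarize=3,                 # print at most 3 elements per tensor
             first_n=10)                  # print only for the first 10 executions
with tf.Session() as tfs:
    tfs.run(t)   # emits something like: t and sum(t)=[1 2 3][6] on stderr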
def mlp(x, num_inputs, num_outputs, num_layers, num_neurons):
    # weight and bias variables for the hidden layers
    w = []
    b = []
    for i in range(num_layers):
        w.append(tf.Variable(tf.random_normal(
            [num_inputs if i == 0 else num_neurons[i - 1],
             num_neurons[i]]),
            name="w_{0:04d}".format(i)
        ))
        b.append(tf.Variable(tf.random_normal(
            [num_neurons[i]]),
            name="b_{0:04d}".format(i)
        ))
    # weight and bias variables for the output layer
    w.append(tf.Variable(tf.random_normal(
        [num_neurons[num_layers - 1] if num_layers > 0 else num_inputs,
         num_outputs]), name="w_out"))
    b.append(tf.Variable(tf.random_normal([num_outputs]), name="b_out"))
    # stack the ReLU hidden layers followed by a linear output layer
    layer = x
    for i in range(num_layers):
        layer = tf.nn.relu(tf.matmul(layer, w[i]) + b[i])
    layer = tf.matmul(layer, w[num_layers]) + b[num_layers]
    return layer
tf.reset_default_graph()

num_layers = 2
num_neurons = [16, 32]
learning_rate = 0.01
n_epochs = 10
batch_size = 100
n_batches = int(mnist.train.num_examples / batch_size)

x_p = tf.placeholder(dtype=tf.float32, name="x_p", shape=[None, n_x])
y_p = tf.placeholder(dtype=tf.float32, name="y_p", shape=[None, n_y])

model = mlp(x=x_p,
            num_inputs=n_x,
            num_outputs=n_y,
            num_layers=num_layers,
            num_neurons=num_neurons)

# wrap the logits in tf.Print(): for the first 5 executions, print up to
# 10 of the predicted class labels, prefixed with 'y_hat='
model = tf.Print(input_=model,
                 data=[tf.argmax(model, 1)],
                 message='y_hat=',
                 summarize=10,
                 first_n=5
                 )

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=model, labels=y_p))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
optimizer = optimizer.minimize(loss)
with tf.Session() as tfs:
    tfs.run(tf.global_variables_initializer())
    for epoch in range(n_epochs):
        epoch_loss = 0.0
        for batch in range(n_batches):
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            feed_dict = {x_p: X_batch, y_p: Y_batch}
            _, batch_loss = tfs.run([optimizer, loss],
                                    feed_dict=feed_dict
                                    )
            epoch_loss += batch_loss
        average_loss = epoch_loss / n_batches
        print("epoch: {0:04d} loss = {1:0.6f}".format(epoch, average_loss))
epoch: 0000 loss = 6.716061
epoch: 0001 loss = 2.002921
epoch: 0002 loss = 1.733159
epoch: 0003 loss = 1.596843
epoch: 0004 loss = 1.504387
epoch: 0005 loss = 1.435767
epoch: 0006 loss = 1.378511
epoch: 0007 loss = 1.329088
epoch: 0008 loss = 1.287037
epoch: 0009 loss = 1.249800
tf.Assert()
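tf.Assert() creates an op that checks a boolean condition when it executes; if the condition is false, the session run aborts with an InvalidArgumentError and the tensors passed in data are printed in the error message. Because nothing consumes the assert op by default, it has to be attached to the graph with tf.control_dependencies(), as in the modified mlp() below. A minimal sketch of the pattern, with illustrative names that are not part of the original example:

# minimal sketch of the tf.Assert() pattern (illustrative names)
tf.reset_default_graph()
v = tf.placeholder(tf.float32, shape=[None], name="v")
# condition: every element of v must be non-negative
assert_op = tf.Assert(tf.reduce_all(tf.greater_equal(v, 0)), [v])
with tf.control_dependencies([assert_op]):
    doubled = 2 * v   # created inside the block, so the assert runs first
with tf.Session() as tfs:
    print(tfs.run(doubled, feed_dict={v: [1.0, 2.0]}))    # [2. 4.]
    # tfs.run(doubled, feed_dict={v: [-1.0, 2.0]})  # would abort with InvalidArgumentError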
def mlp(x, num_inputs, num_outputs, num_layers, num_neurons):
    w = []
    b = []
    for i in range(num_layers):
        w.append(tf.Variable(tf.random_normal(
            [num_inputs if i == 0 else num_neurons[i - 1],
             num_neurons[i]]),
            name="w_{0:04d}".format(i)
        ))
        b.append(tf.Variable(tf.random_normal(
            [num_neurons[i]]),
            name="b_{0:04d}".format(i)
        ))
    w.append(tf.Variable(tf.random_normal(
        [num_neurons[num_layers - 1] if num_layers > 0 else num_inputs,
         num_outputs]), name="w_out"))
    b.append(tf.Variable(tf.random_normal([num_outputs]), name="b_out"))
    # assert that every element of the input is non-negative; building the
    # layers under the control dependency makes the assertion execute
    # whenever the model is evaluated
    assert_op = tf.Assert(tf.reduce_all(tf.greater_equal(x, 0)), [x])
    with tf.control_dependencies([assert_op]):
        layer = x
        for i in range(num_layers):
            layer = tf.nn.relu(tf.matmul(layer, w[i]) + b[i])
        layer = tf.matmul(layer, w[num_layers]) + b[num_layers]
    return layer
tf.reset_default_graph()

num_layers = 2
num_neurons = [16, 32]
learning_rate = 0.01
n_epochs = 10
batch_size = 100
n_batches = int(mnist.train.num_examples / batch_size)

x_p = tf.placeholder(dtype=tf.float32, name="x_p", shape=[None, n_x])
y_p = tf.placeholder(dtype=tf.float32, name="y_p", shape=[None, n_y])

model = mlp(x=x_p,
            num_inputs=n_x,
            num_outputs=n_y,
            num_layers=num_layers,
            num_neurons=num_neurons)

model = tf.Print(input_=model,
                 data=[tf.argmax(model, 1)],
                 message='y_hat=',
                 summarize=10,
                 first_n=5
                 )

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=model, labels=y_p))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
optimizer = optimizer.minimize(loss)

with tf.Session() as tfs:
    tfs.run(tf.global_variables_initializer())
    for epoch in range(n_epochs):
        epoch_loss = 0.0
        for batch in range(n_batches):
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            if epoch > 5:
                # deliberately corrupt the batch with a negative pixel
                # value after epoch 5 to trigger the assertion
                X_batch = np.copy(X_batch)
                X_batch[0, 0] = -2
            feed_dict = {x_p: X_batch, y_p: Y_batch}
            _, batch_loss = tfs.run([optimizer, loss],
                                    feed_dict=feed_dict
                                    )
            epoch_loss += batch_loss
        average_loss = epoch_loss / n_batches
        print("epoch: {0:04d} loss = {1:0.6f}".format(epoch, average_loss))
epoch: 0000 loss = 7.803496
epoch: 0001 loss = 2.405806
epoch: 0002 loss = 2.057570
epoch: 0003 loss = 1.884273
epoch: 0004 loss = 1.768692
epoch: 0005 loss = 1.672444
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1322 try:
-> 1323 return fn(*args)
1324 except errors.OpError as e:
/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
1301 feed_dict, fetch_list, target_list,
-> 1302 status, run_metadata)
1303
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
472 compat.as_text(c_api.TF_Message(self.status.status)),
--> 473 c_api.TF_GetCode(self.status.status))
474 # Delete the underlying status object from memory otherwise it stays alive
InvalidArgumentError: assertion failed: [[-2 0 0]...]
[[Node: Assert/AssertGuard/Assert = Assert[T=[DT_FLOAT], summarize=3, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Assert/AssertGuard/Assert/Switch/_7, Assert/AssertGuard/Assert/Switch_1)]]
[[Node: Assert/AssertGuard/Assert/_10 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_18_Assert/AssertGuard/Assert", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-10-265ff61fe705> in <module>()
44 feed_dict={x_p: X_batch, y_p: Y_batch}
45 _,batch_loss = tfs.run([optimizer,loss],
---> 46 feed_dict = feed_dict
47 )
48 epoch_loss += batch_loss
/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
887 try:
888 result = self._run(None, fetches, feed_dict, options_ptr,
--> 889 run_metadata_ptr)
890 if run_metadata:
891 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1118 if final_fetches or final_targets or (handle and feed_dict_tensor):
1119 results = self._do_run(handle, final_targets, final_fetches,
-> 1120 feed_dict_tensor, options, run_metadata)
1121 else:
1122 results = []
/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1315 if handle is None:
1316 return self._do_call(_run_fn, self._session, feeds, fetches, targets,
-> 1317 options, run_metadata)
1318 else:
1319 return self._do_call(_prun_fn, self._session, handle, feeds, fetches)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1334 except KeyError:
1335 pass
-> 1336 raise type(e)(node_def, op, message)
1337
1338 def _extend_graph(self):
InvalidArgumentError: assertion failed: [[-2 0 0]...]
[[Node: Assert/AssertGuard/Assert = Assert[T=[DT_FLOAT], summarize=3, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Assert/AssertGuard/Assert/Switch/_7, Assert/AssertGuard/Assert/Switch_1)]]
[[Node: Assert/AssertGuard/Assert/_10 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_18_Assert/AssertGuard/Assert", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
Caused by op 'Assert/AssertGuard/Assert', defined at:
File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.5/dist-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelapp.py", line 477, in start
ioloop.IOLoop.instance().start()
File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "/usr/local/lib/python3.5/dist-packages/tornado/ioloop.py", line 888, in start
handler_func(fd_obj, events)
File "/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.5/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.5/dist-packages/ipykernel/zmqshell.py", line 533, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2728, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2910, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-10-265ff61fe705>", line 17, in <module>
num_neurons=num_neurons)
File "<ipython-input-8-437a0fa9085d>", line 23, in mlp
assert_op = tf.Assert(tf.reduce_all(tf.greater_equal(x,0)),[x])
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/tf_should_use.py", line 107, in wrapped
return _add_should_use_warning(fn(*args, **kwargs))
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 134, in Assert
condition, no_op, true_assert, name="AssertGuard")
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/deprecation.py", line 316, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 1864, in cond
orig_res_f, res_f = context_f.BuildCondBranch(false_fn)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 1725, in BuildCondBranch
original_result = fn()
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 132, in true_assert
condition, data, summarize, name="Assert")
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_logging_ops.py", line 47, in _assert
name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
op_def=op_def)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): assertion failed: [[-2 0 0]...]
[[Node: Assert/AssertGuard/Assert = Assert[T=[DT_FLOAT], summarize=3, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Assert/AssertGuard/Assert/Switch/_7, Assert/AssertGuard/Assert/Switch_1)]]
[[Node: Assert/AssertGuard/Assert/_10 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_18_Assert/AssertGuard/Assert", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
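The run aborts as soon as the corrupted batch is fed: the assert node reports assertion failed: [[-2 0 0]...], which is the offending input tensor truncated to its first three elements (the Assert node's summarize=3). If you would rather log and skip bad batches than stop training, one possible pattern is to catch tf.errors.InvalidArgumentError around the run call. The following is a hedged sketch, not part of the original example; it reuses the graph, mnist object, and hyperparameters defined above:

# hedged sketch: skip batches that fail the input assertion instead of stopping
with tf.Session() as tfs:
    tfs.run(tf.global_variables_initializer())
    for epoch in range(n_epochs):
        epoch_loss = 0.0
        n_ok = 0
        for batch in range(n_batches):
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            try:
                _, batch_loss = tfs.run([optimizer, loss],
                                        feed_dict={x_p: X_batch, y_p: Y_batch})
                epoch_loss += batch_loss
                n_ok += 1
            except tf.errors.InvalidArgumentError:
                # the assertion failed for this batch; report it and move on
                print("skipping batch {} of epoch {}: input assertion failed"
                      .format(batch, epoch))
        print("epoch: {0:04d} loss = {1:0.6f}".format(epoch, epoch_loss / max(n_ok, 1)))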