"""
 Deeper Multi-Layer Perceptron with XAVIER Init
 Xavier init from {Project: https://github.com/aymericdamien/TensorFlow-Examples/}
 @Sungjoon Choi ([email protected])
"""
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
%matplotlib inline

# Load the MNIST data sets from the 'data/' directory; one_hot=True requests
# one-hot encoded label vectors instead of integer class ids.
mnist = input_data.read_data_sets('data/', one_hot=True)

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz

# Xavier Init
def xavier_init(n_inputs, n_outputs, uniform=True):
    """Build a Xavier (Glorot) weight initializer for one layer.

    The scheme aims to keep the scale of the gradients roughly the same in
    all layers. Reference: Xavier Glorot and Yoshua Bengio (2010),
    "Understanding the difficulty of training deep feedforward neural
    networks", International conference on artificial intelligence and
    statistics.

    Args:
      n_inputs: The number of input nodes into each output.
      n_outputs: The number of output nodes for each input.
      uniform: If true, sample from a uniform distribution; otherwise from a
        truncated normal distribution.

    Returns:
      A TensorFlow initializer: `tf.random_uniform_initializer` over
      [-sqrt(6 / (n_inputs + n_outputs)), +sqrt(6 / (n_inputs + n_outputs))]
      when `uniform` is true, else `tf.truncated_normal_initializer` with
      stddev sqrt(3 / (n_inputs + n_outputs)).
    """
    fan_total = n_inputs + n_outputs

    if not uniform:
        # A numerator of 3 gives approximately the same limits as the uniform
        # case, because the truncated normal re-picks values lying more than
        # 2 standard deviations from the mean.
        return tf.truncated_normal_initializer(stddev=tf.sqrt(3.0 / fan_total))

    # The constant 6 is the one used in the paper.
    limit = tf.sqrt(6.0 / fan_total)
    return tf.random_uniform_initializer(-limit, limit)

# Optimisation hyperparameters
learning_rate = 0.001   # step size passed to the optimizer
training_epochs = 50    # number of passes of the outer training loop
batch_size = 100        # examples requested per mini-batch
display_step = 1        # log every `display_step` epochs

# Layer widths
n_input = 784           # MNIST data input (img shape: 28*28)
n_hidden_1 = n_hidden_2 = n_hidden_3 = n_hidden_4 = 256  # features per hidden layer
n_classes = 10          # MNIST total classes (0-9 digits)

# Graph inputs: a batch of flattened images, the matching label vectors, and
# the dropout keep probability (fed per run so it can differ between
# training and evaluation).
x = tf.placeholder("float", shape=[None, n_input])
y = tf.placeholder("float", shape=[None, n_classes])
dropout_keep_prob = tf.placeholder("float")

# Create model
def multilayer_perceptron(_X, _weights, _biases, _keep_prob):
    """Build the forward pass of a four-hidden-layer perceptron.

    Each hidden layer computes dropout(relu(input * W + b)); the output layer
    is a plain affine map, so the result is un-normalised class scores
    (no softmax is applied here).

    Args:
      _X: Input tensor fed to the first layer.
      _weights: Dict with weight tensors under 'h1'..'h4' and 'out'.
      _biases: Dict with bias tensors under 'b1'..'b4' and 'out'.
      _keep_prob: Keep probability passed to `tf.nn.dropout` in every
        hidden layer.

    Returns:
      The output-layer tensor (logits).
    """
    activation = _X
    for w_key, b_key in (('h1', 'b1'), ('h2', 'b2'), ('h3', 'b3'), ('h4', 'b4')):
        pre_activation = tf.add(tf.matmul(activation, _weights[w_key]), _biases[b_key])
        activation = tf.nn.dropout(tf.nn.relu(pre_activation), _keep_prob)
    return tf.matmul(activation, _weights['out']) + _biases['out']

# Trainable parameters. Each weight matrix is a named variable created with
# the Xavier initializer sized to its own fan-in/fan-out; the specs are
# listed in layer order so the variables are created in that order.
weights = {
    name: tf.get_variable(name, shape=[fan_in, fan_out],
                          initializer=xavier_init(fan_in, fan_out))
    for name, fan_in, fan_out in [
        ("h1", n_input, n_hidden_1),
        ("h2", n_hidden_1, n_hidden_2),
        ("h3", n_hidden_2, n_hidden_3),
        ("h4", n_hidden_3, n_hidden_4),
        ("out", n_hidden_4, n_classes),
    ]
}
# Biases are one vector per layer, drawn from tf.random_normal.
biases = {
    name: tf.Variable(tf.random_normal([width]))
    for name, width in [
        ('b1', n_hidden_1),
        ('b2', n_hidden_2),
        ('b3', n_hidden_3),
        ('b4', n_hidden_4),
        ('out', n_classes),
    ]
}

# Construct model: `pred` holds the network's un-normalised class scores
# (logits); the softmax is applied inside the loss below.
pred = multilayer_perceptron(x, weights, biases, dropout_keep_prob)

# Define loss and optimizer
# The arguments are passed by keyword: TensorFlow 1.x rejects positional
# arguments to softmax_cross_entropy_with_logits, and the keyword form also
# works with the older (logits, labels) signature.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)) # Softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer
# optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.8).minimize(cost) # Momentum Optimizer (alternative)

# Accuracy: fraction of examples whose highest-scoring class matches the
# position of the largest entry in the label vector.
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Initializing the variables
init = tf.initialize_all_variables()

print ("Network Ready")

Network Ready

# Open a session and run the variable initializer once before training.
sess = tf.Session()
sess.run(init)

# Training cycle
for epoch in range(training_epochs):
    total_batch = mnist.train.num_examples // batch_size
    avg_cost = 0.
    # One optimizer step per mini-batch.
    for _ in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        train_feed = {x: batch_xs, y: batch_ys, dropout_keep_prob: 0.7}
        eval_feed = {x: batch_xs, y: batch_ys, dropout_keep_prob:1.}
        # Update the parameters with dropout active (keep probability 0.7).
        sess.run(optimizer, feed_dict=train_feed)
        # Re-evaluate the loss on the same batch with dropout disabled and
        # fold it into the running epoch average.
        batch_cost = sess.run(cost, feed_dict=eval_feed)
        avg_cost += batch_cost/total_batch
    # Only report on every `display_step`-th epoch.
    if epoch % display_step != 0:
        continue
    print ("Epoch: %03d/%03d cost: %.9f" % (epoch, training_epochs, avg_cost))
    # Accuracy is measured on the last mini-batch of the epoch only.
    train_acc = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys, dropout_keep_prob:1.})
    print ("Training accuracy: %.3f" % (train_acc))

print ("Optimization Finished!")

Epoch: 000/050 cost: 0.418188300
Training accuracy: 0.970
Epoch: 001/050 cost: 0.135868739
Training accuracy: 0.960
Epoch: 002/050 cost: 0.095398066
Training accuracy: 0.980
Epoch: 003/050 cost: 0.075697627
Training accuracy: 0.970
Epoch: 004/050 cost: 0.061306230
Training accuracy: 0.980
Epoch: 005/050 cost: 0.052467199
Training accuracy: 0.970
Epoch: 006/050 cost: 0.046108751
Training accuracy: 0.970
Epoch: 007/050 cost: 0.040648738
Training accuracy: 1.000
Epoch: 008/050 cost: 0.034662794
Training accuracy: 0.980
Epoch: 009/050 cost: 0.030895058
Training accuracy: 0.980
Epoch: 010/050 cost: 0.027404196
Training accuracy: 0.990
Epoch: 011/050 cost: 0.025599543
Training accuracy: 0.990
Epoch: 012/050 cost: 0.022524802
Training accuracy: 0.990
Epoch: 013/050 cost: 0.020978481
Training accuracy: 1.000
Epoch: 014/050 cost: 0.018992848
Training accuracy: 1.000
Epoch: 015/050 cost: 0.018180760
Training accuracy: 1.000
Epoch: 016/050 cost: 0.015807947
Training accuracy: 1.000
Epoch: 017/050 cost: 0.015066529
Training accuracy: 1.000
Epoch: 018/050 cost: 0.013667117
Training accuracy: 0.990
Epoch: 019/050 cost: 0.012571550
Training accuracy: 1.000
Epoch: 020/050 cost: 0.011941066
Training accuracy: 0.990
Epoch: 021/050 cost: 0.011349857
Training accuracy: 0.990
Epoch: 022/050 cost: 0.010185919
Training accuracy: 1.000
Epoch: 023/050 cost: 0.010433348
Training accuracy: 1.000
Epoch: 024/050 cost: 0.009212835
Training accuracy: 1.000
Epoch: 025/050 cost: 0.008935386
Training accuracy: 0.990
Epoch: 026/050 cost: 0.007533159
Training accuracy: 1.000
Epoch: 027/050 cost: 0.008139531
Training accuracy: 1.000
Epoch: 028/050 cost: 0.007539478
Training accuracy: 1.000
Epoch: 029/050 cost: 0.007227370
Training accuracy: 1.000
Epoch: 030/050 cost: 0.007473362
Training accuracy: 1.000
Epoch: 031/050 cost: 0.006676663
Training accuracy: 0.980
Epoch: 032/050 cost: 0.005731412
Training accuracy: 1.000
Epoch: 033/050 cost: 0.005602606
Training accuracy: 0.990
Epoch: 034/050 cost: 0.005471543
Training accuracy: 1.000
Epoch: 035/050 cost: 0.005467167
Training accuracy: 1.000
Epoch: 036/050 cost: 0.006077243
Training accuracy: 1.000
Epoch: 037/050 cost: 0.005801255
Training accuracy: 1.000
Epoch: 038/050 cost: 0.005572326
Training accuracy: 1.000
Epoch: 039/050 cost: 0.005355799
Training accuracy: 1.000
Epoch: 040/050 cost: 0.004890651
Training accuracy: 1.000
Epoch: 041/050 cost: 0.004345889
Training accuracy: 1.000
Epoch: 042/050 cost: 0.004596357
Training accuracy: 1.000
Epoch: 043/050 cost: 0.003729049
Training accuracy: 1.000
Epoch: 044/050 cost: 0.004191519
Training accuracy: 1.000
Epoch: 045/050 cost: 0.004694648
Training accuracy: 1.000
Epoch: 046/050 cost: 0.003776975
Training accuracy: 1.000
Epoch: 047/050 cost: 0.004078514
Training accuracy: 0.990
Epoch: 048/050 cost: 0.003377332
Training accuracy: 1.000
Epoch: 049/050 cost: 0.003732143
Training accuracy: 1.000
Optimization Finished!

# Evaluate accuracy on the MNIST test images/labels with dropout disabled
# (keep probability 1). The label says "Test" because this value comes from
# mnist.test, not from the training data.
test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, dropout_keep_prob:1.})
print ("Test accuracy: %.3f" % (test_acc))

Training accuracy: 0.982

Xavier Init MNIST

results matching ""

No results matching ""