TRAIN HANGUL-RNN
import numpy as np
import tensorflow as tf
import collections
import argparse
import time
import os
from six.moves import cPickle
from TextLoader import *
from Hangulpy import *
print ("Packages Imported")
Packages Imported
LOAD DATASET WITH TEXTLOADER
data_dir = "data/nine_dreams"
batch_size = 50
seq_length = 50
data_loader = TextLoader(data_dir, batch_size, seq_length)
loading preprocessed files
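TextLoader is the small helper shipped with this repo: it reads the (jamo-decomposed) corpus from data_dir, builds a character vocabulary sorted by frequency, converts the text to an index array, and slices it into batches of shape (batch_size, seq_length). A rough sketch of that idea in plain numpy (illustrative only, not the actual TextLoader code; it reuses the numpy/collections imports above):
def build_batches(text, batch_size, seq_length):
    counter = collections.Counter(text)                 # character frequencies
    chars, _ = zip(*counter.most_common())              # index -> char, most frequent first
    vocab = {c: i for i, c in enumerate(chars)}         # char -> index
    data = np.array([vocab[c] for c in text])           # corpus as an index array
    num_batches = len(data) // (batch_size * seq_length)
    data = data[:num_batches * batch_size * seq_length]
    xdata = data
    ydata = np.roll(data, -1)                           # target = next character
    x_batches = np.split(xdata.reshape(batch_size, -1), num_batches, 1)
    y_batches = np.split(ydata.reshape(batch_size, -1), num_batches, 1)
    return chars, vocab, x_batches, y_batches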
VOCAB AND CHARS
vocab_size = data_loader.vocab_size
vocab = data_loader.vocab
chars = data_loader.chars
print ( "type of 'data_loader.vocab' is %s, length is %d"
% (type(data_loader.vocab), len(data_loader.vocab)) )
print ( "type of 'data_loader.chars' is %s, length is %d"
% (type(data_loader.chars), len(data_loader.chars)) )
type of 'data_loader.vocab' is <type 'dict'>, length is 76
type of 'data_loader.chars' is <type 'tuple'>, length is 76
VOCAB: DICTIONARY (CHAR->INDEX)
print (data_loader.vocab)
{u'_': 69, u'6': 59, u':': 57, u'\n': 19, u'4': 67, u'5': 63, u'>': 75, u'!': 52, u' ': 1, u'"': 28, u'\u1d25': 0, u"'": 49, u')': 46, u'(': 45, u'-': 65, u',': 27, u'.': 24, u'\u3131': 7, u'0': 73, u'\u3133': 60, u'\u3132': 29, u'\u3135': 50, u'\u3134': 4, u'\u3137': 13, u'\u3136': 44, u'\u3139': 5, u'\u3138': 32, u'\u313b': 55, u'\u313a': 48, u'\u313c': 54, u'?': 41, u'3': 66, u'\u3141': 12, u'\u3140': 51, u'\u3143': 47, u'\u3142': 17, u'\u3145': 10, u'\u3144': 43, u'\u3147': 2, u'\u3146': 22, u'\u3149': 40, u'\u3148': 15, u'\u314b': 42, u'\u314a': 23, u'\u314d': 31, u'\u314c': 30, u'\u314f': 3, u'\u314e': 14, u'\u3151': 34, u'\u3150': 21, u'\u3153': 11, u'\u3152': 74, u'\u3155': 18, u'\u3154': 20, u'\u3157': 9, u'\u3156': 39, u'\u3159': 53, u'\u3158': 26, u'\u315b': 38, u'\u315a': 33, u'\u315d': 36, u'\u315c': 16, u'\u315f': 35, u'\u315e': 61, u'\u3161': 8, u'\u3160': 37, u'\u3163': 6, u'\u3162': 25, u'\x1a': 72, u'9': 64, u'7': 71, u'2': 62, u'1': 58, u'\u313f': 56, u'\u313e': 70, u'8': 68}
CHARS: TUPLE (INDEX->CHAR)
print (data_loader.chars)
print (data_loader.chars[0])
(u'\u1d25', u' ', u'\u3147', u'\u314f', u'\u3134', u'\u3139', u'\u3163', u'\u3131', u'\u3161', u'\u3157', u'\u3145', u'\u3153', u'\u3141', u'\u3137', u'\u314e', u'\u3148', u'\u315c', u'\u3142', u'\u3155', u'\n', u'\u3154', u'\u3150', u'\u3146', u'\u314a', u'.', u'\u3162', u'\u3158', u',', u'"', u'\u3132', u'\u314c', u'\u314d', u'\u3138', u'\u315a', u'\u3151', u'\u315f', u'\u315d', u'\u3160', u'\u315b', u'\u3156', u'\u3149', u'?', u'\u314b', u'\u3144', u'\u3136', u'(', u')', u'\u3143', u'\u313a', u"'", u'\u3135', u'\u3140', u'!', u'\u3159', u'\u313c', u'\u313b', u'\u313f', u':', u'1', u'6', u'\u3133', u'\u315e', u'2', u'5', u'9', u'-', u'3', u'4', u'8', u'_', u'\u313e', u'7', u'\x1a', u'0', u'\u3152', u'>')
ᴥ
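The vocabulary has only 76 symbols because the corpus was decomposed into individual jamo with Hangulpy before training; the most frequent symbol (index 0) is ᴥ, which appears to act as the filler for syllables with no final consonant. The decomposition itself is just Unicode arithmetic; an illustrative stand-alone version (not Hangulpy's actual API):
CHO  = u"ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎ"
JUNG = u"ㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ"
JONG = [u"ᴥ"] + list(u"ㄱㄲㄳㄴㄵㄶㄷㄹㄺㄻㄼㄽㄾㄿㅀㅁㅂㅄㅅㅆㅇㅈㅊㅋㅌㅍㅎ")
def to_jamo(syllable):
    code = ord(syllable) - 0xAC00          # precomposed Hangul syllables start at U+AC00
    cho, rest = divmod(code, 21 * 28)      # 21 medials * 28 finals per initial consonant
    jung, jong = divmod(rest, 28)
    return CHO[cho] + JUNG[jung] + JONG[jong]
print (to_jamo(u"한"))   # -> ㅎㅏㄴ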
TRAINING BATCH (IMPORTANT!!)
x, y = data_loader.next_batch()
print ("Type of 'x' is %s. Shape is %s" % (type(x), x.shape,))
print ("x looks like \n%s" % (x))
print
print ("Type of 'y' is %s. Shape is %s" % (type(y), y.shape,))
print ("y looks like \n%s" % (y))
Type of 'x' is <type 'numpy.ndarray'>. Shape is (50, 50)
x looks like
[[ 3 5 0 ..., 3 4 0]
[20 0 1 ..., 13 3 0]
[10 11 2 ..., 1 7 3]
...,
[ 1 17 6 ..., 0 1 7]
[ 0 14 3 ..., 12 3 4]
[ 0 7 3 ..., 1 15 3]]
Type of 'y' is <type 'numpy.ndarray'>. Shape is (50, 50)
y looks like
[[ 5 0 1 ..., 4 0 15]
[ 0 1 7 ..., 3 0 24]
[11 2 0 ..., 7 3 0]
...,
[17 6 0 ..., 1 7 9]
[14 3 0 ..., 3 4 0]
[ 7 3 0 ..., 15 3 2]]
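The target batch y is the input batch x shifted left by one position, so the network learns to predict the next jamo from the current one. A quick sanity check (this holds within a batch, apart from the wrap-around at the very end of the corpus):
print (np.array_equal(x[:, 1:], y[:, :-1]))   # True: y[t] is x[t+1]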
DEFINE A MULTILAYER LSTM NETWORK
rnn_size = 512     # LSTM hidden units per layer
num_layers = 3     # number of stacked LSTM layers
grad_clip = 5.     # global-norm clipping threshold for gradients
vocab_size = data_loader.vocab_size
unitcell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)
cell = tf.nn.rnn_cell.MultiRNNCell([unitcell] * num_layers)
input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
targets = tf.placeholder(tf.int32, [batch_size, seq_length])
initial_state = cell.zero_state(batch_size, tf.float32)
with tf.variable_scope('rnnlm'):
    # output projection from the last LSTM layer to the jamo vocabulary
    softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    with tf.device("/cpu:0"):
        # character embedding; lookup gives [batch_size, seq_length, rnn_size]
        embedding = tf.get_variable("embedding", [vocab_size, rnn_size])
        inputs = tf.split(1, seq_length, tf.nn.embedding_lookup(
            embedding, input_data))
        # rnn_decoder expects a list of seq_length tensors of [batch_size, rnn_size]
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
print ("Network ready")
Network ready
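Shapes at this point: embedding_lookup returns [batch_size, seq_length, rnn_size]; the split and squeeze turn that into a Python list of seq_length tensors, each [batch_size, rnn_size], which is the list format rnn_decoder expects. A numpy stand-in just to make the shapes concrete:
emb = np.zeros((batch_size, seq_length, rnn_size))   # stand-in for the embedding lookup
steps = [emb[:, t, :] for t in range(seq_length)]    # what split + squeeze produce
print (len(steps), steps[0].shape)                   # 50 (50, 512)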
DEFINE FUNCTIONS
outputs, last_state = tf.nn.seq2seq.rnn_decoder(inputs, initial_state
, cell, loop_function=None, scope='rnnlm')
output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
probs = tf.nn.softmax(logits)
print ("FUNCTIONS READY")
FUNCTIONS READY
DEFINE LOSS FUNCTION
loss = tf.nn.seq2seq.sequence_loss_by_example([logits],
[tf.reshape(targets, [-1])],
[tf.ones([batch_size * seq_length])],
vocab_size)
print ("LOSS FUNCTION")
LOSS FUNCTION
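sequence_loss_by_example returns one cross-entropy value per (batch, time) position, weighted here by all-ones weights; the cost in the next cell divides the sum by batch_size * seq_length, giving the average negative log-likelihood per jamo (np.exp of which is the per-character perplexity). The same computation in numpy terms (illustrative, with hypothetical argument names):
def avg_char_nll(logits_2d, target_ids):
    # logits_2d: (batch*seq, vocab), target_ids: (batch*seq,)
    z = logits_2d - logits_2d.max(axis=1, keepdims=True)        # numerically stable softmax
    log_probs = z - np.log(np.exp(z).sum(axis=1, keepdims=True))
    nll = -log_probs[np.arange(len(target_ids)), target_ids]    # per-position cross-entropy
    return nll.mean()                                            # cost; np.exp(cost) = perplexity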
DEFINE COST FUNCTION
cost = tf.reduce_sum(loss) / batch_size / seq_length
lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)
_optm = tf.train.AdamOptimizer(lr)
optm = _optm.apply_gradients(zip(grads, tvars))
final_state = last_state
print ("NETWORK READY")
NETWORK READY
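clip_by_global_norm rescales the whole list of gradients when their joint L2 norm exceeds grad_clip, keeping the update direction but capping its size; Adam then applies the (possibly rescaled) gradients at the scheduled learning rate lr. The clipping rule in numpy terms (illustrative):
def clip_global(grads, clip_norm):
    gnorm = np.sqrt(sum((g ** 2).sum() for g in grads))   # global L2 norm over all gradients
    scale = clip_norm / max(gnorm, clip_norm)             # 1.0 when already within the limit
    return [g * scale for g in grads], gnorm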
OPTIMIZE NETWORK WITH LR SCHEDULING
num_epochs = 500
save_every = 1000
learning_rate = 0.0002
decay_rate = 0.97
save_dir = 'data/nine_dreams'
sess = tf.Session()
sess.run(tf.initialize_all_variables())
summary_writer = tf.train.SummaryWriter(save_dir
, graph=sess.graph)
saver = tf.train.Saver(tf.all_variables())
for e in range(num_epochs):
    # exponential learning-rate decay: lr = learning_rate * decay_rate**e
    sess.run(tf.assign(lr, learning_rate * (decay_rate ** e)))
    data_loader.reset_batch_pointer()
    state = sess.run(initial_state)
    for b in range(data_loader.num_batches):
        start = time.time()
        x, y = data_loader.next_batch()
        # carry the LSTM state across batches within an epoch
        feed = {input_data: x, targets: y, initial_state: state}
        train_loss, state, _ = sess.run([cost, final_state, optm], feed)
        end = time.time()
        if b % 100 == 0:
            print ("%d/%d (epoch: %d), loss: %.3f, time/batch: %.3f"
                   % (e * data_loader.num_batches + b
                      , num_epochs * data_loader.num_batches
                      , e, train_loss, end - start))
        if (e * data_loader.num_batches + b) % save_every == 0:
            checkpoint_path = os.path.join(save_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path
                       , global_step=e * data_loader.num_batches + b)
            print ("model saved to {}".format(checkpoint_path))