TRAIN HANGUL-RNN

# -*- coding: utf-8 -*-
# Import Packages
import numpy as np
import tensorflow as tf
import collections
import argparse
import time
import os
from six.moves import cPickle
from TextLoader import *
from Hangulpy import *
print ("Packages Imported")
Packages Imported

LOAD DATASET WITH TEXTLOADER

data_dir    = "data/nine_dreams"
batch_size  = 50
seq_length  = 50
data_loader = TextLoader(data_dir, batch_size, seq_length)
# This makes "vocab.pkl" and "data.npy" in "data/nine_dreams"   
#  from "data/nine_dreams/input.txt"
loading preprocessed files
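
The TextLoader class itself is not listed here. As a rough sketch (an assumption, not the actual source), its first run presumably follows the usual char-rnn preprocessing: build a character vocabulary from input.txt sorted by frequency, encode the text as an index array, and cache vocab.pkl and data.npy; on later runs it just reloads those files, hence the message above.

# Rough sketch (assumption) of what TextLoader does on its first run
with open("data/nine_dreams/input.txt", "r") as f:
    raw = f.read().decode("utf-8")                        # Python 2, matching the notebook
counter = collections.Counter(raw)
chars   = tuple(c for c, _ in counter.most_common())      # most frequent symbol -> index 0
vocab   = {c: i for i, c in enumerate(chars)}
tensor  = np.array([vocab[c] for c in raw])               # whole text as an index array
with open("data/nine_dreams/vocab.pkl", "wb") as f:
    cPickle.dump(chars, f)
np.save("data/nine_dreams/data.npy", tensor)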

VOCAB AND CHARS

vocab_size = data_loader.vocab_size
vocab = data_loader.vocab
chars = data_loader.chars
print ( "type of 'data_loader.vocab' is %s, length is %d" 
       % (type(data_loader.vocab), len(data_loader.vocab)) )
print ( "type of 'data_loader.chars' is %s, length is %d" 
       % (type(data_loader.chars), len(data_loader.chars)) )
type of 'data_loader.vocab' is <type 'dict'>, length is 76
type of 'data_loader.chars' is <type 'tuple'>, length is 76

VOCAB: DICTIONARY (CHAR->INDEX)

print (data_loader.vocab)
{u'_': 69, u'6': 59, u':': 57, u'\n': 19, u'4': 67, u'5': 63, u'>': 75, u'!': 52, u' ': 1, u'"': 28, u'\u1d25': 0, u"'": 49, u')': 46, u'(': 45, u'-': 65, u',': 27, u'.': 24, u'\u3131': 7, u'0': 73, u'\u3133': 60, u'\u3132': 29, u'\u3135': 50, u'\u3134': 4, u'\u3137': 13, u'\u3136': 44, u'\u3139': 5, u'\u3138': 32, u'\u313b': 55, u'\u313a': 48, u'\u313c': 54, u'?': 41, u'3': 66, u'\u3141': 12, u'\u3140': 51, u'\u3143': 47, u'\u3142': 17, u'\u3145': 10, u'\u3144': 43, u'\u3147': 2, u'\u3146': 22, u'\u3149': 40, u'\u3148': 15, u'\u314b': 42, u'\u314a': 23, u'\u314d': 31, u'\u314c': 30, u'\u314f': 3, u'\u314e': 14, u'\u3151': 34, u'\u3150': 21, u'\u3153': 11, u'\u3152': 74, u'\u3155': 18, u'\u3154': 20, u'\u3157': 9, u'\u3156': 39, u'\u3159': 53, u'\u3158': 26, u'\u315b': 38, u'\u315a': 33, u'\u315d': 36, u'\u315c': 16, u'\u315f': 35, u'\u315e': 61, u'\u3161': 8, u'\u3160': 37, u'\u3163': 6, u'\u3162': 25, u'\x1a': 72, u'9': 64, u'7': 71, u'2': 62, u'1': 58, u'\u313f': 56, u'\u313e': 70, u'8': 68}

CHARS: LIST (INDEX->CHAR)

print (data_loader.chars)
# USAGE
print (data_loader.chars[0])
(u'\u1d25', u' ', u'\u3147', u'\u314f', u'\u3134', u'\u3139', u'\u3163', u'\u3131', u'\u3161', u'\u3157', u'\u3145', u'\u3153', u'\u3141', u'\u3137', u'\u314e', u'\u3148', u'\u315c', u'\u3142', u'\u3155', u'\n', u'\u3154', u'\u3150', u'\u3146', u'\u314a', u'.', u'\u3162', u'\u3158', u',', u'"', u'\u3132', u'\u314c', u'\u314d', u'\u3138', u'\u315a', u'\u3151', u'\u315f', u'\u315d', u'\u3160', u'\u315b', u'\u3156', u'\u3149', u'?', u'\u314b', u'\u3144', u'\u3136', u'(', u')', u'\u3143', u'\u313a', u"'", u'\u3135', u'\u3140', u'!', u'\u3159', u'\u313c', u'\u313b', u'\u313f', u':', u'1', u'6', u'\u3133', u'\u315e', u'2', u'5', u'9', u'-', u'3', u'4', u'8', u'_', u'\u313e', u'7', u'\x1a', u'0', u'\u3152', u'>')
ᴥ
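
The vocabulary has only 76 symbols because the text has been decomposed into individual jamo with Hangulpy (ᴥ is the filler character at index 0), plus whitespace, punctuation, and digits. 'vocab' and 'chars' are inverses of each other, so converting back and forth is a dictionary lookup and an index; for example (the concrete index depends on the dataset, 7 in the run above):

# char -> index -> char round trip (index value comes from this dataset's vocab)
idx = data_loader.vocab[u'\u3131']                 # the jamo 'ㄱ'
print ( "%d %s" % (idx, data_loader.chars[idx]) )  # 7 ㄱ  for the vocab printed above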

TRAINING BATCH (IMPORTANT!!)

x, y = data_loader.next_batch()
print ("Type of 'x' is %s. Shape is %s" % (type(x), x.shape,))
print ("x looks like \n%s" % (x))
print
print ("Type of 'y' is %s. Shape is %s" % (type(y), y.shape,))
print ("y looks like \n%s" % (y))
Type of 'x' is <type 'numpy.ndarray'>. Shape is (50, 50)
x looks like 
[[ 3  5  0 ...,  3  4  0]
 [20  0  1 ..., 13  3  0]
 [10 11  2 ...,  1  7  3]
 ..., 
 [ 1 17  6 ...,  0  1  7]
 [ 0 14  3 ..., 12  3  4]
 [ 0  7  3 ...,  1 15  3]]

Type of 'y' is <type 'numpy.ndarray'>. Shape is (50, 50)
y looks like 
[[ 5  0  1 ...,  4  0 15]
 [ 0  1  7 ...,  3  0 24]
 [11  2  0 ...,  7  3  0]
 ..., 
 [17  6  0 ...,  1  7  9]
 [14  3  0 ...,  3  4  0]
 [ 7  3  0 ..., 15  3  2]]
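
Each batch is a pair of [batch_size, seq_length] index matrices. The target 'y' is simply 'x' shifted one character to the left: at every position the network is trained to predict the next character. Assuming TextLoader builds the batches that way (which the printed rows above suggest), this can be checked directly:

# y should be x shifted by one time step (next-character prediction)
print ( np.array_equal(x[:, 1:], y[:, :-1]) )   # expected: True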

DEFINE A MULTILAYER LSTM NETWORK

rnn_size   = 512
num_layers = 3
grad_clip  = 5. # <= GRADIENT CLIPPING (PRACTICALLY IMPORTANT)
vocab_size = data_loader.vocab_size

# SELECT RNN CELL (MULTI LAYER LSTM)
unitcell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)
cell = tf.nn.rnn_cell.MultiRNNCell([unitcell] * num_layers)

# Placeholders for the graph inputs and the initial RNN state
input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
targets    = tf.placeholder(tf.int32, [batch_size, seq_length])
initial_state = cell.zero_state(batch_size, tf.float32)

# Define the network
with tf.variable_scope('rnnlm'):
    softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, rnn_size])
        inputs = tf.split(1, seq_length, tf.nn.embedding_lookup(
                embedding, input_data))
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
print ("Network ready")
Network ready
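
The embedding lookup produces a [batch_size, seq_length, rnn_size] tensor; tf.split along dimension 1 followed by tf.squeeze turns it into a Python list of seq_length tensors of shape [batch_size, rnn_size], which is the format rnn_decoder expects. A small numpy illustration of that reshaping (toy array, not the actual graph):

# numpy illustration of the split + squeeze step above
embedded = np.zeros((batch_size, seq_length, rnn_size))    # [50, 50, 512]
split    = [embedded[:, t, :] for t in range(seq_length)]  # list of [50, 512] slices
print ( "%d %s" % (len(split), split[0].shape) )           # 50 (50, 512)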

DEFINE FUNCTIONS

# Output of RNN
outputs, last_state = tf.nn.seq2seq.rnn_decoder(inputs, initial_state
                        , cell, loop_function=None, scope='rnnlm')
output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)

# Next-character probability
probs = tf.nn.softmax(logits)
print ("FUNCTIONS READY")
FUNCTIONS READY
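
'probs' holds, for every position in the batch, a distribution over the 76 symbols. At generation time a sampling script would feed characters one at a time and draw the next character from this distribution. A numpy sketch of that single sampling step (the uniform p below is only a placeholder; in practice it would be one row of probs):

# sketch of sampling the next character from a softmax output row
p = np.ones(vocab_size) / vocab_size          # placeholder distribution
next_idx  = np.random.choice(vocab_size, p=p) # sample an index from the distribution
next_char = data_loader.chars[next_idx]       # map the index back to a character
print ( next_char )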

DEFINE LOSS FUNCTION

loss = tf.nn.seq2seq.sequence_loss_by_example([logits], # Input
    [tf.reshape(targets, [-1])], # Target
    [tf.ones([batch_size * seq_length])], # Weight
    vocab_size)
print ("LOSS FUNCTION")
LOSS FUNCTION
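
sequence_loss_by_example returns one cross-entropy value per character position, i.e. -log softmax(logits)[target]; the cost defined next averages it over the whole batch. A numpy check of that per-position quantity with toy numbers:

# per-position cross-entropy, computed by hand for one toy position
logit  = np.array([2.0, 0.5, -1.0])
target = 0
p = np.exp(logit) / np.exp(logit).sum()
print ( -np.log(p[target]) )   # loss in nats for predicting class 0 (about 0.24)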

DEFINE COST FUNCTION

cost = tf.reduce_sum(loss) / batch_size / seq_length

# GRADIENT CLIPPING ! 
lr = tf.Variable(0.0, trainable=False) # <= LEARNING RATE 
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)
_optm = tf.train.AdamOptimizer(lr)
optm = _optm.apply_gradients(zip(grads, tvars))

final_state = last_state
print ("NETWORK READY")
NETWORK READY
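
clip_by_global_norm treats all gradients as one long vector: if their combined (global) L2 norm exceeds grad_clip, every gradient is scaled down by grad_clip / global_norm, otherwise they pass through unchanged. A numpy illustration of that rule:

# how global-norm clipping rescales a set of gradients (grad_clip = 5)
grads = [np.array([3.0, 4.0]), np.array([12.0])]            # global norm = 13
global_norm = np.sqrt(sum((g ** 2).sum() for g in grads))
scale = min(1.0, grad_clip / global_norm)
print ( [list(g * scale) for g in grads] )                  # global norm becomes 5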

OPTIMIZE NETWORK WITH LR SCHEDULING
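
The schedule applied inside the loop below is plain exponential decay: the learning rate for epoch e is learning_rate * decay_rate ** e. For the values used here the first few epochs look like this:

# exponential learning-rate decay used in the training loop below
for e in range(3):
    print ( "%d %.8f" % (e, 0.0002 * 0.97 ** e) )   # 0.00020000, 0.00019400, 0.00018818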

num_epochs    = 500
save_every    = 1000
learning_rate = 0.0002
decay_rate    = 0.97

save_dir = 'data/nine_dreams'
sess = tf.Session()
sess.run(tf.initialize_all_variables())
summary_writer = tf.train.SummaryWriter(save_dir
                    , graph=sess.graph)
saver = tf.train.Saver(tf.all_variables())
for e in range(num_epochs): # for all epochs

    # LEARNING RATE SCHEDULING 
    sess.run(tf.assign(lr, learning_rate * (decay_rate ** e)))

    data_loader.reset_batch_pointer()
    state = sess.run(initial_state)
    for b in range(data_loader.num_batches):
        start = time.time()
        x, y = data_loader.next_batch()
        feed = {input_data: x, targets: y, initial_state: state}
        # Train!
        train_loss, state, _ = sess.run([cost, final_state, optm], feed)
        end = time.time()
        # PRINT 
        if b % 100 == 0:
            print ("%d/%d (epoch: %d), loss: %.3f, time/batch: %.3f"  
                   % (e * data_loader.num_batches + b
                    , num_epochs * data_loader.num_batches
                    , e, train_loss, end - start))
        # SAVE MODEL
        if (e * data_loader.num_batches + b) % save_every == 0:
            checkpoint_path = os.path.join(save_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path
                       , global_step = e * data_loader.num_batches + b)
            print("model saved to {}".format(checkpoint_path))
# TRAINING TAKES A VERY LONG TIME
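
To reuse the trained weights later (for example in a separate sampling script), the same graph would be rebuilt and the latest checkpoint restored. A minimal sketch, assuming the save_dir used above:

# restore the most recent checkpoint saved by the loop above
ckpt = tf.train.get_checkpoint_state(save_dir)
if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess, ckpt.model_checkpoint_path)
    print ("model restored from {}".format(ckpt.model_checkpoint_path))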
