import numpy as np
import tensorflow as tf
import collections
import argparse
import time
import os
from six.moves import cPickle
print ("Packages Imported")
Packages Imported
data_dir = "data/linux_kernel"
save_dir = "data/linux_kernel"
input_file = os.path.join(data_dir, "input.txt")
with open(input_file, "r") as f:
    data = f.read()
print ("Text loaded from '%s'" % (input_file))
Text loaded from 'data/linux_kernel/input.txt'
counter = collections.Counter(data)
count_pairs = sorted(counter.items(), key=lambda x: -x[1])
print ("Type of 'counter.items()' is %s and length is %d"
% (type(counter.items()), len(counter.items())))
for i in range(5):
print ("[%d/%d]" % (i, 3)),
print (list(counter.items())[i])
print (" ")
print ("Type of 'count_pairs' is %s and length is %d"
% (type(count_pairs), len(count_pairs)))
for i in range(5):
print ("[%d/%d]" % (i, 3)),
print (count_pairs[i])
Type of 'counter.items()' is <type 'list'> and length is 99
[0/3] (' ', 171222)
[1/3] ('$', 61)
[2/3] ('(', 23412)
[3/3] (',', 17025)
[4/3] ('0', 4322)
Type of 'count_pairs' is <type 'list'> and length is 99
[0/3] (' ', 171222)
[1/3] ('e', 113021)
[2/3] ('t', 102154)
[3/3] ('r', 76185)
[4/3] ('i', 75486)
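As a quick illustration of what the Counter and the sorting do, here is a toy sketch on a made-up string (the string, variable names, and printed ordering are purely illustrative; ties may come out in a different order):
toy_text = "hello world"
toy_counter = collections.Counter(toy_text)                   # char -> count
toy_pairs = sorted(toy_counter.items(), key=lambda x: -x[1])  # most frequent first
print (toy_pairs)
# e.g. [('l', 3), ('o', 2), ('h', 1), ('e', 1), (' ', 1), ('w', 1), ('r', 1), ('d', 1)]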
chars, counts = zip(*count_pairs)
vocab = dict(zip(chars, range(len(chars))))
print ("Type of 'chars' is %s and length is %d"
% (type(chars), len(chars)))
for i in range(5):
print ("[%d/%d]" % (i, 3)),
print ("chars[%d] is '%s'" % (i, chars[i]))
print ("")
print ("Type of 'vocab' is %s and length is %d"
% (type(vocab), len(vocab)))
for i in range(5):
print ("[%d/%d]" % (i, 3)),
print ("vocab['%s'] is %s" % (chars[i], vocab[chars[i]]))
with open(os.path.join(save_dir, 'chars_vocab.pkl'), 'wb') as f:
cPickle.dump((chars, vocab), f)
Type of 'chars' is <type 'tuple'> and length is 99
[0/3] chars[0] is ' '
[1/3] chars[1] is 'e'
[2/3] chars[2] is 't'
[3/3] chars[3] is 'r'
[4/3] chars[4] is 'i'
Type of 'vocab' is <type 'dict'> and length is 99
[0/3] vocab[' '] is 0
[1/3] vocab['e'] is 1
[2/3] vocab['t'] is 2
[3/3] vocab['r'] is 3
[4/3] vocab['i'] is 4
chars converts an index back to a character, e.g., chars[0]
vocab converts a character to an index, e.g., vocab['a']
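To make the two mappings concrete, here is a minimal round-trip sketch using the chars tuple and vocab dict built above (the sample string is illustrative and assumed to contain only characters present in the vocabulary):
sample = "int main"
encoded = [vocab[c] for c in sample]          # char -> index
decoded = "".join(chars[i] for i in encoded)  # index -> char
assert decoded == sample
print (encoded)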
corpus = np.array(list(map(vocab.get, data)))
print ("Type of 'corpus' is %s, shape is %s, and length is %d"
% (type(corpus), corpus.shape, len(corpus)))
check_len = 10
print ("\n'corpus' looks like %s" % (corpus[0:check_len]))
for i in range(check_len):
_wordidx = corpus[i]
print ("[%d/%d] chars[%02d] corresponds to '%s'"
% (i, check_len, _wordidx, chars[_wordidx]))
Type of 'corpus' is <type 'numpy.ndarray'>, shape is (1708871,), and length is 1708871
'corpus' looks like [36 22 7 0 22 0 0 13 4 8]
[0/10] chars[36] corresponds to '/'
[1/10] chars[22] corresponds to '*'
[2/10] chars[07] corresponds to '
'
[3/10] chars[00] corresponds to ' '
[4/10] chars[22] corresponds to '*'
[5/10] chars[00] corresponds to ' '
[6/10] chars[00] corresponds to ' '
[7/10] chars[13] corresponds to 'l'
[8/10] chars[04] corresponds to 'i'
[9/10] chars[08] corresponds to 'n'
batch_size = 50
seq_length = 200
num_batches = int(corpus.size / (batch_size * seq_length))
corpus_reduced = corpus[:(num_batches*batch_size*seq_length)]
xdata = corpus_reduced
ydata = np.copy(xdata)
ydata[:-1] = xdata[1:]
ydata[-1] = xdata[0]
print ('xdata is ... %s and length is %d' % (xdata, xdata.size))
print ('ydata is ... %s and length is %d' % (ydata, xdata.size))
print ("")
xbatches = np.split(xdata.reshape(batch_size, -1), num_batches, 1)
ybatches = np.split(ydata.reshape(batch_size, -1), num_batches, 1)
print ("Type of 'xbatches' is %s and length is %d"
% (type(xbatches), len(xbatches)))
print ("Type of 'ybatches' is %s and length is %d"
% (type(ybatches), len(ybatches)))
print ("")
nbatch = 5
temp = xbatches[0:nbatch]
print ("Type of 'temp' is %s and length is %d"
% (type(temp), len(temp)))
for i in range(nbatch):
temp2 = temp[i]
print ("Type of 'temp[%d]' is %s and shape is %s" % (i, type(temp2), temp2.shape,))
xdata is ... [36 22 7 ..., 11 25 3] and length is 1700000
ydata is ... [22 7 0 ..., 25 3 36] and length is 1700000
Type of 'xbatches' is <type 'list'> and length is 170
Type of 'ybatches' is <type 'list'> and length is 170
Type of 'temp' is <type 'list'> and length is 5
Type of 'temp[0]' is <type 'numpy.ndarray'> and shape is (50, 200)
Type of 'temp[1]' is <type 'numpy.ndarray'> and shape is (50, 200)
Type of 'temp[2]' is <type 'numpy.ndarray'> and shape is (50, 200)
Type of 'temp[3]' is <type 'numpy.ndarray'> and shape is (50, 200)
Type of 'temp[4]' is <type 'numpy.ndarray'> and shape is (50, 200)
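The targets are simply the inputs shifted left by one character, with the last target wrapping around to the first input. A toy numpy sketch with made-up sizes (batch_size=2, seq_length=3) shows what the reshape/split above produces:
toy_corpus = np.arange(12)               # pretend these are character indices
bs, sl = 2, 3                            # toy batch_size / seq_length
nb = toy_corpus.size // (bs * sl)        # -> 2 batches
x = toy_corpus[:nb * bs * sl]
y = np.copy(x)
y[:-1] = x[1:]                           # next-character targets
y[-1] = x[0]                             # wrap around
xb = np.split(x.reshape(bs, -1), nb, 1)  # list of (bs, sl) arrays
yb = np.split(y.reshape(bs, -1), nb, 1)
print (xb[0])                            # [[0 1 2] [6 7 8]]
print (yb[0])                            # [[1 2 3] [7 8 9]]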
Now we are ready to build our RNN model with seq2seq
vocab_size = len(vocab)
rnn_size = 128
num_layers = 2
grad_clip = 5.
unitcell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)
cell = tf.nn.rnn_cell.MultiRNNCell([unitcell] * num_layers)
input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
targets = tf.placeholder(tf.int32, [batch_size, seq_length])
istate = cell.zero_state(batch_size, tf.float32)
with tf.variable_scope('rnnlm'):
    softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, rnn_size])
        inputs = tf.split(1, seq_length, tf.nn.embedding_lookup(embedding, input_data))
        inputs = [tf.squeeze(_input, [1]) for _input in inputs]
def loop(prev, _):
    prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
    prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
    return tf.nn.embedding_lookup(embedding, prev_symbol)
"""
loop_function: If not None, this function will be applied to the i-th output
in order to generate the i+1-st input, and decoder_inputs will be ignored,
except for the first element ("GO" symbol).
"""
outputs, last_state = tf.nn.seq2seq.rnn_decoder(inputs, istate, cell,
                                                loop_function=None, scope='rnnlm')
output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
probs = tf.nn.softmax(logits)
loss = tf.nn.seq2seq.sequence_loss_by_example([logits],
                                              [tf.reshape(targets, [-1])],
                                              [tf.ones([batch_size * seq_length])],
                                              vocab_size)
cost = tf.reduce_sum(loss) / batch_size / seq_length
final_state = last_state
lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)
_optm = tf.train.AdamOptimizer(lr)
optm = _optm.apply_gradients(zip(grads, tvars))
print ("Network Ready")
Network Ready
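The cost defined above is the average per-character cross-entropy: sequence_loss_by_example returns one cross-entropy value per (batch, time) position, and cost divides their sum by batch_size * seq_length. A small numpy sketch of the same computation, with toy logits and targets that are purely illustrative:
def toy_softmax(z):
    z = z - z.max(axis=-1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)
toy_logits = np.random.randn(4, 5)        # 4 positions (batch*seq), vocab of 5
toy_targets = np.array([0, 2, 1, 4])      # true character index per position
p = toy_softmax(toy_logits)
per_char_xent = -np.log(p[np.arange(4), toy_targets])  # one loss per position
toy_cost = per_char_xent.sum() / 4.0      # same averaging as reduce_sum(loss)/batch/seq
print (toy_cost)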
num_epochs = 50
save_every = 500
learning_rate = 0.002
decay_rate = 0.97
sess = tf.Session()
sess.run(tf.initialize_all_variables())
summary_writer = tf.train.SummaryWriter(save_dir, graph=sess.graph)
saver = tf.train.Saver(tf.all_variables())
init_time = time.time()
for epoch in range(num_epochs):
    sess.run(tf.assign(lr, learning_rate * (decay_rate ** epoch)))
    state = sess.run(istate)
    batchidx = 0
    for iteration in range(num_batches):
        start_time = time.time()
        randbatchidx = np.random.randint(num_batches)  # not used; batches are taken in order via batchidx
        xbatch = xbatches[batchidx]
        ybatch = ybatches[batchidx]
        batchidx = batchidx + 1
        train_loss, state, _ = sess.run([cost, final_state, optm],
                                        feed_dict={input_data: xbatch, targets: ybatch, istate: state})
        total_iter = epoch*num_batches + iteration
        end_time = time.time()
        duration = end_time - start_time
        if total_iter % 100 == 0:
            print ("[%d/%d] cost: %.4f / Each batch learning took %.4f sec"
                   % (total_iter, num_epochs*num_batches, train_loss, duration))
        if total_iter % save_every == 0:
            ckpt_path = os.path.join(save_dir, 'model.ckpt')
            saver.save(sess, ckpt_path, global_step=total_iter)
            print ("model saved to '%s'" % (ckpt_path))
[0/8500] cost: 5.1518 / Each batch learning took 6.2978 sec
model saved to 'data/linux_kernel/model.ckpt'
[100/8500] cost: 3.0705 / Each batch learning took 0.3866 sec
[200/8500] cost: 2.5382 / Each batch learning took 0.3910 sec
[300/8500] cost: 2.3884 / Each batch learning took 0.5311 sec
[400/8500] cost: 2.2029 / Each batch learning took 0.3930 sec
[500/8500] cost: 1.9560 / Each batch learning took 0.5088 sec
model saved to 'data/linux_kernel/model.ckpt'
[600/8500] cost: 1.9134 / Each batch learning took 0.3861 sec
[700/8500] cost: 1.7579 / Each batch learning took 0.5502 sec
[800/8500] cost: 1.7580 / Each batch learning took 0.4546 sec
[900/8500] cost: 1.6952 / Each batch learning took 0.3958 sec
[1000/8500] cost: 1.5991 / Each batch learning took 0.4516 sec
model saved to 'data/linux_kernel/model.ckpt'
[1100/8500] cost: 1.6036 / Each batch learning took 0.3708 sec
[1200/8500] cost: 1.4374 / Each batch learning took 0.4035 sec
[1300/8500] cost: 1.5513 / Each batch learning took 0.4629 sec
[1400/8500] cost: 1.4814 / Each batch learning took 0.5162 sec
[1500/8500] cost: 1.4986 / Each batch learning took 0.4023 sec
model saved to 'data/linux_kernel/model.ckpt'
[1600/8500] cost: 1.4957 / Each batch learning took 0.5584 sec
[1700/8500] cost: 1.4569 / Each batch learning took 0.5504 sec
[1800/8500] cost: 1.3966 / Each batch learning took 0.4409 sec
[1900/8500] cost: 1.3742 / Each batch learning took 0.8715 sec
[2000/8500] cost: 1.4071 / Each batch learning took 0.7707 sec
model saved to 'data/linux_kernel/model.ckpt'
[2100/8500] cost: 1.4037 / Each batch learning took 0.4636 sec
[2200/8500] cost: 1.3220 / Each batch learning took 0.6967 sec
[2300/8500] cost: 1.3267 / Each batch learning took 0.7644 sec
[2400/8500] cost: 1.2870 / Each batch learning took 0.5228 sec
[2500/8500] cost: 1.3171 / Each batch learning took 0.5671 sec
model saved to 'data/linux_kernel/model.ckpt'
[2600/8500] cost: 1.2876 / Each batch learning took 0.5576 sec
[2700/8500] cost: 1.2571 / Each batch learning took 0.4314 sec
[2800/8500] cost: 1.3123 / Each batch learning took 0.5939 sec
[2900/8500] cost: 1.1588 / Each batch learning took 0.6087 sec
[3000/8500] cost: 1.2834 / Each batch learning took 0.5066 sec
model saved to 'data/linux_kernel/model.ckpt'
[3100/8500] cost: 1.2362 / Each batch learning took 0.4319 sec
[3200/8500] cost: 1.2768 / Each batch learning took 0.4418 sec
[3300/8500] cost: 1.2836 / Each batch learning took 0.6158 sec
[3400/8500] cost: 1.2830 / Each batch learning took 0.7412 sec
[3500/8500] cost: 1.2296 / Each batch learning took 0.7596 sec
model saved to 'data/linux_kernel/model.ckpt'
[3600/8500] cost: 1.2142 / Each batch learning took 0.8046 sec
[3700/8500] cost: 1.2474 / Each batch learning took 0.8149 sec
[3800/8500] cost: 1.2455 / Each batch learning took 0.9514 sec
[3900/8500] cost: 1.1910 / Each batch learning took 1.0230 sec
[4000/8500] cost: 1.1874 / Each batch learning took 0.7037 sec
model saved to 'data/linux_kernel/model.ckpt'
[4100/8500] cost: 1.1602 / Each batch learning took 0.6907 sec
[4200/8500] cost: 1.1896 / Each batch learning took 0.6589 sec
[4300/8500] cost: 1.1680 / Each batch learning took 0.6051 sec
[4400/8500] cost: 1.1472 / Each batch learning took 0.4314 sec
[4500/8500] cost: 1.2073 / Each batch learning took 0.7571 sec
model saved to 'data/linux_kernel/model.ckpt'
[4600/8500] cost: 1.0601 / Each batch learning took 0.8487 sec
[4700/8500] cost: 1.1822 / Each batch learning took 0.5197 sec
[4800/8500] cost: 1.1427 / Each batch learning took 0.5184 sec
[4900/8500] cost: 1.1774 / Each batch learning took 0.4620 sec
[5000/8500] cost: 1.1902 / Each batch learning took 0.4941 sec
model saved to 'data/linux_kernel/model.ckpt'
[5100/8500] cost: 1.1960 / Each batch learning took 0.7985 sec
[5200/8500] cost: 1.1568 / Each batch learning took 0.7381 sec
[5300/8500] cost: 1.1487 / Each batch learning took 0.5911 sec
[5400/8500] cost: 1.1710 / Each batch learning took 0.8420 sec
[5500/8500] cost: 1.1684 / Each batch learning took 0.7788 sec
model saved to 'data/linux_kernel/model.ckpt'
[5600/8500] cost: 1.1337 / Each batch learning took 0.7290 sec
[5700/8500] cost: 1.1234 / Each batch learning took 1.0153 sec
[5800/8500] cost: 1.1034 / Each batch learning took 0.7469 sec
[5900/8500] cost: 1.1276 / Each batch learning took 0.7259 sec
[6000/8500] cost: 1.1073 / Each batch learning took 0.7722 sec
model saved to 'data/linux_kernel/model.ckpt'
[6100/8500] cost: 1.0955 / Each batch learning took 0.7700 sec
[6200/8500] cost: 1.1489 / Each batch learning took 0.4165 sec
[6300/8500] cost: 1.0120 / Each batch learning took 0.7359 sec
[6400/8500] cost: 1.1296 / Each batch learning took 0.6871 sec
[6500/8500] cost: 1.0963 / Each batch learning took 0.6530 sec
model saved to 'data/linux_kernel/model.ckpt'
[6600/8500] cost: 1.1259 / Each batch learning took 0.4506 sec
[6700/8500] cost: 1.1422 / Each batch learning took 0.3957 sec
[6800/8500] cost: 1.1431 / Each batch learning took 0.4530 sec
[6900/8500] cost: 1.1168 / Each batch learning took 0.4068 sec
[7000/8500] cost: 1.1119 / Each batch learning took 1.0343 sec
model saved to 'data/linux_kernel/model.ckpt'
[7100/8500] cost: 1.1255 / Each batch learning took 0.4080 sec
[7200/8500] cost: 1.1266 / Each batch learning took 0.3840 sec
[7300/8500] cost: 1.1036 / Each batch learning took 0.8628 sec
[7400/8500] cost: 1.0860 / Each batch learning took 0.4150 sec
[7500/8500] cost: 1.0681 / Each batch learning took 0.4738 sec
model saved to 'data/linux_kernel/model.ckpt'
[7600/8500] cost: 1.0921 / Each batch learning took 0.4141 sec
[7700/8500] cost: 1.0728 / Each batch learning took 0.3944 sec
[7800/8500] cost: 1.0644 / Each batch learning took 0.4473 sec
[7900/8500] cost: 1.1155 / Each batch learning took 0.4841 sec
[8000/8500] cost: 0.9819 / Each batch learning took 0.4198 sec
model saved to 'data/linux_kernel/model.ckpt'
[8100/8500] cost: 1.0945 / Each batch learning took 0.4452 sec
[8200/8500] cost: 1.0682 / Each batch learning took 0.4038 sec
[8300/8500] cost: 1.0939 / Each batch learning took 0.4889 sec
[8400/8500] cost: 1.1111 / Each batch learning took 0.3995 sec
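One detail of the loop above: the learning rate decays exponentially with the epoch via tf.assign. A quick check of that schedule with the learning_rate and decay_rate values defined earlier:
for ep in (0, 10, 25, 49):
    print (ep, learning_rate * (decay_rate ** ep))
# roughly: 0 -> 0.0020, 10 -> 0.0015, 25 -> 0.0009, 49 -> 0.0005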
To monitor training with TensorBoard, point it at the summary directory used above:
tensorboard --logdir=data/linux_kernel
print ("Done!! It took %.4f second. " %(time.time() - init_time))
Done!! It took 5238.4040 second.
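As a final, assumption-heavy sketch: to generate text from the saved checkpoint, the usual char-rnn approach is to rebuild the same graph with batch_size = 1 and seq_length = 1, restore the weights, and feed one character at a time while carrying the LSTM state forward. The code below assumes input_data, probs, final_state, and istate refer to such a 1x1 graph, and that chars, vocab, sess, saver, and save_dir are available as defined above; the prime string and generation length are just for illustration.
ckpt = tf.train.latest_checkpoint(save_dir)   # e.g. 'data/linux_kernel/model.ckpt-8000'
saver.restore(sess, ckpt)
prime = "/* "                                 # seed text (illustrative)
state = sess.run(istate)
for c in prime[:-1]:                          # warm up the state on the prime
    x = np.array([[vocab[c]]])
    state = sess.run(final_state, feed_dict={input_data: x, istate: state})
text, c = prime, prime[-1]
for _ in range(200):                          # generate 200 characters
    x = np.array([[vocab[c]]])
    p, state = sess.run([probs, final_state],
                        feed_dict={input_data: x, istate: state})
    idx = np.random.choice(len(p[0]), p=p[0] / p[0].sum())  # sample the next char
    c = chars[idx]
    text = text + c
print (text)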