7.1 RNN in TensorFlow - TimeSeries Data

import math
import os

import numpy as np
np.random.seed(123)
print("NumPy:{}".format(np.__version__))

import pandas as pd
print("Pandas:{}".format(pd.__version__))

import sklearn as sk
from sklearn import preprocessing as skpp
print("sklearn:{}".format(sk.__version__))

import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams.update({'font.size': 20,
                     'figure.figsize': [15,10] 
                    })
print("Matplotlib:{}".format(mpl.__version__))

import tensorflow as tf
tf.set_random_seed(123)
print("TensorFlow:{}".format(tf.__version__))
NumPy:1.13.1
Pandas:0.21.0
sklearn:0.19.1
Matplotlib:2.1.0
TensorFlow:1.4.1
DATASETSLIB_HOME = '../datasetslib'
import sys
if DATASETSLIB_HOME not in sys.path:
    sys.path.append(DATASETSLIB_HOME)
%reload_ext autoreload
%autoreload 2
import datasetslib

from datasetslib import util as dsu
datasetslib.datasets_root = os.path.join(os.path.expanduser('~'),'datasets')

Read and pre-process the dataset

filepath = os.path.join(datasetslib.datasets_root,
                        'ts-data',
                        'international-airline-passengers-cleaned.csv'
                       ) 
dataframe = pd.read_csv(filepath,
                        usecols=[1],
                        header=0)
dataset = dataframe.values
dataset = dataset.astype(np.float32)
plt.plot(dataset,label='Original Data')
plt.legend()
plt.xlabel('Timesteps')
plt.ylabel('Total Passengers')
plt.show()

[Figure: plot of the original international airline passengers series]

# normalize the dataset
scaler = skpp.MinMaxScaler(feature_range=(0, 1))
normalized_dataset = scaler.fit_transform(dataset)
# split into train and test sets
train,test=dsu.train_test_split(normalized_dataset,train_size=0.67)
# convert into supervised learning set of input data and label
n_x=1
n_y=1
X_train, Y_train, X_test, Y_test = dsu.mvts_to_xy(train,test,n_x=n_x,n_y=n_y)
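
dsu.mvts_to_xy comes from the companion datasetslib package. Assuming it performs the usual sliding-window framing, where each X sample holds n_x consecutive values and its label holds the n_y values that follow, an equivalent NumPy sketch (the helper name make_xy is hypothetical, not datasetslib code) looks like this:

# hypothetical equivalent of dsu.mvts_to_xy for a single split (a sketch only)
def make_xy(series, n_x=1, n_y=1):
    # each row of X holds n_x consecutive values; each row of Y holds the next n_y values
    X, Y = [], []
    for i in range(len(series) - n_x - n_y + 1):
        X.append(series[i:i + n_x, 0])
        Y.append(series[i + n_x:i + n_x + n_y, 0])
    return np.array(X), np.array(Y)

# e.g. X_tr, Y_tr = make_xy(train, n_x=n_x, n_y=n_y)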

TensorFlow SimpleRNN for TimeSeries Data

state_size = 4
n_epochs = 100
n_timesteps = n_x   # number of time steps
n_x_vars = 1  # number of x variables
n_y_vars = 1  # number of y variables
learning_rate = 0.1
tf.reset_default_graph()
X_p = tf.placeholder(tf.float32, [None, n_timesteps, n_x_vars], name='X_p')
# X_p shape: (batch_size, n_timesteps, n_x_vars)
Y_p = tf.placeholder(tf.float32, [None, n_timesteps, n_y_vars], name='Y_p')
# Y_p shape: (batch_size, n_timesteps, n_y_vars)

# unstack X_p into a list of n_timesteps tensors, each of shape (batch_size, n_x_vars)
rnn_inputs = tf.unstack(X_p, axis=1)
cell = tf.nn.rnn_cell.BasicRNNCell(state_size)
rnn_outputs, final_state = tf.nn.static_rnn(cell, 
                                            rnn_inputs,
                                            dtype=tf.float32
                                           )
W = tf.get_variable('W', [state_size, n_y_vars])
b = tf.get_variable('b', [n_y_vars], initializer=tf.constant_initializer(0.0))
predictions = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
print(predictions)
[<tf.Tensor 'add:0' shape=(?, 1) dtype=float32>]
y_as_list = tf.unstack(Y_p, num=n_timesteps, axis=1)
print(y_as_list)
[<tf.Tensor 'unstack_1:0' shape=(?, 1) dtype=float32>]
mse = tf.losses.mean_squared_error
losses = [mse(labels=label, predictions=prediction) 
          for prediction, label in zip(predictions, y_as_list)
         ]
total_loss = tf.reduce_mean(losses)
optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)
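
The cell built above is a BasicRNNCell, which applies the plain tanh recurrence at every timestep, so with state_size=4 each step mixes the current input into a 4-dimensional hidden state. A minimal NumPy sketch of that update (the weight names W_xh, W_hh, and b_h are illustrative, not the variables TensorFlow creates internally):

# h_t = tanh(x_t @ W_xh + h_prev @ W_hh + b_h)
def basic_rnn_step(x_t, h_prev, W_xh, W_hh, b_h):
    return np.tanh(np.dot(x_t, W_xh) + np.dot(h_prev, W_hh) + b_h)

# with n_x_vars=1 and state_size=4 the shapes would be:
# x_t: (batch, 1), h_prev: (batch, 4), W_xh: (1, 4), W_hh: (4, 4), b_h: (4,)
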
with tf.Session() as tfs:
    tfs.run(tf.global_variables_initializer())
    epoch_loss = 0.0
    for epoch in range(n_epochs):
        feed_dict={X_p: X_train.reshape(-1, n_timesteps,
                                        n_x_vars),
                   Y_p: Y_train.reshape(-1, n_timesteps,
                                        n_y_vars)
                  }
        epoch_loss,y_train_pred,_=tfs.run([total_loss,predictions,optimizer], 
                                            feed_dict=feed_dict
                                           )
    print("train mse = {}".format(epoch_loss))
    feed_dict={X_p: X_test.reshape(-1, n_timesteps, 
                                    n_x_vars), 
               Y_p: Y_test.reshape(-1, n_timesteps, 
                                    n_y_vars)
              }
    test_loss, y_test_pred = tfs.run([total_loss,predictions], 
                                     feed_dict=feed_dict
                                    )
    print('test mse = {}'.format(test_loss))
    print('test rmse = {}'.format(math.sqrt(test_loss)))
train mse = 0.002138530369848013
test mse = 0.014468207024037838
test rmse = 0.12028386019760855
# predictions is a list with one tensor per timestep; n_timesteps=1, so keep the first element
y_train_pred=y_train_pred[0]
y_test_pred=y_test_pred[0]
# invert predictions back to the original scale
y_train_pred = scaler.inverse_transform(y_train_pred)
y_test_pred = scaler.inverse_transform(y_test_pred)

#invert originals
y_train_orig = scaler.inverse_transform(Y_train)
y_test_orig = scaler.inverse_transform(Y_test)
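
# y_train_orig and y_test_orig are the labels mapped back to passenger counts;
# a minimal sketch (assuming the shapes line up as above) that reports the
# error in the original units instead of the 0-1 scale:
train_rmse = math.sqrt(np.mean((y_train_orig - y_train_pred) ** 2))
test_rmse = math.sqrt(np.mean((y_test_orig - y_test_pred) ** 2))
print('train rmse (passengers) = {:.2f}'.format(train_rmse))
print('test rmse (passengers) = {:.2f}'.format(test_rmse))
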
# shift train predictions for plotting
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[n_x-1:len(y_train_pred)+n_x-1, :] = y_train_pred
# shift test predictions for plotting
testPredictPlot = np.empty_like(dataset)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(y_train_pred)+(n_x*2)-1:len(dataset)-1, :] = y_test_pred
# plot baseline and predictions
plt.plot(dataset,label='Original Data')
plt.plot(trainPredictPlot,label='y_train_pred')
plt.plot(testPredictPlot,label='y_test_pred')
plt.legend()
plt.xlabel('Timesteps')
plt.ylabel('Total Passengers')
plt.show()

[Figure: original data with the SimpleRNN train and test predictions overlaid]

TensorFlow LSTM for TimeSeries Data

n_epochs = 600
n_timesteps = n_x   # number of time steps
n_x_vars = 1  # number of x variables
n_y_vars = 1  # number of y variables
learning_rate = 0.1

tf.reset_default_graph()
X_p = tf.placeholder(tf.float32, [None, n_timesteps, n_x_vars], name='X_p') 
Y_p = tf.placeholder(tf.float32, [None, n_timesteps, n_y_vars], name='Y_p')

# make a list of tensors of length n_x
rnn_inputs = tf.unstack(X_p,axis=1)

cell = tf.nn.rnn_cell.LSTMCell(state_size)
rnn_outputs, final_state = tf.nn.static_rnn(cell, rnn_inputs,dtype=tf.float32)

W = tf.get_variable('W', [state_size, n_y_vars])
b = tf.get_variable('b', [n_y_vars], initializer=tf.constant_initializer(0.0))

predictions = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]

y_as_list = tf.unstack(Y_p, num=n_timesteps, axis=1)

mse = tf.losses.mean_squared_error
losses = [mse(labels=label, predictions=prediction) for 
          prediction, label in zip(predictions, y_as_list)
         ]
total_loss = tf.reduce_mean(losses)
optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)
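
LSTMCell replaces the plain tanh update with gated input, forget, and output paths plus a separate cell state. A simplified NumPy sketch of one step (weight names and the gate ordering are illustrative; TensorFlow's LSTMCell also adds forget_bias to the forget gate before the sigmoid):

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

# simplified LSTM step: W has shape (n_input + state_size, 4 * state_size)
def lstm_step(x_t, h_prev, c_prev, W, b, forget_bias=1.0):
    z = np.dot(np.concatenate([x_t, h_prev], axis=1), W) + b
    i, g, f, o = np.split(z, 4, axis=1)            # input, candidate, forget, output
    c_t = sigmoid(f + forget_bias) * c_prev + sigmoid(i) * np.tanh(g)
    h_t = sigmoid(o) * np.tanh(c_t)
    return h_t, c_t
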

with tf.Session() as tfs:
    tfs.run(tf.global_variables_initializer())
    epoch_loss = 0.0
    for epoch in range(n_epochs):
        feed_dict={X_p: X_train.reshape(-1, n_timesteps,
                                        n_x_vars),
                   Y_p: Y_train.reshape(-1, n_timesteps,
                                        n_y_vars)
                  }
        epoch_loss,y_train_pred,_=tfs.run([total_loss,
                                           predictions,
                                           optimizer], 
                                          feed_dict=feed_dict
                                         )
    print("train mse = {}".format(epoch_loss))
    feed_dict={X_p: X_test.reshape(-1, n_timesteps, 
                                    n_x_vars), 
               Y_p: Y_test.reshape(-1, n_timesteps, 
                                    n_y_vars)
              }
    test_loss, y_test_pred = tfs.run([total_loss,predictions], 
                                feed_dict=feed_dict)

    print('test mse = {}'.format(test_loss))
    print('test rmse = {}'.format(math.sqrt(test_loss)))

y_train_pred=y_train_pred[0]
y_test_pred=y_test_pred[0]

#invert predictions
y_train_pred = scaler.inverse_transform(y_train_pred)
y_test_pred = scaler.inverse_transform(y_test_pred)

#invert originals
y_train_orig = scaler.inverse_transform(Y_train)
y_test_orig = scaler.inverse_transform(Y_test)

# shift train predictions for plotting
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[n_x-1:len(y_train_pred)+n_x-1, :] = y_train_pred
# shift test predictions for plotting
testPredictPlot = np.empty_like(dataset)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(y_train_pred)+(n_x*2)-1:len(dataset)-1, :]=y_test_pred
# plot baseline and predictions
plt.plot(dataset,label='Original Data')
plt.plot(trainPredictPlot,label='y_train_pred')
plt.plot(testPredictPlot,label='y_test_pred')
plt.legend()
plt.xlabel('Timesteps')
plt.ylabel('Total Passengers')
plt.show()
train mse = 0.0019662242848426104
test mse = 0.012980078347027302
test rmse = 0.11393014678752636

[Figure: original data with the LSTM train and test predictions overlaid]

TensorFlow GRU for TimeSeries Data

state_size = 4
n_epochs = 600
n_timesteps = n_x   # number of time steps
n_x_vars = 1  # number of x variables
n_y_vars = 1  # number of y variables
learning_rate = 0.1

tf.reset_default_graph()
X_p = tf.placeholder(tf.float32, [None, n_timesteps, n_x_vars], name='X_p') 
Y_p = tf.placeholder(tf.float32, [None, n_timesteps, n_y_vars], name='Y_p')

# make a list of tensors of length n_x
rnn_inputs = tf.unstack(X_p,axis=1)

cell = tf.nn.rnn_cell.GRUCell(state_size)
rnn_outputs, final_state = tf.nn.static_rnn(cell, rnn_inputs,dtype=tf.float32)

W = tf.get_variable('W', [state_size, n_y_vars])
b = tf.get_variable('b', [n_y_vars], initializer=tf.constant_initializer(0.0))

predictions = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]

y_as_list = tf.unstack(Y_p, num=n_timesteps, axis=1)

mse = tf.losses.mean_squared_error
losses = [mse(labels=label, predictions=prediction) for 
          prediction, label in zip(predictions, y_as_list)
         ]
total_loss = tf.reduce_mean(losses)
optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)
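
GRUCell uses two gates, an update gate and a reset gate, and no separate cell state. A simplified NumPy sketch of one step (weight names are illustrative):

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gru_step(x_t, h_prev, W_z, W_r, W_c, b_z, b_r, b_c):
    xh = np.concatenate([x_t, h_prev], axis=1)
    z = sigmoid(np.dot(xh, W_z) + b_z)                      # update gate
    r = sigmoid(np.dot(xh, W_r) + b_r)                      # reset gate
    c = np.tanh(np.dot(np.concatenate([x_t, r * h_prev], axis=1), W_c) + b_c)
    return z * h_prev + (1.0 - z) * c                       # new hidden state
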

with tf.Session() as tfs:
    tfs.run(tf.global_variables_initializer())
    epoch_loss = 0.0
    for epoch in range(n_epochs):
        feed_dict={X_p: X_train.reshape(-1, 
                               n_timesteps,
                               n_x_vars
                              ),
                   Y_p: Y_train.reshape(-1, 
                               n_timesteps,
                               n_y_vars
                              )
                  }
        epoch_loss,y_train_pred,_ = tfs.run([total_loss,
                                             predictions,
                                             optimizer], 
                                            feed_dict=feed_dict
                                           )
    print("train mse = {}".format(epoch_loss))
    feed_dict={X_p: X_test.reshape(-1, 
                                   n_timesteps,
                                   n_x_vars
                                  ),
               Y_p: Y_test.reshape(-1, 
                                   n_timesteps,
                                   n_y_vars
                                  )
              }
    test_loss, y_test_pred = tfs.run([total_loss,predictions], 
                                feed_dict=feed_dict)

    print('test mse = {}'.format(test_loss))
    print('test rmse = {}'.format(math.sqrt(test_loss)))

y_train_pred=y_train_pred[0]
y_test_pred=y_test_pred[0]

#invert predictions
y_train_pred = scaler.inverse_transform(y_train_pred)
y_test_pred = scaler.inverse_transform(y_test_pred)

#invert originals
y_train_orig = scaler.inverse_transform(Y_train)
y_test_orig = scaler.inverse_transform(Y_test)

# shift train predictions for plotting
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[n_x-1:len(y_train_pred)+n_x-1, :] = y_train_pred
# shift test predictions for plotting
testPredictPlot = np.empty_like(dataset)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(y_train_pred)+(n_x*2)-1:len(dataset)-1, :]=y_test_pred
# plot baseline and predictions
plt.plot(dataset,label='Original Data')
plt.plot(trainPredictPlot,label='y_train_pred')
plt.plot(testPredictPlot,label='y_test_pred')
plt.legend()
plt.xlabel('Timesteps')
plt.ylabel('Total Passengers')
plt.show()
train mse = 0.002038003643974662
test mse = 0.015027225948870182
test rmse = 0.12258558621987407

[Figure: original data with the GRU train and test predictions overlaid]
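
All three models above unstack the input and feed it to tf.nn.static_rnn. The same graphs can also be written with tf.nn.dynamic_rnn, which consumes the 3-D placeholder directly and returns outputs of shape (batch_size, n_timesteps, state_size). A minimal sketch for the GRU case, given here as an alternative formulation rather than the approach used above:

tf.reset_default_graph()
X_p = tf.placeholder(tf.float32, [None, n_timesteps, n_x_vars], name='X_p')
Y_p = tf.placeholder(tf.float32, [None, n_timesteps, n_y_vars], name='Y_p')

cell = tf.nn.rnn_cell.GRUCell(state_size)
# rnn_outputs has shape (batch_size, n_timesteps, state_size)
rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, X_p, dtype=tf.float32)

# project every timestep's state down to n_y_vars outputs
predictions = tf.layers.dense(rnn_outputs, n_y_vars)
total_loss = tf.losses.mean_squared_error(labels=Y_p, predictions=predictions)
optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)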
