# 多层神经网络，Sequential 和 Module

## 多层神经网络

### 理解神经网络

sigmoid 激活函数

$$\sigma(x) = \frac{1}{1 + e^{-x}}$$

tanh 激活函数

$$\tanh(x) = 2 \sigma(2x) - 1$$

ReLU 激活函数

$$\mathrm{ReLU}(x) = \max(0, x)$$

## 为什么要使用激活函数

一个两层网络可以写成如下形式，其中 $A$ 表示激活函数：

$$y = w_2 A(w_1 x)$$

如果去掉激活函数，网络就变成

$$y = w_2 (w_1 x) = (w_2 w_1) x = \bar{w} x$$

也就是说，不使用激活函数时，无论堆叠多少层，整个网络都等价于一个单层的线性模型。

import torch
import numpy as np
from torch import nn
import torch.nn.functional as F

import matplotlib.pyplot as plt
%matplotlib inline

def plot_decision_boundary(model, x, y, h=0.01):
    """Draw a classifier's decision regions together with the data points.

    Args:
        model: Callable that receives an ``(n, 2)`` NumPy array of grid
            points and returns one prediction per point; the result must
            support ``reshape`` to the grid shape.
        x: NumPy array whose columns 0 and 1 are the plotted coordinates.
        y: NumPy array of labels; flattened and used to color the points.
        h: Spacing between neighbouring grid points (default ``0.01``).

    Raises:
        ValueError: If ``h`` is not strictly positive.
    """
    if h <= 0:
        raise ValueError('h must be positive, got {}'.format(h))

    # Pad the data range by 1 on every side so no point sits on the border.
    x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
    y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1

    # Build a grid of points spaced h apart.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Predict on every grid point, then fold the predictions back into the
    # grid shape so they can be drawn as filled contours.
    Z = model(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Decision regions first, training examples on top.
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(x[:, 0], x[:, 1], c=y.reshape(-1), s=40, cmap=plt.cm.Spectral)


np.random.seed(1)  # fixed seed so the generated dataset is reproducible
m = 400  # total number of samples
N = m // 2  # number of points per class
D = 2  # dimensionality of every sample
x = np.zeros((m, D))
y = np.zeros((m, 1), dtype='uint8')  # label vector: 0 = red, 1 = blue
a = 4  # amplitude of the radius curve

for j in range(2):
    # Rows N*j .. N*(j+1)-1 hold the samples of class j.
    ix = slice(N * j, N * (j + 1))
    t = np.linspace(j * 3.12, (j + 1) * 3.12, N) + np.random.randn(N) * 0.2  # theta
    r = a * np.sin(4 * t) + np.random.randn(N) * 0.2  # radius
    # Convert the noisy polar coordinates to Cartesian points.
    x[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
    y[ix] = j

# Scatter-plot the generated points, colored by their class label.
plt.scatter(x[:, 0], x[:, 1], c=y.reshape(-1), s=40, cmap=plt.cm.Spectral)

<matplotlib.collections.PathCollection at 0x1170f3908>


# Convert the NumPy data and labels to float tensors for PyTorch.
x = torch.from_numpy(x).float()
y = torch.from_numpy(y).float()

# Trainable weight (2 inputs -> 1 output) and bias of the logistic model.
w = nn.Parameter(torch.randn(2, 1))
b = nn.Parameter(torch.zeros(1))

optimizer = torch.optim.SGD([w, b], 1e-1)


def logistic_regression(x):
    """Return the raw logits ``x @ w + b`` for a batch of 2-D points.

    No sigmoid is applied here: callers either feed the logits to a
    with-logits loss or apply the sigmoid themselves when predicting.

    Args:
        x: Float tensor with 2 columns (one row per sample).

    Returns:
        Tensor with one logit per input row, shape ``(rows, 1)``.
    """
    return torch.mm(x, w) + b

# Binary cross-entropy that takes raw logits as input.
criterion = nn.BCEWithLogitsLoss()

for e in range(100):
    # Tensors carry autograd state themselves; no Variable wrapper is needed.
    out = logistic_regression(x)
    loss = criterion(out, y)
    # backward() adds into .grad, so clear the previous step's gradients first.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (e + 1) % 20 == 0:
        # loss is a 0-dim tensor; item() extracts the Python float.
        print('epoch: {}, loss: {}'.format(e+1, loss.item()))

epoch: 20, loss: 0.7033562064170837
epoch: 40, loss: 0.6739853024482727
epoch: 60, loss: 0.6731640696525574
epoch: 80, loss: 0.6731465458869934
epoch: 100, loss: 0.6731461882591248

def plot_logistic(x):
    """Predict hard 0/1 labels with the logistic-regression model.

    Args:
        x: NumPy array of points; converted to a float tensor before being
            passed to ``logistic_regression``.

    Returns:
        NumPy integer array with 1 where the sigmoid output exceeds 0.5
        and 0 elsewhere.
    """
    x = torch.from_numpy(x).float()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        out = torch.sigmoid(logistic_regression(x))
    out = (out > 0.5) * 1
    return out.numpy()

# Draw the decision regions of the logistic-regression model.
plot_decision_boundary(plot_logistic, x.numpy(), y.numpy())
plt.title('logistic regression')

Text(0.5,1,'logistic regression')


# Parameters of the two-layer network: 2 inputs -> 4 hidden units -> 1 output.
# Weights start as small random values, biases as zeros.
w1 = nn.Parameter(torch.randn(2, 4) * 0.01)  # the hidden layer has 4 units
b1 = nn.Parameter(torch.zeros(4))

w2 = nn.Parameter(torch.randn(4, 1) * 0.01)
b2 = nn.Parameter(torch.zeros(1))


# Define the model.
def two_network(x):
    """Forward pass of the two-layer network, returning raw logits.

    Args:
        x: Float tensor with 2 columns (one row per sample).

    Returns:
        Tensor of shape ``(rows, 1)`` holding one logit per sample.
    """
    x1 = torch.mm(x, w1) + b1
    x1 = torch.tanh(x1)  # tanh non-linearity between the two linear maps
    x2 = torch.mm(x1, w2) + b2
    return x2

optimizer = torch.optim.SGD([w1, w2, b1, b2], 1.)

criterion = nn.BCEWithLogitsLoss()

# Train for 10000 iterations.
for e in range(10000):
    out = two_network(x)
    loss = criterion(out, y)
    # backward() adds into .grad, so clear the previous step's gradients first.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (e + 1) % 1000 == 0:
        # loss is a 0-dim tensor; item() extracts the Python float.
        print('epoch: {}, loss: {}'.format(e+1, loss.item()))

epoch: 1000, loss: 0.29002276062965393
epoch: 2000, loss: 0.276983380317688
epoch: 3000, loss: 0.26818233728408813
epoch: 4000, loss: 0.2620616555213928
epoch: 5000, loss: 0.2571246325969696
epoch: 6000, loss: 0.23155273497104645
epoch: 7000, loss: 0.2241673469543457
epoch: 8000, loss: 0.220903217792511
epoch: 9000, loss: 0.21872615814208984
epoch: 10000, loss: 0.2170446664094925

def plot_network(x):
    """Predict hard 0/1 labels with the hand-written two-layer network.

    Args:
        x: NumPy array of points; converted to a float tensor before the
            forward pass.

    Returns:
        NumPy integer array with 1 where the sigmoid output exceeds 0.5
        and 0 elsewhere.
    """
    x = torch.from_numpy(x).float()
    # Reuse two_network for the forward pass instead of repeating its math;
    # no_grad because this is inference only.
    with torch.no_grad():
        out = torch.sigmoid(two_network(x))
    out = (out > 0.5) * 1
    return out.numpy()

# Draw the decision regions of the two-layer network.
plot_decision_boundary(plot_network, x.numpy(), y.numpy())
plt.title('2 layer network')

Text(0.5,1,'2 layer network')


## Sequential 和 Module

Sequential 允许我们构建序列化的模块，而 Module 是一种更加灵活的模型定义方式，我们下面分别用 Sequential 和 Module 来定义上面的神经网络。

# Sequential: register the layers one by one, in the order they are applied.
# The names '0', '1', '2' are the ones Sequential assigns to positional layers.
seq_net = nn.Sequential()
seq_net.add_module('0', nn.Linear(2, 4))  # PyTorch linear layer: wx + b
seq_net.add_module('1', nn.Tanh())
seq_net.add_module('2', nn.Linear(4, 1))

# The layers of a Sequential container can be accessed by index.

seq_net[0] # the first layer

Linear(in_features=2, out_features=4)

# Print the weights of the first layer.

w0 = seq_net[0].weight
print(w0)

Parameter containing:
-0.4964  0.3581
-0.0705  0.4262
0.0601  0.1988
0.6683 -0.4470
[torch.FloatTensor of size 4x2]

# parameters() yields all trainable parameters of the model.
param = seq_net.parameters()

# Define the optimizer.
optim = torch.optim.SGD(param, 1.)

# Train for 10000 iterations.
for e in range(10000):
    out = seq_net(x)
    loss = criterion(out, y)
    # backward() adds into .grad, so clear the previous step's gradients first.
    optim.zero_grad()
    loss.backward()
    optim.step()
    if (e + 1) % 1000 == 0:
        # loss is a 0-dim tensor; item() extracts the Python float.
        print('epoch: {}, loss: {}'.format(e+1, loss.item()))

epoch: 1000, loss: 0.2839296758174896
epoch: 2000, loss: 0.2716798782348633
epoch: 3000, loss: 0.2647360861301422
epoch: 4000, loss: 0.26001378893852234
epoch: 5000, loss: 0.2566395103931427
epoch: 6000, loss: 0.2541380524635315
epoch: 7000, loss: 0.25222381949424744
epoch: 8000, loss: 0.2507193386554718
epoch: 9000, loss: 0.24951006472110748
epoch: 10000, loss: 0.2485194206237793


def plot_seq(x):
    """Predict hard 0/1 labels with ``seq_net``.

    Args:
        x: NumPy array of points; converted to a float tensor before the
            forward pass.

    Returns:
        NumPy integer array with 1 where the sigmoid output exceeds 0.5
        and 0 elsewhere.
    """
    inputs = torch.from_numpy(x).float()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        out = torch.sigmoid(seq_net(inputs)).numpy()
    out = (out > 0.5) * 1
    return out

# Draw the decision regions of the Sequential model.
plot_decision_boundary(plot_seq, x.numpy(), y.numpy())
plt.title('sequential')

<matplotlib.text.Text at 0x118abf5f8>


# Save the whole model: architecture and parameters together.
torch.save(seq_net, 'save_seq_net.pth')


# Load the saved model back.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
# Recent PyTorch releases default to weights_only=True, which refuses to
# unpickle a full module; weights_only=False restores the whole object.
seq_net1 = torch.load('save_seq_net.pth', weights_only=False)

seq_net1

Sequential(
(0): Linear(in_features=2, out_features=4)
(1): Tanh()
(2): Linear(in_features=4, out_features=1)
)

# Print the first-layer weights of seq_net1.
print(seq_net1[0].weight)

Parameter containing:
-0.5532  -1.9916
0.0446   7.9446
10.3188 -12.9290
10.0688  11.7754
[torch.FloatTensor of size 4x2]


# Save only the model parameters (the state dict).
torch.save(seq_net.state_dict(), 'save_seq_net_params.pth')


# A state dict carries no architecture, so build a network with the same
# layout first ...
seq_net2 = nn.Sequential(
    nn.Linear(2, 4),
    nn.Tanh(),
    nn.Linear(4, 1)
)

# ... then copy the saved parameters into it. Without this step seq_net2
# would keep its fresh random initialization.
seq_net2.load_state_dict(torch.load('save_seq_net_params.pth'))


seq_net2

Sequential(
(0): Linear(in_features=2, out_features=4)
(1): Tanh()
(2): Linear(in_features=4, out_features=1)
)

# Print the first-layer weights of seq_net2.
print(seq_net2[0].weight)

Parameter containing:
-0.5532  -1.9916
0.0446   7.9446
10.3188 -12.9290
10.0688  11.7754
[torch.FloatTensor of size 4x2]


class 网络名字(nn.Module):
    def __init__(self, 一些定义的参数):
        super(网络名字, self).__init__()
        self.layer1 = nn.Linear(num_input, num_hidden)
        self.layer2 = nn.Sequential(...)
        ...

        # 定义需要用的网络层

    def forward(self, x): # 定义前向传播
        x1 = self.layer1(x)
        x2 = self.layer2(x)
        x = x1 + x2
        ...
        return x


class module_net(nn.Module):
    """Two-layer network: Linear -> Tanh -> Linear.

    Args:
        num_input: Number of input features.
        num_hidden: Number of hidden units.
        num_output: Number of output features.
    """

    def __init__(self, num_input, num_hidden, num_output):
        super().__init__()
        self.layer1 = nn.Linear(num_input, num_hidden)

        self.layer2 = nn.Tanh()

        self.layer3 = nn.Linear(num_hidden, num_output)

    def forward(self, x):
        """Apply the three layers in order and return the raw output."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

mo_net = module_net(2, 4, 1)

# A layer of the model can be accessed directly by its attribute name.

# The first layer.
l1 = mo_net.layer1
print(l1)

Linear(in_features=2, out_features=4)

# Print the weights of the first layer.
print(l1.weight)

Parameter containing:
0.1492  0.4150
0.3403 -0.4084
-0.3114 -0.0584
0.5668  0.2063
[torch.FloatTensor of size 4x2]

# Define the optimizer.
optim = torch.optim.SGD(mo_net.parameters(), 1.)

# Train for 10000 iterations.
for e in range(10000):
    out = mo_net(x)
    loss = criterion(out, y)
    # backward() adds into .grad, so clear the previous step's gradients first.
    optim.zero_grad()
    loss.backward()
    optim.step()
    if (e + 1) % 1000 == 0:
        # loss is a 0-dim tensor; item() extracts the Python float.
        print('epoch: {}, loss: {}'.format(e+1, loss.item()))

epoch: 1000, loss: 0.2618132531642914
epoch: 2000, loss: 0.2421271800994873
epoch: 3000, loss: 0.23346386849880219
epoch: 4000, loss: 0.22809192538261414
epoch: 5000, loss: 0.224302738904953
epoch: 6000, loss: 0.2214415818452835
epoch: 7000, loss: 0.21918588876724243
epoch: 8000, loss: 0.21736061573028564
epoch: 9000, loss: 0.21585838496685028
epoch: 10000, loss: 0.21460506319999695

# Save the model's parameters (its state dict) to disk.
torch.save(mo_net.state_dict(), 'module_net.pth')


# Deeper network: three tanh hidden layers of width 10, registered in the
# order they are applied. The numeric names match the ones Sequential
# assigns to positional layers.
net = nn.Sequential()
net.add_module('0', nn.Linear(2, 10))
net.add_module('1', nn.Tanh())
net.add_module('2', nn.Linear(10, 10))
net.add_module('3', nn.Tanh())
net.add_module('4', nn.Linear(10, 10))
net.add_module('5', nn.Tanh())
net.add_module('6', nn.Linear(10, 1))

optim = torch.optim.SGD(net.parameters(), 0.1)

# Train for 20000 iterations.
for e in range(20000):
    out = net(x)
    loss = criterion(out, y)
    # backward() adds into .grad, so clear the previous step's gradients first.
    optim.zero_grad()
    loss.backward()
    optim.step()
    if (e + 1) % 1000 == 0:
        # loss is a 0-dim tensor; item() extracts the Python float.
        print('epoch: {}, loss: {}'.format(e+1, loss.item()))

epoch: 1000, loss: 0.3165791928768158
epoch: 2000, loss: 0.25367119908332825
epoch: 3000, loss: 0.22129501402378082
epoch: 4000, loss: 0.20364265143871307
epoch: 5000, loss: 0.19186729192733765
epoch: 6000, loss: 0.18199527263641357
epoch: 7000, loss: 0.173702672123909
epoch: 8000, loss: 0.16727975010871887
epoch: 9000, loss: 0.16238373517990112
epoch: 10000, loss: 0.15855807065963745
epoch: 11000, loss: 0.15542374551296234
epoch: 12000, loss: 0.1527201235294342
epoch: 13000, loss: 0.15030623972415924
epoch: 14000, loss: 0.14812862873077393
epoch: 15000, loss: 0.1461697667837143
epoch: 16000, loss: 0.14440736174583435
epoch: 17000, loss: 0.14280635118484497
epoch: 18000, loss: 0.1413293182849884
epoch: 19000, loss: 0.13908402621746063
epoch: 20000, loss: 0.13768813014030457

def plot_net(x):
    """Predict hard 0/1 labels with the deeper Sequential model ``net``.

    Args:
        x: NumPy array of points; converted to a float tensor before the
            forward pass.

    Returns:
        NumPy integer array with 1 where the sigmoid output exceeds 0.5
        and 0 elsewhere.
    """
    inputs = torch.from_numpy(x).float()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        out = torch.sigmoid(net(inputs)).numpy()
    out = (out > 0.5) * 1
    return out

# Draw the decision regions of the deeper Sequential model.
plot_decision_boundary(plot_net, x.numpy(), y.numpy())
plt.title('sequential')

<matplotlib.text.Text at 0x10abaf518>