PyTorch/[PyTorch 学习笔记] 8.4 手动实现 RNN

PyTorch/[PyTorch 学习笔记] 8.4 手动实现 RNN

本章代码:https://github.com/zhangxiann/PyTorch_Practice/blob/master/lesson8/rnn_demo.py

这篇文章主要介绍了循环神经网络(Recurrent Neural Network),简称 RNN。

RNN 常用于处理不定长输入,常用于 NLP 以及时间序列的任务,这种数据一半具有前后关系。

RNN 网络结构如下:


上图的数据说明如下:

  • \(x_{t}\):时刻 t 的输入,\(shape=(1,57)\),表示 (batch_size, feature_dim)。57 表示词向量的长度。
  • \(s_{t}\):时刻 t 的状态值,\(shape=(1,128)\),表示 (batch_size, hidden_dim)。这个状态值有两个作用:经过一个全连接层得到输出;输入到下一个时刻,影响下一个时刻的状态值。也称为hedden_state,隐藏层状态信息,记录过往时刻的信息。第一个时刻的\(s_{t}\)会初始化为全 0 的向量。
  • \(o_{t}\):时刻 t 的输出,\(shape=(1,18)\),表示 (batch_size, classes)
  • \(U\):linear 层输入\(x_{t}\)的权重参数,\(shape=(57, 128)\),表示 (feature_dim, hidden_dim)
  • \(W\):linear 层状态值\(s_{t-1}\)的权重参数,\(shape=(128,128)\),表示 (hidden_dim, hidden_dim)
  • \(V\):linear 层状态值\(s_{t}\)的权重参数,\(shape=(128,18)\),表示 (hidden_dim, classes)

公式如下:

\(s_{t}=f\left(x_{t} U+s_{t-1} W \right)\)

\(o_{t}=\operatorname{softmax}\left(s_{t} V \right)\)

下面的例子是使用 RNN 实现人人名分类:输入任意长度姓名(字符串),输出姓名来自哪个国家(18 分类任务)。数据来源于:http://download.pytorch.org/tutorial/data.zip

1
2
3
4
# Chou(字符串) -> RNN -> Chinese(分类类别)
for string in [C,h,o,u]:
首先把每个字母转换成 one-hot -> [0,0,...,1,...,0]
y,h=model([0,0,...,1,...,0], h) # h 就是隐藏层的状态信息

这里没有使用 DataLoader 和 Dataset,而是手动构造了数据集的结构,训练数据使用 dict 存储,包括 18 个元素,每个元素是一个 list,存储了 18 个类别的名字列表。label 存放在一个 list 中。在迭代训练过程如下:

  • 首先随机选择 label 和名字,名字转换为 one-hot 的张量,形状为\([length,1,57]\),其中length表示名字的长度,label 也转换为张量,形状为 1。
  • 初始化隐藏层状态信息。
  • 循环把名字中的每个字符的 one-hot 向量输入到 RNN 中。
  • 最后得到 18 分类的 output。
  • 这里没有使用优化器,而是手动进行反向传播更新参数值。

代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
from io import open
import glob
import unicodedata
import string
import math
import os
import time
import torch.nn as nn
import torch
import random
import matplotlib.pyplot as plt
import torch.utils.data
from common_tools import set_seed
import enviroments

set_seed(1) # 设置随机种子
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")


# Read a file and split into lines
def readLines(filename):
lines = open(filename, encoding='utf-8').read().strip().split('\n')
return [unicodeToAscii(line) for line in lines]


def unicodeToAscii(s):
return ''.join(
c for c in unicodedata.normalize('NFD', s)
if unicodedata.category(c) != 'Mn'
and c in all_letters)


# Find letter index from all_letters, e.g. "a" = 0
def letterToIndex(letter):
return all_letters.find(letter)


# Just for demonstration, turn a letter into a <1 x n_letters> Tensor
def letterToTensor(letter):
tensor = torch.zeros(1, n_letters)
tensor[0][letterToIndex(letter)] = 1
return tensor


# Turn a line into a <line_length x 1 x n_letters>,
# or an array of one-hot letter vectors
def lineToTensor(line):
tensor = torch.zeros(len(line), 1, n_letters)
for li, letter in enumerate(line):
tensor[li][0][letterToIndex(letter)] = 1
return tensor


def categoryFromOutput(output):
top_n, top_i = output.topk(1)
category_i = top_i[0].item()
return all_categories[category_i], category_i


def randomChoice(l):
return l[random.randint(0, len(l) - 1)]


def randomTrainingExample():
category = randomChoice(all_categories) # 选类别
line = randomChoice(category_lines[category]) # 选一个样本
category_tensor = torch.tensor([all_categories.index(category)], dtype=torch.long)
line_tensor = lineToTensor(line) # str to one-hot
return category, line, category_tensor, line_tensor


def timeSince(since):
now = time.time()
s = now - since
m = math.floor(s / 60)
s -= m * 60
return '%dm %ds' % (m, s)


# Just return an output given a line
def evaluate(line_tensor):
hidden = rnn.initHidden()

for i in range(line_tensor.size()[0]):
output, hidden = rnn(line_tensor[i], hidden)

return output


def predict(input_line, n_predictions=3):
print('\n> %s' % input_line)
with torch.no_grad():
output = evaluate(lineToTensor(input_line))

# Get top N categories
topv, topi = output.topk(n_predictions, 1, True)

for i in range(n_predictions):
value = topv[0][i].item()
category_index = topi[0][i].item()
print('(%.2f) %s' % (value, all_categories[category_index]))


def get_lr(iter, learning_rate):
lr_iter = learning_rate if iter < n_iters else learning_rate*0.1
return lr_iter

class RNN(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(RNN, self).__init__()

self.hidden_size = hidden_size

self.u = nn.Linear(input_size, hidden_size)
self.w = nn.Linear(hidden_size, hidden_size)
self.v = nn.Linear(hidden_size, output_size)

self.tanh = nn.Tanh()
self.softmax = nn.LogSoftmax(dim=1)

def forward(self, inputs, hidden):

u_x = self.u(inputs)

hidden = self.w(hidden)
hidden = self.tanh(hidden + u_x)

output = self.softmax(self.v(hidden))

return output, hidden

def initHidden(self):
return torch.zeros(1, self.hidden_size)


def train(category_tensor, line_tensor):
hidden = rnn.initHidden()

rnn.zero_grad()

line_tensor = line_tensor.to(device)
hidden = hidden.to(device)
category_tensor = category_tensor.to(device)

for i in range(line_tensor.size()[0]):
output, hidden = rnn(line_tensor[i], hidden)

loss = criterion(output, category_tensor)
loss.backward()

# Add parameters' gradients to their values, multiplied by learning rate
for p in rnn.parameters():
p.data.add_(-learning_rate, p.grad.data)

return output, loss.item()


if __name__ == "__main__":
# config
path_txt = os.path.join(enviroments.names,"*.txt")
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters) # 52 + 5 字符总数
print_every = 5000
plot_every = 5000
learning_rate = 0.005
n_iters = 200000

# step 1 data
# Build the category_lines dictionary, a list of names per language
category_lines = {}
all_categories = []
for filename in glob.glob(path_txt):
category = os.path.splitext(os.path.basename(filename))[0]
all_categories.append(category)
lines = readLines(filename)
category_lines[category] = lines

n_categories = len(all_categories)

# step 2 model
n_hidden = 128
# rnn = RNN(n_letters, n_hidden, n_categories)
rnn = RNN(n_letters, n_hidden, n_categories)

rnn.to(device)

# step 3 loss
criterion = nn.NLLLoss()

# step 4 optimize by hand

# step 5 iteration
current_loss = 0
all_losses = []
start = time.time()
for iter in range(1, n_iters + 1):
# sample
category, line, category_tensor, line_tensor = randomTrainingExample()

# training
output, loss = train(category_tensor, line_tensor)

current_loss += loss

# Print iter number, loss, name and guess
if iter % print_every == 0:
guess, guess_i = categoryFromOutput(output)
correct = '✓' if guess == category else '✗ (%s)' % category
print('Iter: {:<7} time: {:>8s} loss: {:.4f} name: {:>10s} pred: {:>8s} label: {:>8s}'.format(
iter, timeSince(start), loss, line, guess, correct))

# Add current loss avg to list of losses
if iter % plot_every == 0:
all_losses.append(current_loss / plot_every)
current_loss = 0
path_model = os.path.join(BASE_DIR, "rnn_state_dict.pkl")
torch.save(rnn.state_dict(), path_model)
plt.plot(all_losses)
plt.show()

predict('Yue Tingsong')
predict('Yue tingsong')
predict('yutingsong')

predict('test your name')

参考资料


如果你觉得这篇文章对你有帮助,不妨点个赞,让我有更多动力写出好文章。

我的文章会首发在公众号上,欢迎扫码关注我的公众号张贤同学


评论