LSTM module

Parameter description

The input parameter list includes:

input_size: Characteristic dimension of input data
hidden_size: Dimension of hidden layer in LSTM
num_layers: The number of stacked recurrent layers
bias: Indicates whether to use bias. The default value is True
batch_first: Specifies whether batch is the first dimension of the input data. The output also follows this rule. The default is False
dropout: The default is 0, which means no dropout
bidirectional: The default value is False, indicating that a bidirectional LSTM is not used

Input data: input, (h_0, c_0):

input: a tensor of shape (seq_length, batch_size, input_size)
h_0: shape (num_layers * num_directions, batch, hidden_size). It contains the initial hidden state of each sentence in the current batch. num_layers is the number of LSTM layers; if bidirectional=True then num_directions=2, otherwise num_directions=1
c_0: same shape as h_0. It contains the initial cell state of each sentence in the current batch. If h_0 and c_0 are not provided, they default to 0

Output data includes output,(h_n,c_n):

output: shape (seq_length, batch_size, num_directions * hidden_size),

It contains the output characteristic of the last layer of LSTM (h_T), where T is the length of each sentence in batch_size.

h_n: shape (num_directions * num_layers, batch, hidden_size)
c_n.shape == h_n.shape
h_n contains the hidden state of the last word of the sentence and c_n contains the cell state of the last word of the sentence, so their shapes have nothing to do with the sentence length seq_length.

Python function verification

import torch


def layer_output(input_data, w_ii, w_hi, b_ii, b_hi, h, fn='sigmoid'):
    """Compute one gate activation for a single time step.

    Evaluates ``act(w_ii @ input_data.T + b_ii + w_hi @ h + b_hi)`` with both
    biases broadcast as column vectors.

    Args:
        input_data: 2-D input for one time step; it is transposed before the
            matmul, so rows are samples and columns are features.
        w_ii: Input-to-hidden weight slice for this gate.
        w_hi: Hidden-to-hidden weight slice for this gate.
        b_ii: 1-D input-to-hidden bias slice for this gate.
        b_hi: 1-D hidden-to-hidden bias slice for this gate.
        h: Previous hidden state laid out column-wise, i.e. with as many rows
            as ``w_hi`` has columns.
        fn: ``'sigmoid'`` selects the sigmoid activation; any other value
            selects tanh.

    Returns:
        The activated gate tensor with one row per hidden unit.
    """
    # Biases are 1-D; reshape them to columns so they broadcast across the
    # sample columns produced by the matmuls.
    pre_activation = (
        torch.matmul(w_ii, input_data.T) + b_ii.reshape(-1, 1)
        + torch.matmul(w_hi, h) + b_hi.reshape(-1, 1)
    )
    if fn == 'sigmoid':
        return torch.sigmoid(pre_activation)
    return torch.tanh(pre_activation)


def lstm_output(input_data, hh_weight, ih_weight, hh_bias, ih_bias, hidden_data, current):
    """Run a single-layer, unidirectional LSTM by hand over a whole sequence.

    Args:
        input_data: Tensor of shape (seq_length, batch, input_size).
        hh_weight: Hidden-to-hidden weights with the four gates stacked along
            dim 0 (4 * hidden rows).
        ih_weight: Input-to-hidden weights, stacked the same way.
        hh_bias: Hidden-to-hidden bias, 4 * hidden entries.
        ih_bias: Input-to-hidden bias, 4 * hidden entries.
        hidden_data: Initial hidden state of shape (1, batch, hidden).
        current: Initial cell state of shape (1, batch, hidden).

    Returns:
        ``(output, (c_n, h_n))`` where ``output`` holds the hidden state of
        every time step with shape (seq_length, batch, hidden), and ``c_n`` /
        ``h_n`` are the final cell and hidden states with shape
        (1, batch, hidden). Note the state tuple is ordered (cell, hidden).

    Raises:
        ValueError: If ``hidden_data`` describes more than one layer or
            direction; only a single layer is implemented here.
    """
    _, batch, _ = input_data.shape
    stack, _, hidden = hidden_data.shape
    if stack != 1:
        raise ValueError(
            "lstm_output only supports a single unidirectional layer, "
            "got {} stacked states".format(stack))

    # The gate parameters are packed along dim 0 in the order i, f, g, o.
    # Slicing them is loop-invariant, so do it once up front.
    w_ii, w_if, w_ig, w_io = ih_weight.chunk(4, dim=0)
    w_hi, w_hf, w_hg, w_ho = hh_weight.chunk(4, dim=0)
    b_ii, b_if, b_ig, b_io = ih_bias.chunk(4, dim=0)
    b_hi, b_hf, b_hg, b_ho = hh_bias.chunk(4, dim=0)

    # layer_output works column-wise, so keep the states as (hidden, batch).
    h_t = hidden_data.reshape(batch, hidden).T
    c_t = current.reshape(batch, hidden).T

    step_outputs = []
    for x_t in input_data:  # iterate over the time dimension
        i_t = layer_output(x_t, w_ii, w_hi, b_ii, b_hi, h_t)
        f_t = layer_output(x_t, w_if, w_hf, b_if, b_hf, h_t)
        g_t = layer_output(x_t, w_ig, w_hg, b_ig, b_hg, h_t, 'tanh')
        o_t = layer_output(x_t, w_io, w_ho, b_io, b_ho, h_t)

        c_t = f_t * c_t + i_t * g_t
        h_t = o_t * torch.tanh(c_t)
        step_outputs.append(h_t.T)

    if step_outputs:
        output = torch.stack(step_outputs)
    else:
        # Empty sequence: no steps were taken, the states are returned as-is.
        output = input_data.new_empty((0, batch, hidden))

    h_n = h_t.T.reshape(stack, batch, hidden)
    c_n = c_t.T.reshape(stack, batch, hidden)
    return output, (c_n, h_n)
def compare(my_data, torch_data):
    """Print whether two tensors agree element-wise within 1e-5.

    The check fails when the shapes differ (so broadcasting cannot hide a
    mismatch) or when any element differs by 1e-5 or more in absolute value.

    Args:
        my_data: Tensor produced by the hand-written implementation.
        torch_data: Reference tensor produced by PyTorch.
    """
    if my_data.shape != torch_data.shape:
        passed = False
    else:
        # Use the largest absolute error: a signed sum lets positive and
        # negative errors cancel and treats any negative total as a pass.
        abs_diff = (my_data - torch_data).abs()
        passed = abs_diff.numel() == 0 or abs_diff.max().item() < 1e-5
    if passed:
        print("Verify passed")
    else:
        print("Verify Faied")

# Verify the hand-written LSTM against torch.nn.LSTM on one random sequence.
input_num = 57
hidden_num = 64
torch.manual_seed(1)
lstm = torch.nn.LSTM(input_size=input_num, hidden_size=hidden_num,
                     num_layers=1, bias=True)

hh_weight, ih_weight = lstm.weight_hh_l0, lstm.weight_ih_l0
hh_bias, ih_bias = lstm.bias_hh_l0, lstm.bias_ih_l0
# Initial states: (num_layers * num_directions, batch, hidden_size).
current = torch.randn(1, 1, hidden_num)
hidden = torch.randn(1, 1, hidden_num)
# Input: (seq_length, batch, input_size), since batch_first is left False.
input_data = torch.rand(5, 1, input_num)
# lstm_output returns its state tuple as (cell, hidden).
o_t, (m_c, m_h) = lstm_output(input_data, hh_weight, ih_weight,
                              hh_bias, ih_bias, hidden, current)

output_res, (hn_res, cn_res) = lstm(
    input_data, (hidden, current))
print("Torch output:", output_res.shape, hn_res.shape, cn_res.shape)
print("My output:", o_t.shape, m_h.shape, m_c.shape)
compare(output_res, o_t)
compare(hn_res, m_h)
compare(cn_res, m_c)


Copy the code

GRU module: verifying the structure against PyTorch's calculation:

import torch
from torch import nn
def dense(input_data, weight, bias):
    """Apply an affine map to a single flattened sample.

    Args:
        input_data: Tensor holding one sample; it is flattened into a column
            vector, so its element count must equal ``weight``'s column count.
        weight: 2-D weight matrix.
        bias: 1-D bias with one entry per row of ``weight``.

    Returns:
        Column vector ``weight @ x + bias`` with one row per row of
        ``weight``.
    """
    # Both operands become columns so the result stays a column vector.
    return torch.matmul(weight, input_data.reshape(-1, 1)) + bias.reshape(-1, 1)
def my_gru(input_data, hidden, weight_hh_l0, weight_ih_l0, bias_ih_l0, bias_hh_l0):
    """Compute one GRU step by hand for a single sample.

    Args:
        input_data: Input for one time step of one sample; flattened to a
            column vector by ``dense``.
        hidden: Previous hidden state for one sample; flattened likewise.
        weight_hh_l0: Hidden-to-hidden weights with the three gates stacked
            along dim 0.
        weight_ih_l0: Input-to-hidden weights, stacked the same way.
        bias_ih_l0: Input-to-hidden bias, three gates concatenated.
        bias_hh_l0: Hidden-to-hidden bias, three gates concatenated.

    Returns:
        The new hidden state as a column vector with one row per hidden unit.
    """
    # The gate parameters are packed in the order reset, update, new.
    # Splitting into three equal chunks works for any hidden size instead of
    # assuming two hidden units.
    W_ir, W_iz, W_in = weight_ih_l0.chunk(3, dim=0)
    b_ir, b_iz, b_in = bias_ih_l0.chunk(3, dim=0)

    W_hr, W_hz, W_hn = weight_hh_l0.chunk(3, dim=0)
    b_hr, b_hz, b_hn = bias_hh_l0.chunk(3, dim=0)

    r_t = torch.sigmoid(dense(input_data, W_ir, b_ir) + dense(hidden, W_hr, b_hr))
    z_t = torch.sigmoid(dense(input_data, W_iz, b_iz) + dense(hidden, W_hz, b_hz))
    # The reset gate scales only the hidden-side contribution of the candidate.
    n_t = torch.tanh(dense(input_data, W_in, b_in) + r_t * dense(hidden, W_hn, b_hn))
    h_t = (1 - z_t) * n_t + z_t * hidden.reshape(-1, 1)
    return h_t

# Compare the hand-written GRU step with nn.GRU on one random sample.
gru = nn.GRU(input_size=3, hidden_size=2, num_layers=1)
# Shapes: input (seq_length, batch, input_size), hidden (num_layers, batch, hidden_size).
input_data = torch.randn(1, 1, 3)
init_hidden = torch.randn(1, 1, 2)
output, hn = gru(input_data, init_hidden)
weight_hh_l0, weight_ih_l0 = gru.weight_hh_l0, gru.weight_ih_l0
bias_ih_l0, bias_hh_l0 = gru.bias_ih_l0, gru.bias_hh_l0
my_output = my_gru(input_data, init_hidden, weight_hh_l0, weight_ih_l0, bias_ih_l0, bias_hh_l0)
print("PyTorch output:{} my output:{}".format(output, my_output))
Copy the code

RNN module

def my_rnn(input_data, weight_ih_l0, weight_hh_l0, bias_ih_l0, bias_hh_l0, h_0):
    """Unroll the first two time steps of a tanh RNN layer by hand.

    Args:
        input_data: Sequence tensor; entries 0 and 1 along the first axis are
            each flattened into a single row vector.
        weight_ih_l0: Input-to-hidden weight matrix.
        weight_hh_l0: Hidden-to-hidden weight matrix.
        bias_ih_l0: Input-to-hidden bias.
        bias_hh_l0: Hidden-to-hidden bias.
        h_0: Initial hidden state, flattened into a single row vector.

    Returns:
        ``((h_1, h_2), h_2)``: the hidden states after the first and second
        steps as row vectors, followed by the final state again.
    """
    ih_bias_col = bias_ih_l0.reshape(-1, 1)
    hh_bias_col = bias_hh_l0.reshape(-1, 1)

    def advance(x_row, prev_row):
        # Recurrent contribution first, then add it to the input projection.
        recurrent = torch.matmul(weight_hh_l0, prev_row.T) + hh_bias_col
        projected = torch.matmul(weight_ih_l0, x_row.T) + ih_bias_col + recurrent
        return torch.tanh(projected).T

    first_state = advance(input_data[0].reshape(1, -1), h_0.reshape(1, -1))
    second_state = advance(input_data[1].reshape(1, -1), first_state)

    return (first_state, second_state), second_state
Copy the code

mo4tech.com (Moment For Technology) is a global community with thousands techies from across the global hang out!Passionate technologists, be it gadget freaks, tech enthusiasts, coders, technopreneurs, or CIOs, you would find them all here.

PyTorch RNN module parsing

LSTM module

Parameters that

Python function verification

RNN module

PyTorch RNN module parsing

LSTM module

Parameters that

Python function verification

RNN module

Related Posts

Unsupervised learning algorithms in Python

PyTorch source code interpretation cpp_extension: an inside look at the C++/CUDA operator implementation and call flow

Spark Machine Learning Combat (12) – Recommended systems combat