I built a two-layer LSTM Keras model and then compared the output of the first LSTM layer against a simple Python implementation of an LSTM layer, feeding both the same weights and the same input. The results for the first timestep of the sequence are similar but not identical, and from the second timestep onwards the deviation becomes too large. Below is my Keras model:
To compare against the Keras model, I first created an intermediate model that outputs the result of the first LSTM layer; the output for the first timestep is given by print(intermediate_output[0,0]), for the second timestep of the same sample by print(intermediate_output[0][1]), and for the last timestep by print(intermediate_output[0][127]).

from keras.layers import Input, LSTM, Dense
from keras.models import Model
from keras import regularizers

# n_hidden and lambda_loss_amount are defined elsewhere
inputs = Input(shape=(128, 9))
f1 = LSTM(n_hidden, return_sequences=True, name='lstm1')(inputs)
f2 = LSTM(n_hidden, return_sequences=False, name='lstm2')(f1)
fc = Dense(6, activation='softmax', kernel_regularizer=regularizers.l2(lambda_loss_amount), name='fc')(f2)
model2 = Model(inputs=inputs, outputs=fc)

layer_name = 'lstm1'
intermediate_layer_model = Model(inputs=model2.input,
                                 outputs=model2.get_layer(layer_name).output)
# keep the batch dimension so the input has shape (1, 128, 9)
intermediate_output = intermediate_layer_model.predict(X_single_sequence[0:1, :, :])
print(intermediate_output[0, 0])    # 1st timestep: uses input[0] (9 features)
print(intermediate_output[0][1])    # 2nd timestep: uses input[1] and the hidden state from the 1st timestep
print(intermediate_output[0][127])  # 128th (last) timestep
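As a quick sanity check (only a sketch, assuming n_hidden = 32, which matches the reshape(1, 32) used further down), the intermediate model should return one hidden-state vector per timestep:

print(intermediate_output.shape)  # expected: (1, 128, 32) -> (batch, timesteps, n_hidden)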

Reimplementing the first layer of the same model: I defined a function LSTMlayer that performs the same computation... After that, weightLSTM loads the saved weights, x_t is the same input timestep, and h_t later carries the output forward into the next timestep. intermediate_out is the function that corresponds to the LSTM layer.
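For reference, the recurrence the function below is meant to reproduce, assuming the usual Keras packing of the four gate weights as [i, f, c, o] blocks along the last axis (n = n_hidden, σ the logistic sigmoid, ⊙ the elementwise product):

$$s_t = x_t W + h_{t-1} U + b$$
$$i_t = \sigma(s_t[:, :n]), \quad f_t = \sigma(s_t[:, n:2n]), \quad \tilde{c}_t = \tanh(s_t[:, 2n:3n]), \quad o_t = \sigma(s_t[:, 3n:])$$
$$c_t = i_t \odot \tilde{c}_t + f_t \odot c_{t-1}, \qquad h_t = o_t \odot \tanh(c_t)$$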

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def LSTMlayer(warr, uarr, barr, x_t, h_tm1, c_tm1):
    '''
    Example shapes:
    c_tm1 = np.array([0, 0]).reshape(1, 2)
    h_tm1 = np.array([0, 0]).reshape(1, 2)
    x_t   = np.array([1]).reshape(1, 1)

    warr.shape = (nfeature, hunits*4)
    uarr.shape = (hunits, hunits*4)
    barr.shape = (hunits*4,)
    '''
    # pre-activations for all four gates at once
    s_t = x_t.dot(warr) + h_tm1.dot(uarr) + barr
    hunit = uarr.shape[0]
    i  = sigmoid(s_t[:, :hunit])            # input gate
    f  = sigmoid(s_t[:, 1*hunit:2*hunit])   # forget gate
    _c = np.tanh(s_t[:, 2*hunit:3*hunit])   # candidate cell state
    o  = sigmoid(s_t[:, 3*hunit:])          # output gate
    c_t = i*_c + f*c_tm1
    h_t = o*np.tanh(c_t)
    return h_t, c_t

weightLSTM = model2.layers[1].get_weights()
warr, uarr, barr = weightLSTM
print(warr.shape, uarr.shape, barr.shape)  # (9, 4*n_hidden), (n_hidden, 4*n_hidden), (4*n_hidden,)
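If it helps to inspect the weights gate by gate, the packed kernels can be split into their four blocks explicitly (only a sketch, assuming Keras's [i, f, c, o] ordering; the names W_i, U_i, b_i, etc. are my own):

hunits = uarr.shape[0]                          # 4 gates packed along the last axis
W_i, W_f, W_c, W_o = np.split(warr, 4, axis=1)  # input kernels,     each (9, hunits)
U_i, U_f, U_c, U_o = np.split(uarr, 4, axis=1)  # recurrent kernels, each (hunits, hunits)
b_i, b_f, b_c, b_o = np.split(barr, 4)          # biases,            each (hunits,)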

def intermediate_out(n, warr, uarr, barr, X_test):
    # run the recurrence over timesteps 0..n of the first test sample
    for i in range(0, n + 1):
        if i == 0:
            # initial hidden and cell states are zero
            c_tm1 = np.zeros((1, hunits), dtype=np.float32)
            h_tm1 = np.zeros((1, hunits), dtype=np.float32)
            h_t, c_t = LSTMlayer(warr, uarr, barr, X_test[0][0:1], h_tm1, c_tm1)
        else:
            h_t, c_t = LSTMlayer(warr, uarr, barr, X_test[0][i:i+1], h_t, c_t)
    return h_t
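Because each call to intermediate_out restarts the recurrence from t = 0, a small variant that keeps every hidden state makes it cheaper to compare all 128 timesteps at once (a sketch under the same assumptions; all_hidden_states is a name I made up):

def all_hidden_states(warr, uarr, barr, X_test):
    # returns an array of shape (128, hunits) with h_t for every timestep of sample 0
    h_t = np.zeros((1, hunits), dtype=np.float32)
    c_t = np.zeros((1, hunits), dtype=np.float32)
    outputs = []
    for i in range(X_test.shape[1]):  # 128 timesteps
        h_t, c_t = LSTMlayer(warr, uarr, barr, X_test[0][i:i+1], h_t, c_t)
        outputs.append(h_t[0])
    return np.stack(outputs)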

# 1st timestep
ht0 = intermediate_out(0, warr, uarr, barr, X_test)

# 2nd timestep
ht1 = intermediate_out(1, warr, uarr, barr, X_test)

# 128th (last) timestep
ht127 = intermediate_out(127, warr, uarr, barr, X_test)
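To put a number on "similar but not identical", the two implementations can be compared directly (a sketch; it assumes intermediate_output has shape (1, 128, n_hidden) as above):

for t, ht in [(0, ht0), (1, ht1), (127, ht127)]:
    diff = np.max(np.abs(intermediate_output[0, t] - ht[0]))
    print('timestep %3d  max abs difference: %.6f' % (t, diff))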

The output of the Keras LSTM layer from print(intermediate_output[0,0]) is as follows:

array([-0.05616369, -0.02299516, -0.00801201,  0.03872827,  0.07286803,
   -0.0081161 ,  0.05235862, -0.02240333,  0.0533984 , -0.08501752,
   -0.04866522,  0.00254417, -0.05269946,  0.05809477, -0.08961852,
    0.03975506,  0.00334282, -0.02813114,  0.01677909, -0.04411673,
   -0.06751891, -0.02771493, -0.03293832,  0.04311397, -0.09430656,
   -0.00269871, -0.07775293, -0.11201388, -0.08271968, -0.07464679,
   -0.03533605, -0.0112953 ], dtype=float32)

The output from my implementation, print(ht0), is:

array([[-0.05591469, -0.02280132, -0.00797964,  0.03681555,  0.06771626,
    -0.00855897,  0.05160453, -0.02309707,  0.05746563, -0.08988875,
    -0.05093143,  0.00264367, -0.05087904,  0.06033305, -0.0944235 ,
     0.04066657,  0.00344291, -0.02881387,  0.01696692, -0.04101779,
    -0.06718517, -0.02798996, -0.0346873 ,  0.04402719, -0.10021093,
    -0.00276826, -0.08390114, -0.1111543 , -0.08879325, -0.07953986,
    -0.03261982, -0.01175724]], dtype=float32)

Output from print(intermediate_output[0][1]):

array([-0.13193817, -0.03231169, -0.02096735,  0.07571879,  0.12657365,
    0.00067896,  0.09008797, -0.05597101,  0.09581321, -0.1696091 ,
   -0.08893952, -0.0352162 , -0.07936387,  0.11100324, -0.19354928,
    0.09691346, -0.0057206 , -0.03619875,  0.05680932, -0.08598096,
   -0.13047703, -0.06360915, -0.05707538,  0.09686109, -0.18573627,
    0.00711019, -0.1934243 , -0.21811798, -0.15629804, -0.17204499,
   -0.07108577,  0.01727455], dtype=float32)

print(ht1)

array([[-1.34333193e-01, -3.36792655e-02, -2.06091907e-02,
     7.15097040e-02,  1.18231244e-01,  7.98894180e-05,
     9.03479978e-02, -5.85013032e-02,  1.06357656e-01,
    -1.82848617e-01, -9.50253978e-02, -3.67032290e-02,
    -7.70251378e-02,  1.16113290e-01, -2.08772928e-01,
     9.89214852e-02, -5.82863577e-03, -3.79538871e-02,
     6.01535551e-02, -7.99121782e-02, -1.31876275e-01,
    -6.66067824e-02, -6.15542643e-02,  9.91254672e-02,
    -2.00229391e-01,  7.51443207e-03, -2.13641390e-01,
    -2.18286291e-01, -1.70858681e-01, -1.88928470e-01,
    -6.49823472e-02,  1.72227081e-02]], dtype=float32)

print(intermediate_output[0][127])

array([-0.46212202,  0.280646  ,  0.514289  , -0.21109435,  0.53513926,
        0.20116206,  0.24579187,  0.10773794, -0.6350403 , -0.0052841 ,
       -0.15971565,  0.00309152,  0.04909453,  0.29789132,  0.24909772,
        0.12323025,  0.15282209,  0.34281147, -0.2948742 ,  0.03674917,
       -0.22213924,  0.17646286, -0.12948939,  0.06568322,  0.04172657,
       -0.28638166, -0.29086435, -0.6872528 , -0.12620741,  0.63395363,
       -0.37212485, -0.6649531 ], dtype=float32)

print(ht127)

array([[-0.47431907,  0.29702517,  0.5428258 , -0.21381126,  0.6053808 ,
         0.22849198,  0.25656056,  0.10378123, -0.6960949 , -0.09966939,
        -0.20533416, -0.01677105,  0.02512029,  0.37508538,  0.35703233,
         0.14703275,  0.24901289,  0.35873395, -0.32249793,  0.04093777,
        -0.20691746,  0.20096642, -0.11741923,  0.06169611,  0.01019177,
        -0.33316574, -0.08499744, -0.6748463 , -0.06659956,  0.71961826,
        -0.4071832 , -0.6804066 ]], dtype=float32)

The outputs from print(intermediate_output[0,0]) and print(ht0), and from print(intermediate_output[0][1]) and print(ht1), are similar... but the outputs from print(intermediate_output[0][127]) and print(ht127) are not the same, even though both implementations run on the same GPU...
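To see how the gap grows along the sequence, the per-timestep deviation can be printed with the helper sketched earlier (again only a sketch, built on my hypothetical all_hidden_states):

manual_h = all_hidden_states(warr, uarr, barr, X_test)                # (128, hunits)
per_step = np.max(np.abs(intermediate_output[0] - manual_h), axis=1)  # (128,)
for t in (0, 1, 31, 63, 127):
    print('t = %3d  max abs difference: %.6f' % (t, per_step[t]))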

I have gone through the Keras documentation and, as far as I can tell, I am not doing anything wrong... Please comment on this and let me know what I am missing.