我创建了一个全零张量,只把 [:, -1, -1, 0] 处的条目设置为 1e10,然后用一个随机卷积核对该张量做卷积。当批量大小较小(例如 5)时,输出的 [:, 0, 0, :] 为零(因为该输出位置对应的输入区域全为零);但当批量大小较大(例如 100)时,这些条目包含明显大于零的数值。这个问题只在 GPU 上出现。

import tensorflow as tf
import numpy as np


def test(sess):
    """Convolve a mostly-zero input with a random kernel and report leakage.

    Each input is all zeros except for ``1e10`` written at
    ``[:, -1, -1, 0]``. With a 5x5 kernel and 'VALID' padding, the output at
    spatial position (0, 0) only covers input rows/columns 0..4, which are
    all zero, so ``output[:, 0, 0, :]`` is expected to be exactly zero.

    The same graph is evaluated with a batch of 5 and a batch of 100, and
    the number of non-zero values found at that output position is printed
    for each. If the large batch has any, the first sample's channel vector
    is printed as well.

    Args:
        sess: session object whose ``run(fetches, feed_dict=...)`` is used
            to evaluate the convolution.
    """

    def make_input(batch_size):
        # All zeros except one huge value in the far spatial corner of
        # channel 0, for every sample in the batch.
        data = np.zeros((batch_size, 32, 32, 64), dtype=np.float32)
        data[:, -1, -1, 0] = 1e10
        return data

    kernel = np.random.uniform(size=(5, 5, 64, 64)).astype(np.float32)
    images = tf.placeholder(dtype=tf.float32, shape=(None, 32, 32, 64))
    conv = tf.nn.conv2d(
        images, tf.constant(kernel), [1, 1, 1, 1], 'VALID', data_format='NHWC'
    )

    small_batch = make_input(5)
    large_batch = make_input(100)

    small_out = sess.run(conv, feed_dict={images: small_batch})
    large_out = sess.run(conv, feed_dict={images: large_batch})

    # Only the top-left output position is inspected; see docstring.
    small_nonzero = np.count_nonzero(small_out[:, 0, 0, :])
    large_nonzero = np.count_nonzero(large_out[:, 0, 0, :])

    print('Number nonzero (good): ', small_nonzero)
    print('Number nonzero (bad): ', large_nonzero)
    if large_nonzero > 0:
        print(large_out[0, 0, 0, :])

# Run the same check under each device scope, CPU first and then GPU, sharing
# a single session. The banner states the outcome the author expects.
_DEVICE_RUNS = (
    ('cpu:0', 'Testing on cpu -- should succeed'),
    ('gpu:0', 'Testing on gpu -- typically fails'),
)

with tf.Session() as sess:
    for _device_name, _banner in _DEVICE_RUNS:
        # Ops created inside test() are built within this device scope.
        with tf.device(_device_name):
            print(_banner)
            test(sess)