我正在尝试训练一个类似于Facenet和Openface的卷积神经网络 . 我的模型灵感来自VGG-16(如下所示) .
问题是,当我在返回输出之前使用 output = tf.nn.l2_normalize(output, 0)
时,精度会显著下降。不使用 l2_normalize 时,我可以得到接近 98% 的准确率;但是,Openface 所描述的模型使用了它。
这些是 output = tf.nn.l2_normalize(output, 0)
的结果:
epoch 0 loss 0.36241 acc 45.59
nr_test_examples 6400
total_batch_test 100
TEST epoch 0 loss 0.20000 acc 48.45
epoch 1 loss 0.20000 acc 48.62
nr_test_examples 6400
total_batch_test 100
TEST epoch 1 loss 0.20000 acc 57.81
epoch 2 loss 0.20000 acc 49.34
nr_test_examples 6400
total_batch_test 100
TEST epoch 2 loss 0.20000 acc 43.75
epoch 3 loss 0.20000 acc 48.97
nr_test_examples 6400
total_batch_test 100
TEST epoch 3 loss 0.20000 acc 53.12
epoch 4 loss 0.20000 acc 48.16
nr_test_examples 6400
total_batch_test 100
TEST epoch 4 loss 0.20000 acc 53.12
epoch 5 loss 0.20000 acc 49.45
nr_test_examples 6400
total_batch_test 100
TEST epoch 5 loss 0.20000 acc 56.25
epoch 6 loss 0.20000 acc 48.75
nr_test_examples 6400
total_batch_test 100
TEST epoch 6 loss 0.20000 acc 53.12
epoch 7 loss 0.20000 acc 48.58
nr_test_examples 6400
EDIT - 这些是没有 tf.nn.l2_normalize(output,0)
的结果
epoch 0 loss 0.20137 acc 56.56
nr_test_examples 6400
total_batch_test 100
TEST epoch 0 loss 0.15097 acc 73.44
epoch 1 loss 0.20044 acc 57.64
nr_test_examples 6400
total_batch_test 100
TEST epoch 1 loss 0.10509 acc 82.81
epoch 2 loss 0.19985 acc 58.14
nr_test_examples 6400
total_batch_test 100
TEST epoch 2 loss 0.09480 acc 78.12
epoch 3 loss 0.19978 acc 58.89
nr_test_examples 6400
total_batch_test 100
TEST epoch 3 loss 0.07886 acc 82.81
epoch 4 loss 0.20060 acc 59.12
nr_test_examples 6400
total_batch_test 100
TEST epoch 4 loss 0.05395 acc 85.94
epoch 5 loss 0.19938 acc 59.39
nr_test_examples 6400
total_batch_test 100
TEST epoch 5 loss 0.07320 acc 87.50
epoch 6 loss 0.20056 acc 59.14
nr_test_examples 6400
total_batch_test 100
为什么会出现这种情况?我使用 triplet loss 作为损失函数(只考虑损失大于零的三元组)。
def siamese_convnet(x):
    """Build the VGG-16-style embedding network and return unit-length embeddings.

    The input is reshaped to ``[-1, 160, 160, 1]`` and passed through five
    convolution blocks (each followed by ``max_pool``), two ReLU
    fully-connected layers and a linear 128-unit output layer.  Each row of
    the result is L2-normalised.

    Variables are created with ``tf.get_variable`` under fixed names
    (``w_conv1_1`` ... ``w_out``, ``bias_conv1_1`` ... ``out``).
    NOTE(review): sharing weights between the siamese/triplet branches
    presumably relies on the caller's variable scope using reuse -- confirm.

    Args:
        x: tensor that can be reshaped to ``[-1, 160, 160, 1]``.

    Returns:
        A ``[batch, 128]`` tensor whose rows each have unit L2 norm.
    """
    # (layer name, weight shape) grouped by block; max_pool follows each block.
    conv_blocks = [
        [('conv1_1', [3, 3, 1, 64]), ('conv1_2', [3, 3, 64, 64])],
        [('conv2_1', [3, 3, 64, 128]), ('conv2_2', [3, 3, 128, 128])],
        [('conv3_1', [3, 3, 128, 256]), ('conv3_2', [3, 3, 256, 256]),
         ('conv3_3', [3, 3, 256, 256])],
        [('conv4_1', [3, 3, 256, 512]), ('conv4_2', [3, 3, 512, 512]),
         ('conv4_3', [1, 1, 512, 512])],
        [('conv5_1', [3, 3, 512, 512]), ('conv5_2', [3, 3, 512, 512]),
         ('conv5_3', [1, 1, 512, 512])],
    ]
    fc_layers = [('fc_1', [5 * 5 * 512, 2048]), ('fc_2', [2048, 1024])]
    hidden_layers = [spec for block in conv_blocks for spec in block] + fc_layers

    # All weights are created before any bias, in forward order, so the
    # variable creation order is the same as in the hand-written version.
    weights = {}
    for layer, shape in hidden_layers:
        weights[layer] = tf.get_variable(
            name='w_' + layer,
            initializer=tf.contrib.layers.xavier_initializer(),
            shape=shape)
    w_out = tf.get_variable(
        name='w_out',
        initializer=tf.contrib.layers.xavier_initializer(),
        shape=[1024, 128])

    biases = {}
    for layer, shape in hidden_layers:
        # One bias per output channel/unit, i.e. the last weight dimension.
        biases[layer] = tf.get_variable(
            name='bias_' + layer,
            initializer=tf.constant(0.01, shape=[shape[-1]]))
    out = tf.get_variable(name='out', initializer=tf.constant(0.01, shape=[128]))

    net = tf.reshape(x, [-1, 160, 160, 1])
    for block in conv_blocks:
        for layer, _ in block:
            net = tf.nn.relu(conv2d(net, weights[layer]) + biases[layer])
        net = max_pool(net)

    # The flatten size assumes the five pools reduce 160x160 to 5x5
    # (conv2d / max_pool are defined elsewhere -- TODO confirm padding/strides).
    net = tf.reshape(net, [-1, 5 * 5 * 512])
    for layer, _ in fc_layers:
        net = tf.nn.relu(tf.matmul(net, weights[layer]) + biases[layer])

    output = tf.matmul(net, w_out) + out
    # Normalise along axis 1 (the 128 features of each sample).  Axis 0 is the
    # batch axis: normalising there rescales every feature across the samples
    # of the batch, which makes an embedding depend on its batch-mates and
    # does not give unit-length embeddings for the Euclidean comparisons.
    output = tf.nn.l2_normalize(output, 1)
    return output
LATER EDIT
在我发送给卷积神经网络之前,我已经对图像进行了规范化,那么为什么需要对输出进行归一化呢?
但是,我使用网络的输出把包含人脸的图像编码成 128 个值。然后我将原始图像与其他人的图像进行比较,通过计算每幅图像的 128 个特征之间的欧氏距离来确定原始图像中的人是谁。所以我认为对输出进行归一化有助于进行这些比较(即计算网络为每幅图像生成的特征之间的欧氏距离)。
所以,考虑到这一点,我应该使用 tf.nn.l2_normalize 吗?
def get_opencv_image_casia(self, file):  # file is the path to the picture
    """Load an image as a flattened, standardised 160x160 grayscale vector.

    The image is read in grayscale with OpenCV, resized to 160x160,
    flattened to 25600 values, converted to float32 and standardised with
    its own mean and standard deviation.

    Args:
        file: path to the image file.

    Returns:
        A ``(pixels, file)`` tuple: the standardised float32 vector of
        length 25600 and the path that was passed in.
    """
    gray = cv2.resize(cv2.imread(file, cv2.IMREAD_GRAYSCALE), (160, 160))
    flat = np.array(np.reshape(gray, (25600)), dtype=np.uint8)
    pixels = flat.astype('float32')
    # The small epsilon keeps the division finite for a constant image.
    spread = pixels.std() + 1e-8
    standardised = (pixels - pixels.mean()) / spread
    return (standardised, file)