我正在尝试构建一个基于哈希匹配、用于人体姿态估计的孪生神经网络（Siamese network）。

基本概念和参考资料已在我的上一篇帖子中描述。

我修复了“No gradients provided for any variable”（没有为任何变量提供梯度）错误，但发现无论训练多久，模型的损失值都没有改善。

我在第 100 次、第 10000 次和第 500000 次迭代时保存了检查点，而恢复出的模型给出的损失值完全相同。


1. 损失函数设计不当（Bad loss function design）：

损失的设计借鉴了 OpenPose：损失是“距离图”与“标签图”之间的差异。

与 OpenPose 项目不同，这里的“距离图”是通过计算与“提示哈希”（hint hash）之间的汉明距离得到的。这个过程包含许多非常规操作和二值化，可能导致模型无法训练。

2. 训练循环设计错误（Wrong training loop design）：

在训练期间，我发现每个保存的检查点文件都被命名为“XXXX.ckpt.data-00000-of-00001”：XXXX 部分会变化，但后缀“00000-of-00001”始终不变。

我怀疑我的模型或训练循环有问题，导致它一直在重复训练的第一步。



import tensorflow as tf
import numpy as np
import time
from imageLoader import getPaddedROI,training_data_feeder
import math
import cv2


def truncated_normal_var(name, shape, dtype):
    """Return a variable initialised from a truncated normal (stddev 0.01).

    Thin wrapper around ``tf.get_variable``; ``name``, ``shape`` and ``dtype``
    are forwarded unchanged.
    """
    initializer = tf.truncated_normal_initializer(stddev=0.01)
    return tf.get_variable(name=name, shape=shape, dtype=dtype, initializer=initializer)
def zero_var(name, shape, dtype):
    """Return a variable whose every element is initialised to 0.0.

    Thin wrapper around ``tf.get_variable``; ``name``, ``shape`` and ``dtype``
    are forwarded unchanged.
    """
    initializer = tf.constant_initializer(0.0)
    return tf.get_variable(name=name, shape=shape, dtype=dtype, initializer=initializer)

# Side length, in pixels, of a square hint patch and of the full input image.
roi_size = 23
image_input_size = 301

# Input placeholders.
# Batch-1 hints: 16 RGB patches of roi_size x roi_size.
inputs_b1h1 = tf.placeholder(dtype=tf.float32,
                             shape=(16, roi_size, roi_size, 3),
                             name='inputs_b1h1')
#inputs_b1h2 = tf.placeholder(tf.float32, ( 16, roi_size, roi_size, 3), name='inputs_b1h2')

# Full RGB images (any batch size) and the 16 target maps of 76 x 76.
inputs_s = tf.placeholder(dtype=tf.float32,
                          shape=(None, image_input_size, image_input_size, 3),
                          name='inputs_s')
labels = tf.placeholder(dtype=tf.float32, shape=(16, 76, 76), name='labels')

#define the model

def _ste_sign(value):
    """Binarize ``value`` with ``tf.sign`` using a straight-through gradient.

    ``tf.sign`` has a zero gradient everywhere, so nothing upstream of it can
    be trained.  Overriding the gradient of the "Sign" op with that of
    "Identity" lets the incoming gradient pass through unchanged.
    """
    graph = tf.get_default_graph()
    with graph.gradient_override_map({"Sign": "Identity"}):
        return tf.sign(value)


def paraNet(inputs, inputs_s , ground_truth_labels ):
    """Build the hint/sample hashing network and its L2 loss.

    Both branches use the same four convolution layers (shared through the
    'conv' variable scope).  The hint branch uses 'valid' padding and the
    sample branch uses 'same' padding.  Each branch output is binarized into a
    32-channel code, and for each of the 16 hints a map of the summed absolute
    code difference against every sample position is produced.

    Args:
        inputs: hint patches; ``hint[0]`` .. ``hint[15]`` are indexed below, so
            at least 16 patches are required (the ``inputs_b1h1`` placeholder
            declares ``(16, 23, 23, 3)``).
        inputs_s: sample images (``inputs_s`` placeholder:
            ``(None, 301, 301, 3)``).
        ground_truth_labels: target maps subtracted from the distance maps.

    Returns:
        ``(loss, totoal_map)``: the ``tf.nn.l2_loss`` of the difference, and
        the concatenated distance maps divided by 64.
    """
    def conv_stack(net, padding):
        # Layer names are identical in both branches so the weights are shared.
        net = tf.layers.conv2d(net, 16, [3, 3], strides=(2, 2), padding=padding, name='para_conv_1')
        net = tf.nn.relu(net)
        net = tf.layers.conv2d(net, 48, [3, 3], strides=(2, 2), padding=padding, name='para_conv_2')
        net = tf.nn.relu(net)
        net = tf.layers.conv2d(net, 96, [5, 5], strides=(1, 1), padding=padding, name='para_conv_3')
        net = tf.nn.relu(net)
        return tf.layers.conv2d(net, 32, [1, 1], strides=(1, 1), padding=padding, name='para_conv_4')

    # The original applied tf.sign(tf.sigmoid(x)).  sigmoid(x) is always > 0,
    # so every code bit was +1, every distance was 0 and the loss was a
    # constant; tf.sign also back-propagates a zero gradient.  Taking the sign
    # of the raw logits is the same as thresholding the sigmoid at 0.5, and the
    # straight-through gradient keeps the convolutions trainable.
    with tf.variable_scope('conv'):
        hint = tf.squeeze(_ste_sign(conv_stack(inputs, 'valid')))

    with tf.variable_scope('conv', reuse=tf.AUTO_REUSE):
        sample = _ste_sign(conv_stack(inputs_s, 'same'))

    # One distance map per hint: sum of |hint_bit - sample_bit| over channels.
    maps = [
        tf.reduce_sum(tf.abs(tf.subtract(hint[k], sample)), axis=3)
        for k in range(16)
    ]

    # Each of the 32 bits differs by at most 2, so dividing by 64 bounds the
    # map to [0, 1].
    totoal_map = tf.div(tf.concat(maps, 0), 64)
    loss = tf.nn.l2_loss(totoal_map - ground_truth_labels, name='loss')

    return loss, totoal_map

# Build the graph: loss, optimizer step, initializer and checkpoint saver.
loss, totoal_map = paraNet(inputs_b1h1, inputs_s, labels)
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

init =  tf.global_variables_initializer()

saver = tf.train.Saver()

with tf.Session() as sess:
    #writer = tf.summary.FileWriter("./variable_graph",graph = sess.graph)

    # Dataset locations and sizes (train set).
    joint_data_path = "./custom_data.json"
    train_val_path = "./train_val_indices.json"
    imgpath = "./000/"
    input_size = 301
    hint_roi_size = 23

    # Variables must be initialised before the first training step; the
    # original never ran `init` (and the restore below is disabled).
    sess.run(init)
    #saver.restore(sess, "./temp_model/model5.ckpt")

    for i in range(5000):

        # Load one training example.
        hintSet01,hintSet02,t_img,t_label_norm = training_data_feeder(joint_data_path, train_val_path, imgpath, input_size, hint_roi_size )

        # Normalize pixel values to 0~1.  The original computed each
        # normalized patch but never stored it, so an empty list was fed.
        # hintSet02 is not fed because the inputs_b1h2 placeholder is disabled.
        hintSet01_norm = [np.float32(rois / 255.0) for rois in hintSet01]
        t_img =[ np.float32(t_img /255.0) ]

        loss_val, _ = sess.run([loss, train_step] ,
                      feed_dict = {inputs_s:  t_img,
                                   inputs_b1h1: hintSet01_norm,
                                   labels: t_label_norm })
        if i % 50 == 0:
            # Report progress; the original `if` had no body (syntax error).
            print("iteration %d, loss %s" % (i, loss_val))

    #save_path = saver.save(sess, "./temp_model/model" + '5' + ".ckpt")

这里是 GitHub 仓库，数据集的链接也在该 GitHub 页面上。


我借用了“二值化神经网络”（Binarized Neural Networks）中的二值化方法。


def ste_binarize( value ):
    """Binarize a tensor with tf.sign using the straight-through estimator (STE).

    The gradient of tf.sign(x) is always zero, so it is replaced via
    g.gradient_override_map({"Sign" : "Identity"}): the backward pass then
    uses the gradient of Identity, i.e. the incoming gradient is passed
    straight through to x.

    Note: no clipping is performed here; only the sign is taken.
    """
    g = tf.get_default_graph()

    # tf.name_scope is used so that this snippet only depends on `tf`.
    with tf.name_scope("Binarized"):
        with g.gradient_override_map({"Sign" : "Identity"}):
            return tf.sign(value)

这个方法在 GitHub 上的原始项目中运行良好。我用它替换了原来的 tf.sign() 并再次尝试训练。遗憾的是，这并没有奏效。我现在正在修改模型的结构，并尝试记录梯度值。如果取得任何进展，我会更新这篇帖子。

2018.11.22 更新：我修改了模型结构并重新训练。这次我发现损失值出现了一种奇怪的行为：训练过程中，损失值在某个时刻反而上升，然后不再变化。


import tensorflow as tf

from tensorflow.python.framework import ops

from tensorflow.python.platform import gfile

from progress.bar import Bar

import numpy as np
import time
from datetime import datetime
import math
import matplotlib.pyplot as plt
import cv2

from imageLoader import getPaddedROI,training_batch_generator


# Dataset locations (train set).
imgpath = "./000/"
joint_data_path = "./custom_data.json"
train_val_path = "./train_val_indices.json"

# Image and hint-patch side lengths handed to the batch generator.
input_size, hint_roi_size = 301, 23

# The same two sizes under the names used for the placeholder shapes.
image_input_size, roi_size = input_size, hint_roi_size

# Number of examples per training batch.
batch_number = 10

def truncated_normal_var(name, shape, dtype):
    """Create or fetch a variable initialised from a truncated normal (stddev 0.01)."""
    return tf.get_variable(
        name=name,
        shape=shape,
        dtype=dtype,
        initializer=tf.truncated_normal_initializer(stddev=0.01))
def zero_var(name, shape, dtype):
    """Create or fetch a variable initialised to the constant 0.0."""
    return tf.get_variable(
        name=name,
        shape=shape,
        dtype=dtype,
        initializer=tf.constant_initializer(0.0))

def ste_binarize( value ):
    """Binarize a tensor with tf.sign using the straight-through estimator (STE).

    The gradient of tf.sign(x) is always zero, so it is replaced via
    g.gradient_override_map({"Sign" : "Identity"}): the backward pass then
    uses the gradient of Identity, i.e. the incoming gradient is passed
    straight through to x.

    Note: no clipping is performed here; only the sign is taken.
    """
    g = tf.get_default_graph()

    with ops.name_scope("Binarized"):
        with g.gradient_override_map({"Sign" : "Identity"}):
            return tf.sign(value)

#define the model
def paraNet(hint_inputs, sample_inputs):
    """Build the hint/sample hashing network and return the distance maps.

    Every hint patch and the sample image go through the same four
    convolution layers (weights shared through the 'conv' variable scope).
    Hints use 'valid' padding and the sample uses 'same' padding.  The outputs
    are binarized with ``ste_binarize`` into 32-channel codes, and for each of
    the 16 hints the summed absolute code difference against every sample
    position forms one map.

    Args:
        hint_inputs: hint patches indexed as ``hint_inputs[:, k]`` for
            k = 0..15 (the ``inputs_b1h1`` placeholder declares
            ``(None, 16, 23, 23, 3)``).
        sample_inputs: sample images (``inputs_s`` placeholder:
            ``(None, 301, 301, 3)``).

    Returns:
        The 16 per-hint maps concatenated along axis 1 and divided by 32.
    """
    num_hints = 16   # hint patches per example (axis 1 of hint_inputs)
    code_bits = 32   # output channels of the last convolution

    def conv_stack(inputs, padding):
        # Layer names are identical for hints and sample so weights are shared.
        net = tf.layers.conv2d(inputs, 16, [3, 3], strides=(2, 2), padding=padding, name='para_conv_1')
        net = tf.nn.relu(net)
        net = tf.layers.conv2d(net, 48, [3, 3], strides=(2, 2), padding=padding, name='para_conv_2')
        net = tf.nn.relu(net)
        net = tf.layers.conv2d(net, 96, [5, 5], strides=(1, 1), padding=padding, name='para_conv_3')
        net = tf.nn.relu(net)
        return tf.layers.conv2d(net, code_bits, [1, 1], strides=(1, 1), padding=padding, name='para_conv_4')

    def paraConv(inputs):
        # -1 lets the batch size follow the fed data instead of the global
        # batch_number the original hard-coded.
        return tf.reshape(ste_binarize(conv_stack(inputs, 'valid')), [-1, 1, 1, code_bits])

    # The first call creates the variables; all later calls reuse them.
    with tf.variable_scope('conv'):
        hints = [paraConv(hint_inputs[:, 0, :, :, :])]
    with tf.variable_scope('conv', reuse= True ):
        for k in range(1, num_hints):
            hints.append(paraConv(hint_inputs[:, k, :, :, :]))
        sample = ste_binarize(conv_stack(sample_inputs, 'same'))

    # One map per hint: sum |hint_bit - sample_bit| over the channel axis,
    # then add a hint axis at position 1 so the maps can be concatenated.
    maps = [
        tf.expand_dims(tf.reduce_sum(tf.abs(tf.subtract(h, sample)), axis=3), axis=1)
        for h in hints
    ]

    # NOTE(review): sign outputs can differ by up to 2 per bit, so the sum
    # over 32 bits reaches 64 and this quotient spans [0, 2].  If the labels
    # are normalized to [0, 1], the divisor should probably be 64 - confirm.
    totoal_map = tf.div(tf.concat(maps, 1), 32)
    return totoal_map

# Input placeholders: 16 hint patches per example, the full image, and the
# 16 target maps per example.  The batch dimension is left open.
inputs_b1h1 = tf.placeholder(tf.float32, ( None, 16, roi_size, roi_size, 3), name='inputs_b1h1')
inputs_s = tf.placeholder(tf.float32, (None, image_input_size, image_input_size, 3), name='inputs_s')
ground_truth_labels = tf.placeholder(tf.float32,(None, 16,76,76), name='labels')

# Build the graph: distance maps, L2 loss and one gradient-descent step.
mtotoal_map = paraNet(inputs_b1h1 , inputs_s )
mloss = tf.nn.l2_loss( ground_truth_labels -  mtotoal_map, name = 'loss'  )
train_step = tf.train.GradientDescentOptimizer(0.0001).minimize(mloss)

init =  tf.global_variables_initializer()

saver = tf.train.Saver()

with tf.Session() as sess:
    #writer = tf.summary.FileWriter("./variable_graph",graph = sess.graph)

    # Variables must be initialised before the first training step; the
    # original never ran `init` (and the restore below is disabled).
    sess.run(init)
    #saver.restore(sess, "./temp_model/model7.ckpt")

    loss_per_iteration = []
    for i in range(100):

        # Load one batch.
        hintSet01,hintSet02,t_img,t_label_norm = training_batch_generator(joint_data_path, train_val_path, imgpath, input_size, hint_roi_size,batch_number)

        loss_val, _ = sess.run([mloss, train_step] ,
                      feed_dict = {inputs_s:  t_img,
                                   inputs_b1h1: hintSet01,
                                   ground_truth_labels: t_label_norm })
        # Keep the loss history; the list was created but never filled.
        loss_per_iteration.append(loss_val)
        if i % 5 == 0:
            # Report progress; the original `if` had no body (syntax error).
            print("iteration %d, loss %s" % (i, loss_val))

    save_path = saver.save(sess, "./temp_model/model" + '7' + ".ckpt")