作为调试我的主项目的一部分,我尝试使用以下设置学习单个层神经网络的AND功能 .
class OneLayerNet(object):
def __init__(self, num_examples, num_feats, num_outputs, act_func, threshold, loss_func, optimizer, batch_size=16, epochs=100, eta=0.01, reg_const=0, sparseX=True):
self.batch_size = batch_size
self.epochs = epochs
self.eta = eta
self.reg_const = reg_const
self.sparseX = sparseX
if sparseX:
self.x = tf.sparse_placeholder(tf.float64, name="placeholderx") # num_sents x num_feats
else:
self.x = tf.placeholder(tf.float64, name="placeholderx")
self.y = tf.placeholder(tf.float64, name="placeholdery") # 1 x num_sents
self.w = tf.get_variable("W", shape=[num_feats, num_outputs], initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float64)
self.b = tf.Variable(tf.zeros([num_outputs], dtype=tf.float64))
self.probs = act_func(self.x, self.w, self.b)
self.loss = loss_func(self.y, self.probs, threshold)
self.optimizer = tf.train.AdamOptimizer(learning_rate=self.eta).minimize(self.loss)
self.session = tf.Session()
self.session.run(tf.global_variables_initializer())
def train(self, x, y, loss_graph_file):
session = self.session
num_batches = y.shape[0] // self.batch_size
loss_vect = []
for epoch in range(self.epochs):
avg_loss = 0
for i in range(num_batches):
if self.sparseX:
batch_x, batch_y = get_sparse_batch(i, [x, y], self.batch_size, y.shape[0])
else:
batch_x, batch_y = get_batch(i, [x, y], self.batch_size, y.shape[0])
_, loss, w, b = session.run([self.optimizer, self.loss, self.w, self.b], {self.x: batch_x, self.y: batch_y})
avg_loss += loss/num_batches
loss_vect.append(avg_loss)
if epoch % 100 == 0 or epoch == self.epochs-1:
print("Epoch {}: loss = {}".format(epoch, avg_loss))
print("Weights: {}".format(w))
print("Bias: {}".format(b))
act_func
是一个返回sigmoid Tensorflow操作的函数, loss_func
是一个返回平方误差Tensorflow操作之和的函数 . 上述设置无法学习AND功能 . 它在2000年的时期内从25%准确度提高到75%准确度 . 将AdamOptimizer替换为GradientDescentOptimizer并没有帮助 .
但是,如果我通过调用以下函数替换AdamOptimizer调用,我可以在AND函数上获得100%的准确性 .
def optimizer(X, W, B, Y, learning_rate):
yHat = tf.sigmoid( tf.add(tf.matmul(X, W), B) ) # 4x1
err = Y - yHat
deltaW = tf.matmul(tf.transpose(X), err ) # have to be 2x1
deltaB = tf.reduce_sum(err, 0) # 4, have to 1x1. sum all the biases? yes
W_ = W + learning_rate * deltaW
B_ = B + learning_rate * deltaB
return tf.group(W.assign(W_), B.assign(B_))
我确定我调用AdamOptimizer的方式有问题,但我不确定是什么 . 如果它有用,我已经包含了下面调用上述功能的完整代码 .
def get_batch(index, tensors, batch_size, nItems):
'''
tensors is list of tensors that have same dimension 0
'''
batch_tensors = []
for tensor in tensors:
end = min((index+1)*batch_size, nItems)
batch_tensors.append(tensor[index*batch_size:end])
return batch_tensors
def get_sparse_batch(index, tensors, batch_size, nItems):
xs, ys = tensors
begin = index * batch_size
end = min((index+1)*batch_size, nItems)
y_b = ys[begin:end]
(inds, vals, dsize) = xs
nInds = inds[(begin <= inds[:,0]) & (inds[:,0] < end)] - np.array([begin, 0])
nVals = vals[:nInds.shape[0]]
nDsize = (end - begin, dsize[1])
x_b = tf.SparseTensorValue(nInds, nVals, nDsize)
return (x_b, y_b)
class OneLayerNet(object):
def __init__(self, num_examples, num_feats, num_outputs, act_func, threshold, loss_func, optimizer, batch_size=16, epochs=100, eta=0.01, reg_const=0, sparseX=True):
self.batch_size = batch_size
self.epochs = epochs
self.eta = eta
self.reg_const = reg_const
self.sparseX = sparseX
if sparseX:
self.x = tf.sparse_placeholder(tf.float64, name="placeholderx") # num_sents x num_feats
else:
self.x = tf.placeholder(tf.float64, name="placeholderx")
self.y = tf.placeholder(tf.float64, name="placeholdery") # 1 x num_sents
self.w = tf.get_variable("W", shape=[num_feats, num_outputs], initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float64)
self.b = tf.Variable(tf.zeros([num_outputs], dtype=tf.float64))
self.probs = act_func(self.x, self.w, self.b)
self.loss = loss_func(self.y, self.probs, threshold)
ytrans = tf.reshape(self.y, (num_examples, num_outputs))
self.optimizer = optimizers.optimizer(self.x, self.w, self.b, ytrans, self.eta) # optimizer(self.eta, self.loss)
self.session = tf.Session()
self.session.run(tf.global_variables_initializer())
def train(self, x, y, loss_graph_file):
session = self.session
num_batches = y.shape[0] // self.batch_size
loss_vect = []
for epoch in range(self.epochs):
avg_loss = 0
for i in range(num_batches):
if self.sparseX:
batch_x, batch_y = get_sparse_batch(i, [x, y], self.batch_size, y.shape[0])
else:
batch_x, batch_y = get_batch(i, [x, y], self.batch_size, y.shape[0])
_, loss, w, b = session.run([self.optimizer, self.loss, self.w, self.b], {self.x: batch_x, self.y: batch_y})
avg_loss += loss/num_batches
loss_vect.append(avg_loss)
if epoch % 100 == 0 or epoch == self.epochs-1:
print("Epoch {}: loss = {}".format(epoch, avg_loss))
print("Weights: {}".format(w))
print("Bias: {}".format(b))
plt.plot(loss_vect)
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.savefig(loss_graph_file)
def eval(self, x, y, predictions_file):
session = self.session
num_batches = y.shape[0] // self.batch_size
num_correct = 0
with open(predictions_file, 'w') as f:
for i in range(num_batches + 1):
if self.sparseX:
batch_x, batch_y = get_sparse_batch(i, [x, y], self.batch_size, y.shape[0])
else:
batch_x, batch_y = get_batch(i, [x, y], self.batch_size, y.shape[0])
probs = session.run(self.probs, {self.x: batch_x})
predictions = np.transpose(probs >= 0.5)[0]
num_correct += np.sum(np.equal(predictions, batch_y))
for j in range(batch_y.shape[0]):
f.write('{}\t{}\t{}\n'.format(probs[j], int(predictions[j]), batch_y[j]))
accuracy = num_correct/len(y)
return accuracy