作为调试我的主项目的一部分,我尝试使用以下设置学习单个层神经网络的AND功能 .

class OneLayerNet(object):
    def __init__(self, num_examples, num_feats, num_outputs, act_func, threshold, loss_func, optimizer, batch_size=16, epochs=100, eta=0.01, reg_const=0, sparseX=True):
        self.batch_size = batch_size
        self.epochs = epochs
        self.eta = eta
        self.reg_const = reg_const
        self.sparseX = sparseX

        if sparseX:
            self.x = tf.sparse_placeholder(tf.float64, name="placeholderx") # num_sents x num_feats
        else:
            self.x = tf.placeholder(tf.float64, name="placeholderx")
        self.y = tf.placeholder(tf.float64, name="placeholdery") # 1 x num_sents
        self.w = tf.get_variable("W", shape=[num_feats, num_outputs], initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float64)
        self.b = tf.Variable(tf.zeros([num_outputs], dtype=tf.float64))

        self.probs = act_func(self.x, self.w, self.b)
        self.loss = loss_func(self.y, self.probs, threshold)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.eta).minimize(self.loss)
        self.session = tf.Session()
        self.session.run(tf.global_variables_initializer())

    def train(self, x, y, loss_graph_file):
        session = self.session
        num_batches = y.shape[0] // self.batch_size
        loss_vect = []

        for epoch in range(self.epochs):
            avg_loss = 0
            for i in range(num_batches):
                if self.sparseX:
                    batch_x, batch_y = get_sparse_batch(i, [x, y], self.batch_size, y.shape[0])
                else:
                    batch_x, batch_y = get_batch(i, [x, y], self.batch_size, y.shape[0])
                _, loss, w, b = session.run([self.optimizer, self.loss, self.w, self.b], {self.x: batch_x, self.y: batch_y})
                avg_loss += loss/num_batches

            loss_vect.append(avg_loss)
            if epoch % 100 == 0 or epoch == self.epochs-1:
                print("Epoch {}: loss = {}".format(epoch, avg_loss))
                print("Weights: {}".format(w))
                print("Bias: {}".format(b))

act_func 是一个返回sigmoid Tensorflow操作的函数, loss_func 是一个返回平方误差Tensorflow操作之和的函数 . 上述设置无法学习AND功能 . 它在2000年的时期内从25%准确度提高到75%准确度 . 将AdamOptimizer替换为GradientDescentOptimizer并没有帮助 .

但是,如果我通过调用以下函数替换AdamOptimizer调用,我可以在AND函数上获得100%的准确性 .

def optimizer(X, W, B, Y, learning_rate):
    yHat = tf.sigmoid( tf.add(tf.matmul(X, W), B) ) # 4x1
    err = Y - yHat
    deltaW = tf.matmul(tf.transpose(X), err ) # have to be 2x1
    deltaB = tf.reduce_sum(err, 0) # 4, have to 1x1. sum all the biases? yes
    W_ = W + learning_rate * deltaW
    B_ = B + learning_rate * deltaB
    return tf.group(W.assign(W_), B.assign(B_))

我确定我调用AdamOptimizer的方式有问题,但我不确定是什么 . 如果它有用,我已经包含了下面调用上述功能的完整代码 .

def get_batch(index, tensors, batch_size, nItems):
    '''
    tensors is list of tensors that have same dimension 0
    '''
    batch_tensors = []
    for tensor in tensors:
        end = min((index+1)*batch_size, nItems)
        batch_tensors.append(tensor[index*batch_size:end])
    return batch_tensors

def get_sparse_batch(index, tensors, batch_size, nItems):
    xs, ys = tensors
    begin = index * batch_size
    end = min((index+1)*batch_size, nItems)
    y_b = ys[begin:end]

    (inds, vals, dsize) = xs
    nInds = inds[(begin <= inds[:,0]) & (inds[:,0] < end)] - np.array([begin, 0])
    nVals = vals[:nInds.shape[0]]
    nDsize = (end - begin, dsize[1])
    x_b = tf.SparseTensorValue(nInds, nVals, nDsize)
    return (x_b, y_b)

class OneLayerNet(object):
    def __init__(self, num_examples, num_feats, num_outputs, act_func, threshold, loss_func, optimizer, batch_size=16, epochs=100, eta=0.01, reg_const=0, sparseX=True):
        self.batch_size = batch_size
        self.epochs = epochs
        self.eta = eta
        self.reg_const = reg_const
        self.sparseX = sparseX

        if sparseX:
            self.x = tf.sparse_placeholder(tf.float64, name="placeholderx") # num_sents x num_feats
        else:
            self.x = tf.placeholder(tf.float64, name="placeholderx")
        self.y = tf.placeholder(tf.float64, name="placeholdery") # 1 x num_sents
        self.w = tf.get_variable("W", shape=[num_feats, num_outputs], initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float64)
        self.b = tf.Variable(tf.zeros([num_outputs], dtype=tf.float64))

        self.probs = act_func(self.x, self.w, self.b)
        self.loss = loss_func(self.y, self.probs, threshold)
        ytrans = tf.reshape(self.y, (num_examples, num_outputs))
        self.optimizer = optimizers.optimizer(self.x, self.w, self.b, ytrans, self.eta) # optimizer(self.eta, self.loss)
        self.session = tf.Session()
        self.session.run(tf.global_variables_initializer())

    def train(self, x, y, loss_graph_file):
        session = self.session
        num_batches = y.shape[0] // self.batch_size
        loss_vect = []

        for epoch in range(self.epochs):
            avg_loss = 0
            for i in range(num_batches):
                if self.sparseX:
                    batch_x, batch_y = get_sparse_batch(i, [x, y], self.batch_size, y.shape[0])
                else:
                    batch_x, batch_y = get_batch(i, [x, y], self.batch_size, y.shape[0])
                _, loss, w, b = session.run([self.optimizer, self.loss, self.w, self.b], {self.x: batch_x, self.y: batch_y})
                avg_loss += loss/num_batches

            loss_vect.append(avg_loss)
            if epoch % 100 == 0 or epoch == self.epochs-1:
                print("Epoch {}: loss = {}".format(epoch, avg_loss))
                print("Weights: {}".format(w))
                print("Bias: {}".format(b))

        plt.plot(loss_vect)
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.savefig(loss_graph_file)

    def eval(self, x, y, predictions_file):
        session = self.session
        num_batches = y.shape[0] // self.batch_size
        num_correct = 0

        with open(predictions_file, 'w') as f:
            for i in range(num_batches + 1):
                if self.sparseX:
                    batch_x, batch_y = get_sparse_batch(i, [x, y], self.batch_size, y.shape[0])
                else:
                    batch_x, batch_y = get_batch(i, [x, y], self.batch_size, y.shape[0])
                probs = session.run(self.probs, {self.x: batch_x})
                predictions = np.transpose(probs >= 0.5)[0]
                num_correct += np.sum(np.equal(predictions, batch_y))
                for j in range(batch_y.shape[0]):
                    f.write('{}\t{}\t{}\n'.format(probs[j], int(predictions[j]), batch_y[j]))

        accuracy = num_correct/len(y)
        return accuracy