I am comparing the performance of TensorFlow and sklearn on two datasets:

Here is my code (Python):

    from __future__ import print_function

    # Import MNIST data
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

    import tensorflow as tf

    from sklearn.datasets import load_digits
    import numpy as np

    # digits = load_digits()
    # data = digits.data
    # labels = digits.target

    # convert to binary labels
    # y = np.zeros((labels.shape[0],10))
    # y[np.arange(labels.shape[0]),labels] = 1

    x_train = mnist.train.images
    y_train = mnist.train.labels
    x_test = mnist.test.images
    y_test = mnist.test.labels
    n_train = mnist.train.images.shape[0]

    # import pdb;pdb.set_trace()

    # Parameters
    learning_rate = 1e-3
    lambda_val = 1e-5
    training_epochs = 30
    batch_size = 200
    display_step = 1

    # Network Parameters
    n_hidden_1 = 300 # 1st layer number of neurons
    n_input = x_train.shape[1] # MNIST data input (img shape: 28*28)
    n_classes = 10 # MNIST total classes (0-9 digits)

    # tf Graph input
    X = tf.placeholder("float", [None, n_input])
    Y = tf.placeholder("float", [None, n_classes])

    # Store layers weight & bias
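    # (initialized below from a standard normal distribution via tf.random_normal)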
    weights = {
        'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
        'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden_1])),
        'out': tf.Variable(tf.random_normal([n_classes]))
    }


    # Create model
    def multilayer_perceptron(x):
        # Hidden fully connected layer with n_hidden_1 (300) neurons
        layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
        # Activation
        layer_1_relu = tf.nn.relu(layer_1)
        # Output fully connected layer with a neuron for each class
        out_layer = tf.matmul(layer_1_relu, weights['out']) + biases['out']
        return out_layer

    # Construct model
    logits = multilayer_perceptron(X)

    # Define loss and optimizer
    loss_op = (tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
               + lambda_val * tf.nn.l2_loss(weights['h1'])
               + lambda_val * tf.nn.l2_loss(weights['out']))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)

    # Test model
    pred = tf.nn.softmax(logits)  # Apply softmax to logits
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # Initializing the variables
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        # Training cycle
        for epoch in range(training_epochs):
            avg_cost = 0.
            total_batch = int(n_train/batch_size)
            # Loop over all batches
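            # reset the batch pointer at the start of each epoch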
            ptr = 0
            for i in range(total_batch):
                next_ptr = ptr + batch_size
                if next_ptr > len(x_train):
                    next_ptr = len(x_train)
                batch_x, batch_y = x_train[ptr:next_ptr], y_train[ptr:next_ptr]
                ptr += batch_size
                # Run optimization op (backprop) and cost op (to get loss value)
                _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                                Y: batch_y})
                # Compute average loss
                avg_cost += c / total_batch
            # Display logs per epoch step
            if epoch % display_step == 0:
                print("Epoch:", '%04d' % (epoch+1), "cost={:.9f}".format(avg_cost))
        print("Optimization Finished!")
        print("Accuracy on training set: ", accuracy.eval({X:x_train,Y:y_train}))
        print("Accuracy on testing set:", accuracy.eval({X: x_test, Y: y_test}))

    print("Experimenting sklearn...")
    # now experiment with sklearn
    from sklearn.neural_network import MLPClassifier
    import time

    # use MLP 
    t_start = time.time()
    print('fitting MLP...')
    clf = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(300,), max_iter=training_epochs)
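    # note: y_train is one-hot encoded, so fit() receives a 2-D indicator target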
    clf.fit(x_train, y_train)
    print('fitted MLP in {:.2f} seconds'.format(time.time() - t_start))
    print('predicting...')
    labels_predicted = clf.predict(x_test)
    print('accuracy: {:.2f} %'.format(
        np.mean(np.argmax(y_test, axis=1) == np.argmax(labels_predicted, axis=1)) * 100))

The code is adapted from a GitHub repository. For this test I am using a conventional neural network (MLP) with a single hidden layer of size 300. Here are the results on the two datasets:

  • sklearn digits: ~83% (TensorFlow), ~90% (sklearn)

  • MNIST: ~94% (TensorFlow), ~97% (sklearn)

I use the same model for both libraries. All the parameters (number of hidden layers, number of hidden units, learning_rate, L2 regularization constant, number of training epochs, batch size) and the optimization algorithm are identical (Adam optimizer, Adam's beta parameters, no momentum, etc.). So I am wondering: does sklearn have some magical implementation that outperforms tensorflow? Can anyone help explain this? Thank you very much.
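For completeness, here is a minimal sketch of how the MLPClassifier call could spell the remaining hyperparameters out explicitly; the beta_1, beta_2, and epsilon values below are scikit-learn's documented defaults and match tf.train.AdamOptimizer's defaults (the exact scaling sklearn applies to the alpha L2 penalty, however, may not be identical to my lambda_val term):

    from sklearn.neural_network import MLPClassifier

    # Same architecture and optimizer settings as the TensorFlow model above;
    # x_train and y_train are the arrays defined earlier in the script.
    clf = MLPClassifier(
        solver='adam',               # same optimizer as tf.train.AdamOptimizer
        hidden_layer_sizes=(300,),   # one hidden layer of 300 units (n_hidden_1)
        activation='relu',           # same non-linearity as tf.nn.relu
        alpha=1e-5,                  # L2 penalty, playing the role of lambda_val
        learning_rate_init=1e-3,     # matches learning_rate
        batch_size=200,              # matches batch_size
        max_iter=30,                 # epochs for the 'adam' solver; matches training_epochs
        beta_1=0.9,                  # Adam beta1, sklearn and TF default
        beta_2=0.999,                # Adam beta2, sklearn and TF default
        epsilon=1e-8,                # Adam epsilon, sklearn and TF default
    )
    clf.fit(x_train, y_train)

As far as I can tell from the scikit-learn documentation these are already the defaults, so writing them out only makes the intended equivalence explicit.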