首页 文章

如何使用hyperopt进行Keras深度学习网络的超参数优化?

提问于
浏览
4

我想用 Keras 构建一个非线性回归模型来预测一个连续变量。对于下面的模型,应该如何选择以下超参数?

  • 隐藏层和神经元的数量

  • Dropout 比率

  • 是否使用BatchNormalization

  • 激活函数:从 linear、relu、tanh、sigmoid 中选择

  • 优化器:从 adam、rmsprop、sgd 中选出最佳的一个

Code

def dnn_reg():
    """Build and compile the fixed feed-forward regression network.

    The network takes 13 input features and stacks three hidden Dense
    layers (40, 30 and 5 units) followed by a single-unit output layer.
    Every Dense layer uses the 'normal' kernel initializer. Layers 2 and 3
    apply BatchNormalization before their activation.

    Returns:
        The Sequential model, compiled with mean-squared-error loss and
        the 'adam' optimizer.
    """
    init = 'normal'
    stack = (
        # layer 1
        Dense(40, input_dim=13, kernel_initializer=init),
        Activation('tanh'),
        Dropout(0.2),
        # layer 2
        Dense(30, kernel_initializer=init),
        BatchNormalization(),
        Activation('relu'),
        Dropout(0.4),
        # layer 3
        Dense(5, kernel_initializer=init),
        BatchNormalization(),
        Activation('relu'),
        Dropout(0.4),
        # output layer
        Dense(1, kernel_initializer=init),
        Activation('relu'),
    )

    net = Sequential()
    for layer in stack:
        net.add(layer)

    # Compile model
    net.compile(optimizer='adam', loss='mean_squared_error')
    return net

我考虑过随机网格搜索,但我想使用 hyperopt,我相信它会更快。我最初使用 https://github.com/maxpumperla/hyperas 实现了调优,但 Hyperas 无法与最新版本的 Keras 配合使用。我猜测是 Keras 发展得太快,维护者很难保持兼容,所以我认为直接使用 hyperopt 会是更好的选择。

PS:我在超参数调优以及使用 hyperopt 做贝叶斯优化方面都是新手。

2 回答

  • 3

    我在Hyperas上取得了很大的成功 . 以下是我学会使其发挥作用的事情 .

    1)从终端(而不是 IPython notebook)把它作为 Python 脚本运行;2)确保代码中没有任何注释(Hyperas 不喜欢注释!);3)按照 Hyperas README 中的说明,把数据和模型分别封装在函数中。

    下面是一个适用于我的Hyperas脚本示例(按照上面的说明) .

    from __future__ import print_function
    
    from hyperopt import Trials, STATUS_OK, tpe
    from keras.datasets import mnist
    from keras.layers.core import Dense, Dropout, Activation
    from keras.models import Sequential
    from keras.utils import np_utils
    import numpy as np
    from hyperas import optim
    from keras.models import model_from_json
    from keras.models import Sequential
    from keras.layers.core import Dense, Dropout, Activation, Flatten
    from keras.layers.convolutional import Convolution2D, MaxPooling2D
    from keras.optimizers import SGD , Adam
    import tensorflow as tf
    from hyperas.distributions import choice, uniform, conditional
    __author__ = 'JOnathan Hilgart'
    
    
    
    def data():
        """
        Load the feature and target arrays and split them into train/test.

        Reads 'training_x.npy' and 'training_y.npy' from the current working
        directory. The first 15000 rows of each array form the training
        split and the remaining rows form the test split.

        Kept separate from model() so that hyperopt won't reload data for
        each evaluation run.

        Returns the tuple (x_train, y_train, x_test, y_test).
        """
        import numpy as np
        split_at = 15000
        features = np.load('training_x.npy')
        targets = np.load('training_y.npy')
        x_train, x_test = features[:split_at, :], features[split_at:, :]
        y_train, y_test = targets[:split_at, :], targets[split_at:, :]
        return x_train, y_train, x_test, y_test
    
    
    def model(x_train, y_train, x_test, y_test):
        """
        Hyperas model template: build, train and score one MLP candidate.

        The double-curly-bracket expressions in the body are Hyperas
        placeholders for the hyper-parameters being searched: the width of
        each of the four hidden Dense layers, the dropout rate after each of
        them, the activation of hidden layers 2-4 and of the 9-unit output
        layer, the loss, the optimizer and the batch size.

        The model is fit on (x_train, y_train) for 50 epochs with
        (x_test, y_test) as validation data, then evaluated on
        (x_test, y_test).

        Returns a python dictionary with the keys:
            - loss: the negated test accuracy, i.e. the value to be minimized
            - status: STATUS_OK
            - model: the trained model, so that it can be used again later

        NOTE(review): the input shape is fixed to (2,) and the output layer to
        9 units; presumably this matches the arrays returned by data() - confirm.
        NOTE(review): 126 in the layer-width choices may be a typo for 128 - confirm.
        """
        model_mlp = Sequential()
        model_mlp.add(Dense({{choice([32, 64,126, 256, 512, 1024])}},
                            activation='relu', input_shape= (2,)))
        model_mlp.add(Dropout({{uniform(0, .5)}}))
        model_mlp.add(Dense({{choice([32, 64, 126, 256, 512, 1024])}}))
        model_mlp.add(Activation({{choice(['relu', 'sigmoid'])}}))
        model_mlp.add(Dropout({{uniform(0, .5)}}))
        model_mlp.add(Dense({{choice([32, 64, 126, 256, 512, 1024])}}))
        model_mlp.add(Activation({{choice(['relu', 'sigmoid'])}}))
        model_mlp.add(Dropout({{uniform(0, .5)}}))
        model_mlp.add(Dense({{choice([32, 64, 126, 256, 512, 1024])}}))
        model_mlp.add(Activation({{choice(['relu', 'sigmoid'])}}))
        model_mlp.add(Dropout({{uniform(0, .5)}}))
        model_mlp.add(Dense(9))
        model_mlp.add(Activation({{choice(['softmax','linear'])}}))
        model_mlp.compile(loss={{choice(['categorical_crossentropy','mse'])}}, metrics=['accuracy'],
                      optimizer={{choice(['rmsprop', 'adam', 'sgd'])}})
    
    
    
        model_mlp.fit(x_train, y_train,
                  batch_size={{choice([16, 32, 64, 128])}},
                  epochs=50,
                  verbose=2,
                  validation_data=(x_test, y_test))
        score, acc = model_mlp.evaluate(x_test, y_test, verbose=0)
        print('Test accuracy:', acc)
        return {'loss': -acc, 'status': STATUS_OK, 'model': model_mlp}
    
        enter code here
    
    if __name__ == '__main__':
        # Script entry point: run the Hyperas search, then evaluate and report
        # the best model and the hyper-parameters that produced it.
        import gc

        # `K` is used below but is not among the script's imports; bring the
        # Keras backend into scope here so the session lookup does not raise
        # NameError.
        from keras import backend as K

        gc.collect()

        with K.get_session(): ## TF session
            best_run, best_model = optim.minimize(model=model,
                                                  data=data,
                                                  algo=tpe.suggest,
                                                  max_evals=2,
                                                  trials=Trials())
            # Reload the data to score the best model on the test split.
            X_train, Y_train, X_test, Y_test = data()
            print("Evalutation of best performing model:")
            print(best_model.evaluate(X_test, Y_test))
            print("Best performing model chosen hyper-parameters:")
            print(best_run)
    

    这个问题是由垃圾回收(gc)顺序不同引起的:如果 Python 先回收 session,程序会正常退出;如果 Python 先回收 SWIG 内存(tf_session),程序退出时就会失败。

    你可以通过以下方式强制 Python 删除 session:

    del session
    

    或者如果您使用的是keras,则无法获取会话实例,您可以在代码末尾运行以下代码:

    import gc; gc.collect()
    
  • 9

    这也可以是另一种方法:

    import sys

    from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
    from hyperopt.pyll import scope
    from sklearn.metrics import roc_auc_score
    
    # Placeholders for the training and validation data read by f_nn().
    # f_nn() accesses X.shape[1], which a plain list does not provide, so these
    # presumably must be replaced with array-like data before running - confirm.
    X = []
    y = []
    X_val = []
    y_val = []
    
    # hyperopt search space consumed by f_nn().
    #
    # Layer widths and the batch size must be integers, so they are drawn with
    # hp.quniform (step 1) and converted with scope.int; plain hp.uniform would
    # hand floats such as 437.26 to Keras.
    space = {
        # Network depth: the third hidden layer's settings exist only in the
        # 'three' branch, so they are sampled only when that branch is chosen.
        'choice': hp.choice('num_layers', [
            {'layers': 'two'},
            {'layers': 'three',
             'units3': scope.int(hp.quniform('units3', 64, 1024, 1)),
             'dropout3': hp.uniform('dropout3', .25, .75)},
        ]),

        'units1': scope.int(hp.quniform('units1', 64, 1024, 1)),
        'units2': scope.int(hp.quniform('units2', 64, 1024, 1)),

        'dropout1': hp.uniform('dropout1', .25, .75),
        'dropout2': hp.uniform('dropout2', .25, .75),

        'batch_size': scope.int(hp.quniform('batch_size', 28, 128, 1)),

        # Fixed (not searched) settings.
        'nb_epochs': 100,
        'optimizer': hp.choice('optimizer', ['adadelta', 'adam', 'rmsprop']),
        'activation': 'relu',
    }
    
    def f_nn(params):
        """Train one Keras MLP for a sampled configuration and score it by AUC.

        Objective function for hyperopt's fmin. It reads the module-level
        X, y (training data) and X_val, y_val (validation data).

        Args:
            params: one sample from the search space, with the keys 'units1',
                'units2', 'dropout1', 'dropout2', 'batch_size', 'nb_epochs',
                'optimizer', 'activation' and 'choice'. 'choice' is a dict
                holding 'layers' and, when 'layers' is 'three', also 'units3'
                and 'dropout3'.

        Returns:
            A dict with 'loss' set to the negated validation ROC AUC (fmin
            minimizes, so this maximizes AUC) and 'status' set to STATUS_OK.
        """
        from keras.models import Sequential
        from keras.layers import Dense, Dropout, Activation

        print('Params testing: ', params)

        # Layer widths and batch size may be sampled as floats; Keras needs
        # integers for both.
        batch_size = int(params['batch_size'])

        model = Sequential()
        model.add(Dense(int(params['units1']), input_dim=X.shape[1]))
        model.add(Activation(params['activation']))
        model.add(Dropout(params['dropout1']))

        model.add(Dense(int(params['units2']),
                        kernel_initializer="glorot_uniform"))
        model.add(Activation(params['activation']))
        model.add(Dropout(params['dropout2']))

        # The third hidden layer exists only in the 'three' branch.
        if params['choice']['layers'] == 'three':
            model.add(Dense(int(params['choice']['units3']),
                            kernel_initializer="glorot_uniform"))
            model.add(Activation(params['activation']))
            model.add(Dropout(params['choice']['dropout3']))

        model.add(Dense(1))
        model.add(Activation('sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer=params['optimizer'])

        model.fit(X, y, epochs=params['nb_epochs'], batch_size=batch_size,
                  verbose=0)

        # The single sigmoid output is already a probability, so predict()
        # yields the scores; flatten the (n, 1) column for roc_auc_score.
        val_scores = model.predict(X_val, batch_size=128, verbose=0).ravel()
        auc = roc_auc_score(y_val, val_scores)
        print('AUC:', auc)
        sys.stdout.flush()
        return {'loss': -auc, 'status': STATUS_OK}
    
    
    # Run 50 TPE-guided evaluations of f_nn over the search space; `trials`
    # records every evaluation.
    trials = Trials()
    best = fmin(f_nn, space, algo=tpe.suggest, max_evals=50, trials=trials)
    # print() calls instead of Python 2 print statements: valid on Python 3 and
    # consistent with the print() calls inside f_nn.
    print('best: ')
    print(best)
    

    Source

相关问题