我正在使用 Keras(TensorFlow 后端)的 VGG16 模型和 Food-101 图像数据集做一个食品分类项目。但是,我遇到了一些与验证准确率有关的问题(我认为问题在于过拟合)。我的验证准确率没有提高,始终停留在 48–51% 左右。我有 40 个类别(40 种不同的食物),每种食物有 700 张训练图像和 300 张验证图像。我还用一批随机的食物图像评估了我的模型。我尝试过:

  • 降低学习率

  • 将 Dropout 层的丢弃率改为 0.75

  • 图像增强

这些做法虽然有一些帮助,但并没有明显提高验证准确率。我听说有人使用 preprocess_input() 函数来提高验证准确率,但我不确定这是否有效。

这是我的代码:

import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
from keras.utils import to_categorical
from keras import optimizers

# Size every image is resized to before being fed to the network.
img_width, img_height = 150, 150

# File the trained classifier-head weights are written to.
top_model_weights_path = 'test2_classes.h5'
# Raw strings: these Windows paths contain backslashes ("\i", "\d") that are
# invalid escape sequences in an ordinary string literal. The resulting
# values are unchanged.
train_data_dir = r'D:\intallation\dataset\dataset-101/food/train'
validation_data_dir = r'D:\intallation\dataset\dataset-101/food/validation'
# Expected number of images under the training / validation directories.
nb_train_samples = 28000
nb_validation_samples = 12000
# Training schedule for the classifier head.
epochs = 80
batch_size = 32

def _save_features_for_directory(model, datagen, directory, output_path):
    """Run every image under *directory* through *model* and save the output.

    Images are read without shuffling, so the rows of the saved array follow
    the generator's own ordering (the same ordering as ``generator.classes``).

    Args:
        model: Network whose ``predict_generator`` output is saved.
        datagen: ``ImageDataGenerator`` used to read and preprocess images.
        directory: Root folder passed to ``flow_from_directory``.
        output_path: ``.npy`` file the feature array is written to.
    """
    generator = datagen.flow_from_directory(
        directory,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)
    # Round the step count up and base it on the images actually found.
    # Floor division of a hard-coded sample count drops the trailing partial
    # batch, which leaves fewer feature rows than there are labels.
    steps = (generator.samples + batch_size - 1) // batch_size
    features = model.predict_generator(generator, steps)
    np.save(output_path, features)


def save_bottlebeck_features():
    """Precompute VGG16 convolutional features for both image directories.

    Builds VGG16 without its classifier head (ImageNet weights) and writes
    its outputs for the training images to ``test_trained.npy`` and for the
    validation images to ``test_validation.npy``.

    NOTE(review): images are only rescaled to [0, 1] here. The Keras
    documentation for the ImageNet VGG16 weights describes inputs prepared
    with ``keras.applications.vgg16.preprocess_input``; consider switching,
    which changes the saved features and requires re-running this step.
    """
    datagen = ImageDataGenerator(rescale=1. / 255)

    # Convolutional base only; the dense head is trained separately.
    model = applications.VGG16(include_top=False, weights='imagenet')

    _save_features_for_directory(
        model, datagen, train_data_dir, 'test_trained.npy')
    _save_features_for_directory(
        model, datagen, validation_data_dir, 'test_validation.npy')


def train_top_model():
    """Train a dense classifier head on the precomputed bottleneck features.

    Loads ``test_trained.npy`` / ``test_validation.npy``, derives one-hot
    labels from the class sub-folders of the two image directories, fits a
    Flatten -> Dense(4096) -> Dropout -> softmax model with SGD, and saves
    its weights to ``top_model_weights_path``. The class-name-to-index
    mapping is saved to ``test_class_indices.npy``.
    """
    # The generators below are used only to list the directories and obtain
    # class indices. Training runs on features loaded from disk, so image
    # augmentation configured here could never reach the model; a single
    # plain generator is therefore used for both directories.
    label_datagen = ImageDataGenerator(rescale=1. / 255)

    # Class labels for the training data.
    train_generator = label_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)
    np.save('test_class_indices.npy', train_generator.class_indices)
    num_classes = len(train_generator.class_indices)

    train_data = np.load('test_trained.npy')
    # Features were extracted without shuffling, so row i belongs to image i.
    # Slice the labels to the number of feature rows in case the feature
    # file is missing a trailing partial batch.
    train_labels = to_categorical(
        train_generator.classes[:len(train_data)], num_classes=num_classes)

    # Class labels for the validation data.
    validation_generator = label_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)

    validation_data = np.load('test_validation.npy')
    validation_labels = to_categorical(
        validation_generator.classes[:len(validation_data)],
        num_classes=num_classes)

    # Classifier head stacked on the flattened bottleneck features.
    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    sgd = optimizers.SGD(lr=1e-4, momentum=0.9, nesterov=True)

    model.compile(optimizer=sgd,
                  loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels))

    model.save_weights(top_model_weights_path)


# Stage 1: write the VGG16 features to 'test_trained.npy' and
# 'test_validation.npy'.
save_bottlebeck_features()
# Stage 2: load those files and train the classifier head; must run after
# stage 1 because it reads the files stage 1 produces.
train_top_model()