EDIT:
这篇文章的原始标题是
“当学习率下降时,损失值会跳跃,然后慢慢回落”
但我现在认为这个现象是在恢复已保存的模型时发生的,与学习率是否改变无关。
以 0.001 的学习率训练 10 个 epoch 后,损失降到约 10。然后我保存并恢复了模型,重新开始训练,第 11 个 epoch 的损失约为 15。再训练 5 个 epoch,损失仍未回到 10。
我已经构建了一个Autoencoder模型,如下所示 . 它使用 tf.train.Saver()
将训练过的模型保存到内存中或从内存中恢复 . 我的学习率为 tf.placeholder
,因此允许在 feed_dict
中指定 .
在训练模型时,我最初以较高的学习速度开始,让它训练直到损失开始稳定,然后保存模型,手动降低学习速度,恢复它并重新开始训练 .
然而,每当做出这种改变后,损失总是比改变前高出许多,然后才缓慢而持续地再次下降。除非我的实现有问题,否则我无法理解为什么会出现这种跳跃。
import os
import pickle as pk
import tensorflow as tf
class Autoencoder:
    """Fully-connected autoencoder (TensorFlow 1.x graph API).

    Supports optional tied decoder weights, a sparse-input loss that only
    scores the non-zero entries of the input, and simple input denoising.
    """

    def __init__(self, encoderDims, sparseInput=False, tiedWeights=False, denoise=False):
        # encoderDims: layer sizes from input down to the code layer; the
        # decoder mirrors these dimensions in reverse.
        self.encoderDims = encoderDims
        self.decoderDims = list(reversed(encoderDims))
        self.sparseInput = sparseInput
        self.tiedWeights = tiedWeights
        self.denoise = denoise  # Only works for greyscale image data
        self.input = tf.placeholder(tf.float32, [None, encoderDims[0]])
        # The learning rate is a placeholder so it can be changed between
        # steps via feed_dict without rebuilding the graph.
        self.learningRate = tf.placeholder(tf.float32, [])
        self.activationFunction = tf.nn.sigmoid  # TODO: allow user to specify
        # self.activationFunction = tf.tanh
        # self.activationFunction = tf.nn.selu
        self.SGD = tf.train.AdamOptimizer(self.learningRate)
        if self.denoise:
            self.__addNoise()
        self.__buildNetwork()          # Constructs encoder & decoder variables
        self.__buildTensorFlowGraph()  # Creates sequential TensorFlow ops
        self.session = tf.Session()
        self.session.run(tf.global_variables_initializer())  # Initialise weights & biases
        # Saver is created AFTER minimize(), so Adam's moment slot variables
        # exist and are included in the checkpoint as well.
        self.saver = tf.train.Saver()
        self.session.graph.finalize()  # Avoids memory leaks through duplicating graph nodes

    def __addNoise(self):
        # Draw a standard-normal tensor shaped like the input, then set
        # pixels to 255 wherever the draw exceeds +1 std dev.
        # NOTE(review): that is one-sided, so ~16% of pixels are corrupted
        # (the original comment claimed ~32%), and 255 is white for
        # greyscale data, not black.
        random = tf.random_normal(tf.shape(self.input))
        mask = tf.greater(random, 1.0)
        self.noisyInput = tf.where(
            mask, tf.ones_like(self.input) * 255, self.input)

    def __buildNetwork(self):
        # Per-layer weight and bias variables for encoder and decoder.
        self.encoderWeights, self.encoderBiases = [], []
        self.decoderWeights, self.decoderBiases = [], []
        for layer in range(len(self.encoderDims) - 1):
            self.encoderWeights.append(
                tf.Variable(tf.random_normal(
                    [self.encoderDims[layer], self.encoderDims[layer + 1]]))
            )
            self.encoderBiases.append(
                tf.Variable(tf.zeros([self.encoderDims[layer + 1]]))
            )
            # if layer != len(self.decoderDims) - 2:  # BIAS IN OUTPUT LAYER????
            self.decoderBiases.append(
                tf.Variable(tf.zeros([self.decoderDims[layer + 1]]))
            )
            if not self.tiedWeights:
                self.decoderWeights.append(
                    tf.Variable(tf.random_normal(
                        [self.decoderDims[layer], self.decoderDims[layer + 1]]))
                )
        if self.tiedWeights:
            # Decoder reuses the transposed encoder weights — no new variables.
            self.decoderWeights = [
                tf.transpose(w) for w in reversed(self.encoderWeights)]

    def __buildTensorFlowGraph(self):
        self.encoded = self.encode()   # Encoded/compressed data
        self.decoded = self.decode()   # Decoded/reconstructed data
        self.loss = self.__calculateLoss()
        self.train = self.SGD.minimize(self.loss)

    def encode(self):
        """Forward pass through the encoder; returns the code-layer tensor."""
        encoded = self.noisyInput if self.denoise else self.input
        for layer in range(len(self.encoderDims) - 1):
            encoded = tf.matmul(encoded, self.encoderWeights[layer])
            encoded = tf.add(encoded, self.encoderBiases[layer])
            # if layer != len(self.encoderDims) - 2:  # KEEP LAST LINEAR?
            encoded = self.activationFunction(encoded)
        return encoded

    def decode(self):
        """Forward pass through the decoder; the output layer stays linear."""
        decoded = self.encoded
        for layer in range(len(self.decoderDims) - 1):
            decoded = tf.matmul(decoded, self.decoderWeights[layer])
            decoded = tf.add(decoded, self.decoderBiases[layer])
            if layer != len(self.decoderDims) - 2:  # Keep output layer linear
                decoded = self.activationFunction(decoded)
        return decoded

    def __calculateLoss(self):
        # TODO: add regularisation
        if self.sparseInput:
            # Score only the positions where the input is non-zero.
            # BUG FIX: tf.where on a 2-D tensor returns [N, 2] coordinate
            # pairs; tf.gather would treat each pair as two ROW indices and
            # gather whole rows. Element-wise selection needs tf.gather_nd.
            nonZeros = tf.where(tf.greater(self.input, 0))
            labels = tf.gather_nd(self.input, nonZeros)
            predictions = tf.gather_nd(self.decoded, nonZeros)
        else:
            labels = self.input
            predictions = self.decoded
        # RMSE of the (possibly masked) reconstruction.
        return tf.sqrt(
            tf.losses.mean_squared_error(
                labels=labels,
                predictions=predictions
            )
        )

    def setBatch(self, input, learningRate=0.0):
        """Store the feed_dict used by subsequent run()/save() calls."""
        self.batchDict = {
            self.input: input,
            self.learningRate: learningRate
        }

    def run(self, operations=None, train=False):
        """Evaluate the named operations on the current batch.

        operations: a name or list of names from {'input', 'noisyInput',
        'encoded', 'decoded', 'loss'}. If train is True, one optimisation
        step is also performed. Returns a single value when exactly one
        operation was requested, otherwise a list of values.
        """
        if not type(operations) is list:
            operations = [operations]
        ops = [self.train] if train else []
        # Map of requestable tensors; noisyInput only exists when denoising.
        available = {
            'input': self.input,
            'encoded': self.encoded,
            'decoded': self.decoded,
            'loss': self.loss,
        }
        if self.denoise:
            available['noisyInput'] = self.noisyInput
        for op in operations:
            if op in available:
                ops.append(available[op])
        results = self.session.run(ops, self.batchDict)
        requested = len(ops) - (1 if train else 0)
        if requested == 1:
            return results[-1]   # Single value requested: unwrap
        if train:
            return results[1:]   # Drop the train op's None result
        return results

    def save(self, epoch, modelName="Autoencoder", loss=None):
        """Checkpoint the model together with the epoch number and a loss.

        loss: value to record with the checkpoint. BUG FIX (backward
        compatible): when omitted, the loss is evaluated on the CURRENT
        batch only (the last one passed to setBatch), which can differ
        noticeably from the epoch-average loss the training loop tracks —
        a likely cause of the apparent loss jump after restoring. Pass the
        epoch-average loss explicitly to record a consistent value.
        """
        modelName += '.ckpt'
        saveDir = os.path.dirname(os.path.realpath(__file__)) + '/SavedModels/'
        os.makedirs(saveDir, exist_ok=True)  # Don't fail on the first save
        self.saver.save(self.session, saveDir + modelName)
        if loss is None:
            loss = self.session.run(self.loss, self.batchDict)
        with open(saveDir + modelName + '_epoch.pk', 'wb') as epochFile:
            pk.dump(epoch, epochFile)
        with open(saveDir + modelName + '_loss.pk', 'wb') as lossFile:
            pk.dump(loss, lossFile)

    def restore(self, modelName="Autoencoder"):
        """Restore a checkpoint; returns (epoch, loss) recorded at save time."""
        modelName += '.ckpt'
        saveDir = os.path.dirname(os.path.realpath(__file__)) + '/SavedModels/'
        self.saver.restore(self.session, saveDir + modelName)
        with open(saveDir + modelName + '_epoch.pk', 'rb') as epochFile:
            epoch = pk.load(epochFile)
        with open(saveDir + modelName + '_loss.pk', 'rb') as lossFile:
            loss = pk.load(lossFile)
        return epoch, loss

    def kill(self):
        """Close the TensorFlow session."""
        self.session.close()
以下是训练该模型的脚本:
import os

import numpy as np
import pandas as pd

from Autoencoder import Autoencoder

loadModel = True
# loadModel = False
learningRate = 0.001
numEpochs = 10000
batchSize = 1  # Divide into 670
printStep = 1

projectDir = os.path.dirname(os.path.realpath(__file__))

# Ratings matrix, shape (671, 9066): one row per user, one column per item.
original = pd.read_csv(projectDir + '/Data/ratings_small_pivoted.csv').drop('userId', axis=1)
numSamples = original.shape[0]
numFeatures = original.shape[1]
numBatches = numSamples // batchSize

encoderDims = [
    numFeatures,
    numFeatures // 2
]

ae = Autoencoder(encoderDims, sparseInput=True)
if loadModel:
    # NOTE(review): restore() returns the loss pickled at save time, which
    # Autoencoder.save() evaluates on the LAST batch only — not the
    # epoch-average tracked below. The mismatch makes the loss appear to
    # jump after a restore even though the weights round-tripped correctly.
    bestEpoch, bestLoss = ae.restore()
else:
    # BUG FIX: use +inf rather than the magic sentinel 9999, so the first
    # epoch is always checkpointed even if its loss exceeds 9999.
    bestEpoch, bestLoss = 0, float('inf')

for epoch in range(1, numEpochs - bestEpoch + 1):
    epochLoss = 0.0
    for batch in range(numBatches):
        # Positional row slice of the DataFrame for this mini-batch.
        batchInput = original[batch * batchSize: (batch + 1) * batchSize]
        ae.setBatch(batchInput, learningRate)
        epochLoss += ae.run(['loss'], train=True)
    epochLoss /= numBatches
    if epochLoss < bestLoss:
        bestLoss = epochLoss
        ae.save(epoch + bestEpoch)
    if epoch == 1 or epoch % printStep == 0:
        print("EPOCH: {} / {}".format(epoch + bestEpoch, numEpochs))
        print("LOSS: {} ({})\n".format("%.4f" % epochLoss, "%.4f" % bestLoss))
以 0.001 的学习率训练 100 个 epoch 后,损失降到约 1.0。然后我把学习率降到 0.0001,损失在随后的第一个 epoch 立即跳到约 3.9,之后持续下降,但非常缓慢。