我有两个关于 Estimator API 的问题。我写了下面这个 input_fn(已删除无关代码):
def decode_csv(line):
    """Parse one ';'-delimited CSV line into the inputs the Estimator consumes.

    Every line is split into seven columns. Only columns 1, 3 and 4 are used
    as features; they are paired positionally with ``feature_names``.

    ``is_prediction`` and ``feature_names`` are read from the enclosing scope
    (not visible in this snippet).

    Args:
        line: A scalar string tensor holding one raw CSV line.

    Returns:
        In prediction mode, a dict mapping feature name -> column tensor.
        Otherwise a ``(features_dict, label)`` tuple, where the label is the
        sixth column parsed with an integer default.
    """
    if is_prediction:
        # Prediction files carry a string in the sixth column, and no label
        # is produced from it.
        predict_defaults = [[""], [""], [0], [0], [""], [""], [0.0]]
        col1, col2, col3, col4, col5, _dt, _w2v = tf.decode_csv(
            line, predict_defaults, field_delim=';')
        return dict(zip(feature_names, [col1, col3, col4]))

    # Training/evaluation files carry the integer label in the sixth column.
    train_defaults = [[""], [""], [0], [0], [""], [0], [0.0]]
    col1, col2, col3, col4, col5, label, _w2v = tf.decode_csv(
        line, train_defaults, field_delim=';')
    feature_map = dict(zip(feature_names, [col1, col3, col4]))
    return feature_map, label
if (is_prediction):
dataset = (tf.data.TextLineDataset(file_path)
.map(decode_csv)
.batch(BATCH_SIZE)
)
iterator = dataset.make_one_shot_iterator()
batch_features = iterator.get_next()
return batch_features
else:
dataset = (tf.data.TextLineDataset(file_path)
.skip(0)
.map(decode_csv, num_parallel_calls=int(NUMBER_OF_THREADS))
#.cache()
.shuffle(1000)
.batch(BATCH_SIZE)
.prefetch(10)
)
iterator = dataset.make_one_shot_iterator()
batch_features, batch_labels = iterator.get_next()
return batch_features, batch_labels
以及下面这个 model_fn,它基于 Google Developers 博客上的自定义 Estimator 教程:
if mode == tf.estimator.ModeKeys.PREDICT:
tf.logging.info("rxclassifier: PREDICT, {}".format(mode))
elif mode == tf.estimator.ModeKeys.EVAL:
tf.logging.info("rxclassifier: EVAL, {}".format(mode))
elif mode == tf.estimator.ModeKeys.TRAIN:
tf.logging.info("rxclassifier: TRAIN, {}".format(mode))
net = tf.feature_column.input_layer(features, params["feature_columns"])
for units in params['hidden_units']:
net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
logits = tf.layers.dense(net, params['n_classes'], activation=None)
predicted_classes = tf.argmax(logits, 1)
if mode == tf.estimator.ModeKeys.PREDICT:
predictions = {
'class_ids': predicted_classes[:, tf.newaxis],
'probabilities': tf.nn.softmax(logits),
'logits': logits,
}
return tf.estimator.EstimatorSpec(mode, predictions=predictions)
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
accuracy = tf.metrics.accuracy(labels, predictions=predicted_classes)
metrics = {'average_accuracy': accuracy}
tf.summary.scalar('accuracy', accuracy[1])
tf.summary.scalar('loss', loss)
if mode == tf.estimator.ModeKeys.EVAL:
return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)
assert mode == tf.estimator.ModeKeys.TRAIN, "TRAIN is only ModeKey left"
optimizer = tf.train.AdagradOptimizer(LEARNING_RATE)
train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
它运行良好,但我还需要用这个估算器做两件事:
1)把 CSV 中某些特定取值的行单独拿出来,直接返回一个默认预测,而不把它们送入估算器——这些值通常会被预测错(而且本来也不需要预测)。
我试过类似的东西:
if (tf.where(tf.not_equal(features['myFeature'],tf.constant(MY_VALUE))...
return tf.Estimator.EstimatorSpec(-Some way to get class X with 100% prediction-)
我想把它放在输入层之前,但不太确定怎样才能让它生效,因为我需要从每个特征列中删除这些行(不把它们传入 DNN),并直接返回“预测”。
另一种可以接受的方案是在读取 CSV 时过滤掉这些值——最好不用改动 CSV 文件本身。
2)我有 5 个输出类别,但类别 4 和 5 是类别 3 的子集(因此被预测为类别 4、5 的样本在逻辑上也可以算作类别 3)。因此,Estimator 很难学到合适的预测方式。
有没有办法把两个 DNN 串联起来?这样我可以先用 DNN 1 对类别 1、2、3 进行训练/预测,然后把被判为类别 3 的样本再送入经过微调的 DNN 2,用来区分真正的类别 3 与类别 4、5。
谢谢!