首页 文章

在Google Colab中恢复TensorFlow模型失败

提问于
浏览
1

在Google Colaboratory中使用saver.restore(sess,model_dir)恢复TensorFlow模型时失败了。

恢复模型的代码:

tf.reset_default_graph()
    sq_net = classifierNet(input_shape,out_classes,lr_rate,is_train)

    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())
        train_vars = tf.trainable_variables()
        if model_dir is not None:
                 if os.path.exists("{}.index".format(model_dir)):
                        saver = tf.train.Saver()
                        saver.restore(sess, model_dir)
                        print("Model at %s restored" % model_dir)
                 else:
                        print("Model path does not exist, skipping...")
        else:
                 print("Model path is None - Nothing to restore")

上面的代码产生以下错误:

INFO:tensorflow:Restoring parameters from dri//colab//mod//
    ---------------------------------------------------------------------------
    NotFoundError                             Traceback (most recent call last)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
    1360   def _register_dead_handle(self, handle):
    -> 1361     # Register a dead handle in the session. Delete the dead tensors when
    1362     # the number of dead tensors exceeds certain threshold.

    /usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
    1339     # Nothing to do if we're using the new session interface
    -> 1340     # TODO(skyewm): remove this function altogether eventually
    1341     if self._created_with_new_api: return

    /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)

   NotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for dri//colab//mod//model.ckpt
 [[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]

    During handling of the above exception, another exception occurred:

    NotFoundError                             Traceback (most recent call last)
<ipython-input-44-fb31e76f6b17> in <module>()
     11           if os.path.exists("{}.index".format(model_dir)):
     12               saver = tf.train.Saver(var_list=v0_vars)
---> 13               saver.restore(sess, model_dir)
     14               print("Model at %s restored" % model_dir)
     15           else:

/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py in restore(self, sess, save_path)
   1753   # Create a saver.
   1754   saver = tf.train.Saver(...variables...)
-> 1755   # Remember the training_op we want to run by adding it to a collection.
   1756   tf.add_to_collection('train_op', train_op)
   1757   sess = tf.Session()

/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    903     This is EXPERIMENTAL and subject to change.
    904 
--> 905     To use partial execution, a user first calls `partial_run_setup()` and
    906     then a sequence of `partial_run()`. `partial_run_setup` specifies the
    907     list of feeds and fetches that will be used in the subsequent

/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1135     convertible to an ndarray) with matching element type and shape. See
   1136     @{tf.Session.run} for details of the allowable feed key and value types.
-> 1137 
   1138     The returned callable will have the same return type as
   1139     `tf.Session.run(fetches, ...)`. For example, if `fetches` is a `tf.Tensor`,

/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1353           tf_session.TF_ExtendGraph(
   1354               self._session, graph_def.SerializeToString(), status)
-> 1355         self._opened = True
   1356 
   1357   # The threshold to run garbage collection to delete dead tensors.

/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1372       fetches = []
   1373       for deleter_key, tensor_handle in enumerate(tensors_to_delete):
-> 1374         holder, deleter = session_ops._get_handle_deleter(self.graph,
   1375                                                           deleter_key,
   1376                                                           tensor_handle)

NotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for dri//colab//mod//model.ckpt
     [[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]

Caused by op 'save/RestoreV2', defined at:
  File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.6/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/usr/local/lib/python3.6/dist-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/usr/local/lib/python3.6/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-44-fb31e76f6b17>", line 12, in <module>
    saver = tf.train.Saver(var_list=v0_vars)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py", line 1293, in __init__
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py", line 1302, in build
    """
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py", line 1339, in _build
    """Deletes old checkpoints if necessary.
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py", line 796, in _build_internal
    RuntimeError: If the SAVERS collection already has more than one items.
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py", line 449, in _AddRestoreOps
    filename_tensor: Tensor for the path of the file to load.
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py", line 847, in bulk_restore
    if all_model_checkpoint_paths is None:
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_io_ops.py", line 1030, in restore_v2
    shape_and_slices = _ops.convert_to_tensor(shape_and_slices, _dtypes.string)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 3271, in create_op
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 1650, in __init__
    # Just being a bit paranoid here

NotFoundError (see above for traceback): Unsuccessful TensorSliceReader constructor: Failed to find any matching files for dri//colab//mod//model.ckpt
     [[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]

挂载Google Drive的代码:

!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
# Generate auth tokens for Colab
from google.colab import auth
auth.authenticate_user()
# Generate creds for the Drive FUSE library.
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass()
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}
!mkdir -p dri
!google-drive-ocamlfuse dri

在Google Colab上训练完模型后,我想先保存它,之后再恢复出来用于测试。我能够成功保存模型,但在Google Colab中恢复时会引发上述错误(而同样的代码在我的本地机器上可以正常工作)。请问正确的做法是什么?

提前致谢!

1 回答

相关问题