Please note: this question also exists as an issue on GitHub.

UPDATE:

I am trying to implement a custom layer in Keras: a GRU layer that computes its gates via a convolution. The code works, but only with the Theano backend.

On GitHub I was given the hint to switch the image dim ordering to "tf" in keras.json. In fact, I had hard-coded the "th" image layout, so I had to fix a few things in my code. It now supports both the "th" and the "tf" layout. The model was adapted as well: I now have two models, one for "th" and one for "tf".
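For reference, the setting lives in ~/.keras/keras.json. A typical Keras 1.x configuration looks roughly like this (the exact set of keys depends on the Keras version):

{
    "image_dim_ordering": "tf",
    "epsilon": 1e-07,
    "floatx": "float32",
    "backend": "tensorflow"
}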

This is a complete rewrite of the question. The problem is the same, but the code is new.

With Theano as the backend, both image layouts work fine. The network is able to learn from the data, and the new layer seems to work. I am fairly confident that my implementations for both the "th" and the "tf" layout are correct. But as soon as I switch to TensorFlow, it always crashes, no matter which layout I choose.
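To rule out a mixed-up environment, the active backend and image ordering can be double-checked like this (Keras 1.x API):

from keras import backend as K

print(K.backend())             # 'theano' or 'tensorflow'
print(K.image_dim_ordering())  # 'th' or 'tf'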

There is one trick in the code: the GRU computes its gates with a convolution, but that convolution is not part of the layer itself. I simply assume that whenever this layer is used, a 2D convolution producing 3 output features (one per gate) runs directly before it. Here is the code:

import numpy as np
from keras import backend as K
from keras import initializations, activations
from keras.engine import InputSpec
from keras.layers.recurrent import Recurrent


# copied from the keras github source. removed lots of unnecessary (for me) code

# assuming a 2D Convolution was run by hand before this layer.
# please note that this has no variables of its own.
# TODO: incorporate the 2D Convolution into this layer

class CGRU(Recurrent):
    def __init__(self,
                 init='glorot_uniform', inner_init='orthogonal',
                 activation='tanh', inner_activation='hard_sigmoid', **kwargs):

        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.activation = activations.get(activation)
        self.inner_activation = activations.get(inner_activation)

        self.dim_ordering = K.image_dim_ordering()
        assert self.dim_ordering in {'tf', 'th'}, '`dim_ordering` must be in {"tf", "th"}.'

        # removing the regularizers and the dropout

        super(CGRU, self).__init__(**kwargs)

        # this seems necessary in order to accept 5 input dimensions:
        # (samples, timesteps, features, x, y) for "th"
        # or (samples, timesteps, x, y, features) for "tf"
        self.input_spec = [InputSpec(ndim=5)]

    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]

        if self.dim_ordering == "th":
            nb_features = input_shape[2]
            x_dim = input_shape[3]
            y_dim = input_shape[4]

            # the input_dim was broken and didn't cause any problems. I'm commenting it out to see
            # whether it is really necessary
            # self.input_dim = (3,x_dim, y_dim)

            # moved here from the constructor. Curiously, batch_size seems to have been removed from here.
            # I also don't understand why I don't need to specify the sequence length
            # when the return_sequences flag is set to True
            # TODO: debug where this is used
            self.output_dim = [1, x_dim, y_dim]

        elif self.dim_ordering == "tf":
            nb_features = input_shape[4]
            x_dim = input_shape[2]
            y_dim = input_shape[3]

            # see above
            # self.input_dim = (x_dim, y_dim, 3)

            # see above
            self.output_dim = [x_dim, y_dim, 1]

        assert nb_features == 3, "the CGRU expects 3 input features"

        if self.stateful:
            self.reset_states()
        else:
            # initial states: all-zero tensor of shape (output_dim)
            self.states = [None]

    def reset_states(self):

        assert self.stateful, 'Layer must be stateful.'
        input_shape = self.input_spec[0].shape
        if not input_shape[0]:
            raise Exception('If a RNN is stateful, a complete ' +
                            'input_shape must be provided (including batch size).')

        if self.dim_ordering == "th":
            nb_features = input_shape[2]
            x_dim = input_shape[3]
            y_dim = input_shape[4]

        elif self.dim_ordering == "tf":
            nb_features = input_shape[4]
            x_dim = input_shape[2]
            y_dim = input_shape[3]

        if hasattr(self, 'states'):
            K.set_value(self.states[0],
                        np.zeros((input_shape[0], x_dim, y_dim)))
        else:
            self.states = [K.zeros((input_shape[0], x_dim, y_dim))]

    def preprocess_input(self, x):
        # TODO: if at some point, this layer does calculations, revisit and optimise this code
        return x

    def step(self, x, states):
        h_tm1 = states[0]  # previous memory

        # usually here would be matrix multiplications for the different gates
        # I assume that a convolution before this layer has prepared 3 features per coordinate
        # I'll simply use those 3 features
        # in fact: this layer doesn't have any trainable weights

        # TODO: check whether this if/else costs performance
        if self.dim_ordering == "th":
            z = self.inner_activation(x[:, 0, :, :])
            r = self.inner_activation(x[:, 1, :, :])
            hh = self.activation(x[:, 2, :, :])
        else:  # self.dim_ordering == "tf"
            z = self.inner_activation(x[:, :, :, 0])
            r = self.inner_activation(x[:, :, :, 1])
            hh = self.activation(x[:, :, :, 2])

        # standard GRU state update: the gate z blends the previous state with the candidate hh
        h = z * h_tm1 + (1 - z) * hh
        return h, [h]

    def get_initial_states(self, x):
        initial_state = K.zeros_like(x)  # (samples, timesteps, input_dim)
        # input_dim = (3, x_dim, y_dim) for th
        # input_dim = (x_dim, y_dim, 3) for tf
        # please note: the number of features is expected to be 3, one feature for each gate
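        # summing the all-zero tensor over the time axis and the feature axis
        # collapses it to the state shape (samples, x_dim, y_dim) without
        # requiring the batch size to be known at graph-construction time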

        if self.dim_ordering == "th":
            initial_state = K.sum(initial_state, axis=(1, 2))  # (samples, x_dim, y_dim)
        else:  # "tf"
            initial_state = K.sum(initial_state, axis=(1, -1))  # (samples, x_dim, y_dim)

        return [initial_state]

    def get_output_shape_for(self, input_shape):
        if self.return_sequences:
            return tuple([input_shape[0], input_shape[1]] + self.output_dim)
        else:
            return tuple([input_shape[0]] + self.output_dim)

    def get_config(self):
        config = {'output_dim': self.output_dim,
                  'init': self.init.__name__,
                  'inner_init': self.inner_init.__name__,
                  'activation': self.activation.__name__,
                  'inner_activation': self.inner_activation.__name__}

        # removed the various regularizers and dropouts from the config;
        # surely they aren't needed if the layer doesn't have them?
        base_config = super(CGRU, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
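Since the layer implements get_config, a saved model containing it should be loadable again by passing the class through custom_objects. A minimal sketch (Keras 1.x API; the file name is made up):

from keras.models import load_model

# 'cgru_model.h5' is a hypothetical path; the custom class has to be passed
# explicitly because Keras cannot deserialize unknown layers on its own
model = load_model('cgru_model.h5', custom_objects={'CGRU': CGRU})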

Sorry for throwing so much code at you; I have tried to comment and format it as well as I could.

Here is the setup, i.e. how I use the code:

# imports for this snippet (dice_coef and dice_coef_loss are custom metric
# functions defined elsewhere in the notebook)
from keras.layers import Input, Reshape, Convolution2D, TimeDistributed, merge
from keras.models import Model
from keras.optimizers import Adam

if K.image_dim_ordering() == "tf":
    # this is the actual input, fed to the network
    inputs = Input((40, 40, 40, 1))

    # now reshape to a sequence
    #reshaped = Reshape((40, 1, 40, 40))(inputs)

    # now create a model like uNet which is fed the time-distributed data
    # this model will perform a segmentation on a 2D slice of data.

    conv_inputs = Input((40, 40, 1))
    base_filters_size = 16
    conv1 = Convolution2D(16, 3, 3, activation='relu', border_mode='same')(conv_inputs)
    #conv1 = Convolution2D(64, 3, 3, activation='relu', border_mode='same')(conv1)
    conv1 = Convolution2D(16, 3, 3, activation='relu', border_mode='same')(conv1)
    #conv1 = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(conv1)
    conv1 = Convolution2D(3, 3, 3, activation='relu', border_mode='same')(conv1)
    convmodel = Model(input=conv_inputs, output=conv1)

    convmodel.summary()

    #apply the segmentation to each layer
    time_dist=TimeDistributed(convmodel)(inputs)

    # now a model with the two GRUs which move in different directions
    time_dist=Reshape((40,40,40, 3))(time_dist)

    # The next line will crash
    up=CGRU(go_backwards=False, return_sequences=True, name="up", input_shape=[40,40,40,3])(time_dist)
    down=CGRU(go_backwards=True, return_sequences=True, name="down", input_shape=[40,40,40,3])(time_dist)

    # merge the two outputs
    merged_updown=merge([up,down], mode="concat", concat_axis=2)
    reshaped=Reshape((40,40,40,2))(merged_updown)

    # and apply a final convolution to map from 2 features back to 1    
    mergeconv_inputs=Input((40,40,2))
    mergeconv=Convolution2D(1,3,3, activation="sigmoid", border_mode="same")(mergeconv_inputs)
    merge_model=Model(input=mergeconv_inputs, output=mergeconv)

    final_time_dist=TimeDistributed(merge_model)(reshaped)

    output=Reshape((40,40,40,1))(final_time_dist)

    model=Model(input=inputs, output=output)
    print(model.summary())
    model.compile(optimizer=Adam(lr=1e-4), loss=dice_coef_loss, metrics=[dice_coef])

The very same code, with the "tf" image layout, runs fine under Theano and crashes under TensorFlow.

The problem is: ValueError: Shapes (?, ?, 40, 40) and (40, ?, 40) are not compatible
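For what it's worth, the shape the layer declares can be compared with what step() actually returns. A small sketch using the class above, assuming "tf" ordering and the batch size left as None:

# what the layer promises for a (None, 40, 40, 40, 3) input
layer = CGRU(return_sequences=True)
layer.build((None, 40, 40, 40, 3))
print(layer.get_output_shape_for((None, 40, 40, 40, 3)))
# -> (None, 40, 40, 40, 1), one output feature per time step,
# while step() itself returns a rank-3 (samples, x_dim, y_dim) tensor per call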

Here is the full error message:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-8-295ff1bf414f> in <module>()
     25     # now a model with the two GRUs which move in different directions
     26     time_dist=Reshape((40,40,40, 3))(time_dist)
---> 27     up=CGRU(go_backwards=False, return_sequences=True, name="up", input_shape=[40,40,40,3])(time_dist)
     28     down=CGRU(go_backwards=True, return_sequences=True, name="down", input_shape=[40,40,40,3])(time_dist)
     29 

/usr/local/lib/python2.7/dist-packages/keras/engine/topology.pyc in __call__(self, x, mask)
    513         if inbound_layers:
    514             # this will call layer.build() if necessary
--> 515             self.add_inbound_node(inbound_layers, node_indices, tensor_indices)
    516             input_added = True
    517 

/usr/local/lib/python2.7/dist-packages/keras/engine/topology.pyc in add_inbound_node(self, inbound_layers, node_indices, tensor_indices)
    571         # creating the node automatically updates self.inbound_nodes
    572         # as well as outbound_nodes on inbound layers.
--> 573         Node.create_node(self, inbound_layers, node_indices, tensor_indices)
    574 
    575     def get_output_shape_for(self, input_shape):

/usr/local/lib/python2.7/dist-packages/keras/engine/topology.pyc in create_node(cls, outbound_layer, inbound_layers, node_indices, tensor_indices)
    148 
    149         if len(input_tensors) == 1:
--> 150             output_tensors = to_list(outbound_layer.call(input_tensors[0], mask=input_masks[0]))
    151             output_masks = to_list(outbound_layer.compute_mask(input_tensors[0], input_masks[0]))
    152             # TODO: try to auto-infer shape if exception is raised by get_output_shape_for

/usr/local/lib/python2.7/dist-packages/keras/layers/recurrent.pyc in call(self, x, mask)
    211                                              constants=constants,
    212                                              unroll=self.unroll,
--> 213                                              input_length=input_shape[1])
    214         if self.stateful:
    215             self.updates = []

/usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.pyc in rnn(step_function, inputs, initial_states, go_backwards, mask, constants, unroll, input_length)
   1193             parallel_iterations=32,
   1194             swap_memory=True,
-> 1195             sequence_length=None)
   1196 
   1197         if nb_states > 1:

/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.pyc in _dynamic_rnn_loop(cell, inputs, initial_state, parallel_iterations, swap_memory, sequence_length, dtype)
   1023     shape = _state_size_with_prefix(
   1024         output_size, prefix=[const_time_steps, const_batch_size])
-> 1025     output.set_shape(shape)
   1026 
   1027   final_outputs = nest.pack_sequence_as(

/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.pyc in set_shape(self, shape)
    406         this tensor.
    407     """
--> 408     self._shape = self._shape.merge_with(shape)
    409 
    410   @property

/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/tensor_shape.pyc in merge_with(self, other)
    568       except ValueError:
    569         raise ValueError("Shapes %s and %s are not compatible" %
--> 570                          (self, other))
    571 
    572   def concatenate(self, other):

ValueError: Shapes (?, ?, 40, 40) and (40, ?, 40) are not compatible