2017-08-28 36 views
0

我试图在TensorFlow中构建一个相当特殊的网络,并且我有点让它工作。可悲的是,我偶然发现了一个我无法解决的错误,甚至找不到合适的地方。 据我所知,网络已成功构建,直到损失函数被定义。然后该错误消息说,有关未对应的形状:在梯度计算MaxPoolWithArgmax时,形状在TF中不兼容

ValueError: Shapes (1, 17, 17, 44) and (1, 16, 16, 44) are not compatible

的东西是错误不说在哪张或代码行出现问题。我已经打印出所有可以提出的形状,而且我甚至找不到这种形状(1,17,17,44)。

from tensorflow.python.framework import ops 
from tensorflow.python.ops import gen_nn_ops 
@ops.RegisterGradient("MaxPoolWithArgmax") 
def _MaxPoolWithArgmaxGrad(op, grad, some_other_arg): 
    return gen_nn_ops._max_pool_grad(op.inputs[0], 
            op.outputs[0], 
            grad, 
            op.get_attr("ksize"), 
            op.get_attr("strides"), 
            padding=op.get_attr("padding"), 
            data_format='NHWC') 
class FCN_RGBD: 

    def __init__(self, checkpoint_dir='./checkpoints/'): 
     self.build(1) 

     # "allow_soft_placement = True" makes TensorFlow automatically choose an existing and supported GPU device 
     self.config = tf.ConfigProto(allow_soft_placement = True) 
     self.session = tf.Session(config = self.config) 
     self.session.run(tf.global_variables_initializer()) 

    def weight_variable(self, shape): 
     initial = tf.truncated_normal(shape, stddev=0.1) 
     return tf.Variable(initial) 

    def bias_variable(self, shape): 
     initial = tf.constant(0.1, shape=shape) 
     return tf.Variable(initial) 

    def conv_layer(self, x, W_shape, b_shape, strides, name, padding): 
     W = self.weight_variable(W_shape) 
     b = self.bias_variable([b_shape]) 
     return tf.nn.relu(tf.nn.conv2d(x, W, strides=strides, padding=padding) + b) 

    def conv_skip_layer(self, x, W_shape, b_shape, name, padding): 
     W = self.weight_variable(W_shape) 
     b = self.bias_variable([b_shape]) 
     return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding=padding) + b 

    def deconv_layer(self, x, out_shape, W_shape, b_shape, strides, name, padding): 
     W = self.weight_variable(W_shape) 
     b = self.bias_variable([b_shape]) 
     return tf.nn.conv2d_transpose(x, W, output_shape=out_shape, strides=strides, padding=padding) + b 

    def pool_layer3x3(self, x): 
     with tf.device('/gpu:0'): 
      return tf.nn.max_pool_with_argmax(x, ksize=[1, 3, 3, 1], strides=[1, 3, 3, 1], padding='SAME') 

    def pool_layer2x2(self, x): 
     with tf.device('/gpu:0'): 
      return tf.nn.max_pool_with_argmax(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 

    def build(self, batchsize): 

     print('Building the FCN...') 

     with tf.device('/gpu:0'): 

      self.x = tf.placeholder(tf.float32, shape=(batchsize, 250, 250, 1)) 
      self.y = tf.placeholder(tf.int64, shape=(batchsize, 250, 250, 1)) 

      self.rate = tf.placeholder(tf.float32, shape=[]) 

      conv1 = self.conv_layer(self.x, [5, 5, 1, 64], 64, [1, 2, 2, 1], 'conv1', 'SAME') 

      pool1, pool_1_argmax = self.pool_layer3x3(conv1) 

      conv1_skip = self.conv_skip_layer(pool1, [1, 1, 64, 44], 44, 'conv1_skip', 'VALID') 

      conv2 = self.conv_layer(pool1, [3, 3, 64, 128], 128, [1, 1, 1, 1], 'conv2', 'VALID') 

      pool2, pool_2_argmax = self.pool_layer2x2(conv2) 

      conv2_skip = self.conv_skip_layer(pool2, [1, 1, 128, 44], 44, 'conv2_skip', 'VALID') 

      conv3 = self.conv_layer(pool2, [5, 5, 128, 256], 256, [1, 1, 1, 1], 'conv3', 'VALID') 

      conv4 = self.conv_layer(conv3, [3, 3, 256, 44], 44, [1, 1, 1, 1], 'conv4', 'SAME') 

      deconv1 = self.deconv_layer(conv4, tf.stack([batchsize, 16, 16, 44]), [3, 3, 44, 44], 44, [1, 1, 1, 1], 'deconv1', 'SAME') 

      conv2_skip = tf.image.resize_image_with_crop_or_pad(conv2_skip, 16, 16) 

      sum1 = conv2_skip + deconv1 

      dropout1 = tf.nn.dropout(sum1, keep_prob=0.5) 

      deconv2 = self.deconv_layer(dropout1, tf.stack([batchsize, 34, 34, 44]), [4, 4, 44, 44], 44, [1, 2, 2, 1], 'deconv2', 'SAME') 

      conv1_skip = tf.image.resize_image_with_crop_or_pad(conv1_skip, 34, 34) 

      sum2 = conv1_skip + deconv2 

      dropout2 = tf.nn.dropout(sum2, keep_prob=0.5) 

      deconv_final = self.deconv_layer(dropout2, tf.stack([batchsize, 250, 250, 44]), [19, 19, 44, 44], 44, [1, 7, 7, 1], 'deconv_final', 'VALID') 

      annotation_pred = tf.argmax(deconv_final, dimension=3, name='prediction') 

      cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.squeeze(self.y, squeeze_dims=[3]), logits=deconv_final) 

      self.loss = tf.reduce_mean(cross_entropy, name='cross_entropy_mean') 

      self.train_step = tf.train.AdamOptimizer(self.rate).minimize(self.loss) 

      self.prediction = tf.argmax(tf.reshape(tf.nn.softmax(logits), tf.shape(deconv_final)), dimension=3) 

这是错误消息:

Traceback (most recent call last): 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\tensor_shape.py", line 560, in merge_with 
    new_dims.append(dim.merge_with(other[i])) 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\tensor_shape.py", line 135, in merge_with 
    self.assert_is_compatible_with(other) 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\tensor_shape.py", line 108, in assert_is_compatible_with 
    % (self, other)) 
ValueError: Dimensions 17 and 16 are not compatible 

During handling of the above exception, another exception occurred: 

Traceback (most recent call last): 
    File "main.py", line 5, in <module> 
    fcn_rgbd = FCN_RGBD() 
    File "C:\Users\user\netcase\Workspace\Depth_BPC_v1\FCN_RGBD.py", line 23, in __init__ 
    self.build(1) 
    File "C:\Users\user\netcase\Workspace\Depth_BPC_v1\FCN_RGBD.py", line 162, in build 
    self.train_step = tf.train.AdamOptimizer(self.rate).minimize(self.loss) 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\training\optimizer.py", line 315, in minimize 
    grad_loss=grad_loss) 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\training\optimizer.py", line 386, in compute_gradients 
    colocate_gradients_with_ops=colocate_gradients_with_ops) 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 580, in gradients 
    in_grad.set_shape(t_in.get_shape()) 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 413, in set_shape 
    self._shape = self._shape.merge_with(shape) 
    File "C:\Users\user\AppData\Local\conda\conda\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\tensor_shape.py", line 564, in merge_with 
    (self, other)) 
ValueError: Shapes (1, 17, 17, 44) and (1, 16, 16, 44) are not compatible 

我这个隐约说明问题很抱歉,但我真的没有一个想法从哪里开始。

+1

你为什么要定义你自己的渐变?我猜想这是一个错误的来源,因为在梯度计算过程中抛出异常。 – lejlot

+0

因为据我所知,没有针对MaxPoolWithArgmax的渐变atm(https://github.com/tensorflow/tensorflow/issues/1793)。但我已经尝试了与'普通'max_pool一样的方法,而且我也遇到了同样的错误。 – Gizmo

+1

你确定有没有它在干净的运行**有相同的错误**? (人们有时只是注释掉笔记本中的行,它不会从内存中删除已注册的渐变)。由于这个错误纯粹是在梯度计算中,如果这与你的覆盖无关,那看起来就像是tf中的一个错误,这很不可能。 – lejlot

回答

1

原来是不同层次尺寸错误的问题。不幸的是,conv2d_transpose的错误消息不是很有帮助。这个贴子帮了我很多:Confused about conv2d_transpose

+1

很有意思,它并没有在网络建设期间引发任何错误。这是否涉及一些广播? – lejlot

+0

我不知道,但我相信答案是否定的。无论如何谢谢你! :) – Gizmo