
I want to train a convolutional network to output a single number in the range 0-100. Very quickly, though, the model stops updating its weights, and only the biases in the fully connected layers keep changing. I cannot understand why. In short: the TensorFlow weights do not train (only the biases change).

Image: enter image description here

I have played around with different numbers of layers and so on, but I always run into the same problem: only the FC biases change.

Here is the code I am currently testing. I have stripped out things like dropout; overfitting is not a concern at this point. In fact, I would like to overfit the data just so I can see my model learn something.

from __future__ import print_function 

import tensorflow as tf 

from tensorflow.examples.tutorials.mnist import input_data 
import matplotlib.pyplot as plt 
import matplotlib.image as mpimg 


################################################################################### 
############################# Read Data ########################################### 

with tf.name_scope("READ_DATA"): 

    def read_my_file_format(filename_queue):
        reader = tf.WholeFileReader()
        key, record_string = reader.read(filename_queue)
        split_res = tf.string_split([key], '_')
        key = split_res.values[5]
        example = tf.image.decode_png(record_string)
        example = tf.image.rgb_to_grayscale(example, name=None)

        processed_example = resize_img(example)
        processed_example = reshape_img(processed_example)
        return processed_example, key


    def resize_img(imgg):
        return tf.image.resize_images(imgg, [102, 525])

    def reshape_img(imgg):
        return tf.reshape(imgg, shape=[102, 525, 1])


    def input_pipeline(bsize=30, num_epochs=None):
        filename_queue = tf.train.string_input_producer(
            tf.train.match_filenames_once("./png_imgs/*.png"),
            num_epochs=num_epochs, shuffle=True)
        example, label = read_my_file_format(filename_queue)

        min_after_dequeue = bsize
        capacity = min_after_dequeue + 3 * 8

        example_batch, label_batch = tf.train.shuffle_batch(
            [example, label], batch_size=bsize, capacity=capacity,
            min_after_dequeue=min_after_dequeue)
        return example_batch, label_batch

    imb_batch1,label_batch1 = input_pipeline() 

    single_img, single_lbl = input_pipeline(bsize=1) 

############################# Read Data ########################################### 
################################################################################### 



# Parameters 
#learning_rate = 0.0001 
training_iters = 200000 
batch_size = 30 

# Network Parameters 
n_input = 600*300*3 
n_classes = 1 # single scalar output (regression target 0-100)
dropout = 0.75 # Dropout, probability to keep units 

# tf Graph input 
x = tf.placeholder(tf.float32, [None, 102,525,1]) 
y = tf.placeholder(tf.float32, [None, 1]) 
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability) 
learning_rate = tf.placeholder(tf.float32) 


# Create some wrappers for simplicity 
def conv2d(x, W, b, strides=1): 
    # Conv2D wrapper, with bias and relu activation 
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME') 
    x = tf.nn.bias_add(x, b) 
    return tf.nn.relu(x) 


def maxpool2d(x, k=2): 
    # MaxPool2D wrapper 
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], 
          padding='SAME') 


# Create model 
def conv_net(x, dropout): 

    # Convolution Layer 
    with tf.variable_scope('conv1') as scope:
        w = tf.get_variable('weights', [5, 5, 1, 32], initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('biases', [32], initializer=tf.random_normal_initializer())
        conv1 = conv2d(x, w, b)
        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)

    with tf.variable_scope('conv2') as scope:
        w = tf.get_variable('weights', [5, 5, 32, 32], initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('biases', [32], initializer=tf.random_normal_initializer())
        conv2 = conv2d(conv1, w, b)
        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)

    with tf.name_scope("Maxpool"):
        conv2 = maxpool2d(conv2, k=2)

    with tf.variable_scope('FC1') as scope:
        w = tf.get_variable('weights', [32*263*51, 64], initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('biases', [64], initializer=tf.random_normal_initializer())
        FC1 = tf.reshape(conv2, [-1, w.get_shape().as_list()[0]])
        FC1 = tf.add(tf.matmul(FC1, w), b)
        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)

    with tf.variable_scope('FC2') as scope:
        w = tf.get_variable('weights', [64, 1], initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('biases', [1], initializer=tf.random_normal_initializer())
        FC2 = tf.add(tf.matmul(FC1, w), b)
        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)

    return FC2 


# Construct model 
pred = conv_net(x, keep_prob) 

def cost():
    with tf.name_scope("Cost"):
        diff = tf.abs(tf.subtract(y, pred))
        cost = tf.reduce_mean(diff)
        print(cost)
        tf.summary.histogram('Label', y)
        tf.summary.histogram('predicted', pred)
        tf.summary.scalar('cost', cost)
        return cost

with tf.name_scope("Optimizer"): 
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost()) 
# optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost()) 


# Initializing the variables 
saver = tf.train.Saver() 
init = tf.global_variables_initializer() 
merged = tf.summary.merge_all() 


# Launch the graph 
with tf.Session() as sess: 

    sess.run(init) 

    coord = tf.train.Coordinator() 
    threads = tf.train.start_queue_runners(coord=coord) 
    writer = tf.summary.FileWriter("/tmp/tensorboard/log01") 
    writer.add_graph(sess.graph) 
    step = 1 
    l_rate= 0.1 

    # Keep training until reach max iterations 
    while step * batch_size < training_iters:
        print("step: ", step)
        batch_x, batch_y = sess.run([imb_batch1, label_batch1])

        batch_y = batch_y.reshape(-1, 1)
        if step % 100 == 0:
            l_rate = l_rate/5

        if l_rate < 0.000001:
            l_rate = 0.000001

        if step > 20:
            _, sumry = sess.run([optimizer, merged], feed_dict={x: batch_x, y: batch_y,
                                keep_prob: dropout, learning_rate: l_rate})
            writer.add_summary(sumry, step)
        else:
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
                     keep_prob: dropout, learning_rate: l_rate})

        step += 1

    print("Training Done!") 



    coord.request_stop() 
    coord.join(threads) 

Is there a silly mistake somewhere in this code that is causing this?


My hunch: to me this could indicate that the learning rate is too high. Since you are changing the learning rate during training... maybe you could try a constant LR and see whether the problem persists? – jjmontes


@jjmontes, yes, I have tried constant learning rates, both large and small, but the problem remains. My hope was to start with a larger LR, so the weights would get "shaken up" by it and change more aggressively, and a finer LR would then fine-tune things later. But no such luck. – Simmeman


Have you tried switching xavier_initializer() to a plain random normal initializer? – Dotan
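For reference, a minimal sketch of the swap Dotan suggests, reusing the FC1 weight variable from the question's code; the stddev value here is an arbitrary choice for illustration:

# Hypothetical illustration: use a plain random-normal initializer instead of Xavier.
w = tf.get_variable('weights', [32*263*51, 64],
                    initializer=tf.random_normal_initializer(stddev=0.01))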

Answer


There is no non-linearity in the first fully connected layer, so it adds nothing over having just a single fully connected layer.
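For illustration, a minimal sketch of what that fix could look like, reusing the FC1 block from the question's conv_net; the added tf.nn.relu call is the only change, everything else is taken from the original code:

with tf.variable_scope('FC1') as scope:
    w = tf.get_variable('weights', [32*263*51, 64], initializer=tf.contrib.layers.xavier_initializer())
    b = tf.get_variable('biases', [64], initializer=tf.random_normal_initializer())
    FC1 = tf.reshape(conv2, [-1, w.get_shape().as_list()[0]])
    # Apply a non-linearity (ReLU) after the affine transform so that stacking
    # FC2 on top actually adds representational power.
    FC1 = tf.nn.relu(tf.add(tf.matmul(FC1, w), b))
    tf.summary.histogram('weights', w)
    tf.summary.histogram('biases', b)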
