TensorFlow: MLP for regression predicts the same value for every test-set example

This is my first time using TensorFlow. The model is a basic MLP that performs regression; the code is adapted from the standard MNIST classification example:

https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/multilayer_perceptron.py

I only changed the inputs, outputs, hyperparameters, and the cost function to

cost = tf.reduce_mean(tf.square(pred-y)) 

and added this after out_layer:

out = tf.sigmoid(out_layer) 

I am training on 4440 input samples with 5 features each and testing on 2956 samples. After the third epoch, all predicted values become identical on the training set, and the problem is that on the test set I get the same predicted value for every example.

Training started... 

Epoch 1 

Loss= 0.001181 , y_pred= 0.485037 , y_actual= 0.450664 
Loss= 0.014749 , y_pred= 0.206193 , y_actual= 0.32764 
Loss= 0.000000 , y_pred= 0.323003 , y_actual= 0.323016 
Loss= 0.028031 , y_pred= 0.276502 , y_actual= 0.109078 
Loss= 0.024109 , y_pred= 0.283097 , y_actual= 0.127827 
Loss= 0.000688 , y_pred= 0.222412 , y_actual= 0.196174 
Loss= 0.022695 , y_pred= 0.285257 , y_actual= 0.13461 
Loss= 0.043803 , y_pred= 0.228042 , y_actual= 0.437334 
Loss= 0.002999 , y_pred= 0.251055 , y_actual= 0.30582 
Epoch 2 

Loss= 0.041213 , y_pred= 0.247654 , y_actual= 0.450664 
Loss= 0.005612 , y_pred= 0.252729 , y_actual= 0.32764 
Loss= 0.001075 , y_pred= 0.29023 , y_actual= 0.323016 
Loss= 0.018882 , y_pred= 0.246489 , y_actual= 0.109078 
Loss= 0.018060 , y_pred= 0.262215 , y_actual= 0.127827 
Loss= 0.001204 , y_pred= 0.23087 , y_actual= 0.196174 
Loss= 0.018622 , y_pred= 0.271072 , y_actual= 0.13461 
Loss= 0.038593 , y_pred= 0.240883 , y_actual= 0.437334 
Loss= 0.002938 , y_pred= 0.251615 , y_actual= 0.30582 
Epoch 3 

Loss= 0.041822 , y_pred= 0.24616 , y_actual= 0.450664 
Loss= 0.005700 , y_pred= 0.252141 , y_actual= 0.32764 
Loss= 0.001073 , y_pred= 0.29026 , y_actual= 0.323016 
Loss= 0.018882 , y_pred= 0.24649 , y_actual= 0.109078 
Loss= 0.018059 , y_pred= 0.26221 , y_actual= 0.127827 
Loss= 0.001203 , y_pred= 0.230861 , y_actual= 0.196174 
Loss= 0.018622 , y_pred= 0.271074 , y_actual= 0.13461 
Loss= 0.038595 , y_pred= 0.240879 , y_actual= 0.437334 
Loss= 0.002938 , y_pred= 0.251613 , y_actual= 0.30582 
Epoch 4 

Loss= 0.041822 , y_pred= 0.24616 , y_actual= 0.450664 
Loss= 0.005700 , y_pred= 0.252141 , y_actual= 0.32764 
Loss= 0.001073 , y_pred= 0.29026 , y_actual= 0.323016 
Loss= 0.018882 , y_pred= 0.24649 , y_actual= 0.109078 
Loss= 0.018059 , y_pred= 0.26221 , y_actual= 0.127827 
Loss= 0.001203 , y_pred= 0.23086 , y_actual= 0.196174 
Loss= 0.018623 , y_pred= 0.271074 , y_actual= 0.13461 
Loss= 0.038595 , y_pred= 0.240879 , y_actual= 0.437334 
Loss= 0.002938 , y_pred= 0.251613 , y_actual= 0.30582 

Training Finished! 

Testing started... 

Loss= 0.010336 , y_pred= 0.246348 , y_actual= 0.348012 
Loss= 0.123387 , y_pred= 0.246348 , y_actual= 0.597613 
Loss= 0.005033 , y_pred= 0.246348 , y_actual= 0.175401 
Loss= 0.022147 , y_pred= 0.246348 , y_actual= 0.0975305 
Loss= 0.004484 , y_pred= 0.246348 , y_actual= 0.313307 
Loss= 0.010506 , y_pred= 0.246348 , y_actual= 0.348845 
Loss= 0.000052 , y_pred= 0.246348 , y_actual= 0.239131 

I have already tried every solution suggested in the various posts describing the same problem: the data is shuffled and normalized, and y and pred have the same dimensions (a sketch of the kind of preprocessing meant here is shown after the links below).

1) TensorFlow always converging to same output for all items after training

2) MLP in tensorflow for regression... not converging

3) tensorflow deep neural network for regression always predict same results in one batch
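
The preprocessing itself is not part of the question, so here is a minimal sketch of the kind of shuffling and min-max normalization meant above. The input file name raw_feature_y.csv is only a placeholder, and this is an assumed reconstruction, not the script actually used.

import numpy as np

# Assumed preprocessing: scale every column to [0, 1], shuffle the rows,
# and write the result to the file the training script reads.
raw = np.loadtxt('raw_feature_y.csv', delimiter=',', dtype=np.float32)
norm = (raw - raw.min(axis=0)) / (raw.max(axis=0) - raw.min(axis=0))
np.random.shuffle(norm)  # shuffle rows in place
np.savetxt('norm_rand_feature_y.csv', norm, delimiter=',')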

The full code is below. Any help is much appreciated.

# In[67]: 

import tensorflow as tf 
import numpy as np 


# In[68]: 

# Parameters 
learning_rate = 0.01 
epoch = 1 
dropout = 0.75 
# Network Parameters 
n_hidden_1 = 256 # 1st layer number of features 
n_hidden_2 = 256 # 2nd layer number of features 
n_hidden_3 = 256 
n_hidden_4 = 256 
n_input = 5 
n_val = 1 

train_set = 4440 

# tf Graph input 
x = tf.placeholder("float", [None, n_input], name = "x") 
y = tf.placeholder("float", [None, n_val], name = "y") 
# keep_prob = tf.placeholder(tf.float32) #dropout (keep probability) 


# In[69]: 

# Create model 
def multilayer_perceptron(x, weights, biases): 

    # Hidden layer with RELU activation 
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1']) 
    layer_1 = tf.nn.relu(layer_1) 

    # Hidden layer with RELU activation 
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']) 
    layer_2 = tf.nn.relu(layer_2) 

    # Hidden layer with RELU activation 
    layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3']) 
    layer_3 = tf.nn.relu(layer_3) 

    # Hidden layer with RELU activation 
    layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4']) 
    layer_4 = tf.nn.relu(layer_4) 

    # Output layer with linear activation 
    out_layer = tf.matmul(layer_4, weights['out']) + biases['out'] 
    out = tf.sigmoid(out_layer) 
    return out 


# In[70]: 

# Store layers weight & bias 
weights = { 
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], mean=0.0, stddev=0.01 ,dtype=tf.float32, name = "h1")), 
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], mean=0.0, stddev=0.01 ,dtype=tf.float32, name = "h2")), 
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "h3")), 
    'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "h4")), 
    'out': tf.Variable(tf.random_normal([n_hidden_4, n_val], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "out")) 
} 
biases = { 
    'b1': tf.Variable(tf.random_normal([n_hidden_1], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "b1")), 
    'b2': tf.Variable(tf.random_normal([n_hidden_2], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "b2")), 
    'b3': tf.Variable(tf.random_normal([n_hidden_3], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "b3")), 
    'b4': tf.Variable(tf.random_normal([n_hidden_4], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "b4")), 
    'out': tf.Variable(tf.random_normal([n_val], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "out")) 
} 
# Construct model 
pred = multilayer_perceptron(x, weights, biases) 
# pred = tf.transpose(pred) 

# Define loss and optimizer 
cost = tf.reduce_mean(tf.square(pred-y)) 
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) 

# Initializing the variables 
init = tf.global_variables_initializer() 


# In[71]: 

# Launch the graph 
with tf.Session() as sess: 
    sess.run(init) 
    # Training 
    print "Training started...\n" 

    for ep in range(1,epoch+1): 

     print "Epoch",ep 
     print 
     num = 0 
     with open('norm_rand_feature_y.csv') as f: 

      for line in f: 

       data = line.split(",") 
       x_temp = data[0:5] 
       y_temp = data[5] 

       x_temp = np.asarray(x_temp) 
       x_temp = x_temp.reshape(1,x_temp.shape[0]) 
       x_temp = x_temp.astype(np.float32) 

       y_temp = np.asarray(y_temp) 
       y_temp = y_temp.reshape(1,1) 
       y_temp = y_temp.astype(np.float32) 

       sess.run(optimizer, feed_dict={x: x_temp, y: y_temp}) 

       loss,y_pre = sess.run([cost,pred], feed_dict={x: x_temp, 
                y: y_temp}) 

#     print tuple(pred.get_shape().as_list()) 
#     print y.shape 



       if num%500 == 0: 
        print "Loss= " + "{:.6f}".format(loss), ", y_pred=",y_pre[0][0], ", y_actual=",y_temp[0][0] 

       num = num+1 
       if num == train_set: 
        break 

#  variables_names =[v.name for v in tf.trainable_variables()] 
#  values = sess.run(variables_names) 
#  for k,v in zip(variables_names, values): 
#   print(k, v) 

#  print sess.run("h1", feed_dict={x: x_temp,y: y_temp, keep_prob:1.0}) 
    print "Training Finished!\n" 

    #Testing 
    y_value = list() 
    y_actual = list() 
    error = 0 
    num=0 
    print "Testing started...\n" 
    with open('norm_rand_feature_y.csv') as f: 

      for j in range(train_set): 
       f.next() 

      for line in f: 

       data = line.split(",") 
       x_temp = data[0:5] 
       y_temp = float(data[5]) 

       x_temp = np.asarray(x_temp) 
       x_temp = x_temp.astype(np.float32) 
       x_temp = x_temp.reshape(1,x_temp.shape[0]) 

       y_temp = np.asarray(y_temp) 
       y_temp = y_temp.reshape(1,1) 
       y_temp = y_temp.astype(np.float32) 

       loss = sess.run(cost, feed_dict={x: x_temp, y:y_temp}) 


       y_pred = sess.run(pred, feed_dict={x: x_temp}) 

       print "Loss= " + "{:.6f}".format(loss), ", y_pred=",y_pre[0][0], ", y_actual=",y_temp[0][0] 

       y_value.append(y_pred[0][0]) 
       y_actual.append(y_temp[0][0]) 
       error = error + abs(y_pred[0][0] - y_temp) 

#     num = num+1 
#     if num == 100: 
#      break 

    print 
    print "Testing Finished!\n" 
    error = error/(7396-train_set)  # 2956 test samples 
    print "Total error:",error[0][0] 
    y_row = zip(y_value,y_actual) 
    np.savetxt("test_y_mlp.csv", y_row, delimiter=",") 

Answer


Things I would try first:

  • Play with the learning rate. With a batch size of 1 it is probably too high.
  • Increase the batch size (in your case you feed one sample at a time; try starting with batches of 16, as in the sketch after this list).
  • A simple way to check that your implementation is correct is to try to overfit a very small amount of data: take 10 samples, run 1000 iterations, and you should reach a very low loss (1e-6 or lower).
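
Below is a minimal sketch of what suggestions 1-3 could look like in code. It keeps the CSV layout from the question (5 feature columns followed by one target column, first 4440 rows used for training), but everything else is an assumption: a deliberately smaller network built with tf.layers.dense, a lower learning rate, mini-batches of 16, and the 10-sample overfitting check. It is not the poster's original model.

import numpy as np
import tensorflow as tf

# Load the whole file once instead of reading it line by line while training.
data = np.loadtxt('norm_rand_feature_y.csv', delimiter=',', dtype=np.float32)
x_train, y_train = data[:4440, :5], data[:4440, 5:6]

x = tf.placeholder(tf.float32, [None, 5])
y = tf.placeholder(tf.float32, [None, 1])
hidden = tf.layers.dense(x, 64, activation=tf.nn.relu)
pred = tf.layers.dense(hidden, 1)                        # linear output
cost = tf.reduce_mean(tf.square(pred - y))
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost)

batch_size = 16
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Suggestion 3: overfit 10 samples; the loss should become very small.
    for _ in range(1000):
        sess.run(train_op, feed_dict={x: x_train[:10], y: y_train[:10]})
    print "overfit loss:", sess.run(cost, feed_dict={x: x_train[:10], y: y_train[:10]})

    # Reset the weights after the check, then train normally.
    sess.run(tf.global_variables_initializer())

    # Suggestions 1-2: lower learning rate and mini-batches, reshuffled every epoch.
    for ep in range(20):
        perm = np.random.permutation(len(x_train))
        for i in range(0, len(x_train), batch_size):
            idx = perm[i:i + batch_size]
            sess.run(train_op, feed_dict={x: x_train[idx], y: y_train[idx]})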

1) I tried learning rates from 0.1 down to 1e-8. 2) I formed mini-batches of size 16. 3) With learning rates between 1e-2 and 1e-4, 1000 iterations on 10 samples give a loss below 1e-6. The predicted values are still all the same. Any other suggestions? Thanks.