
I'm experimenting with TensorFlow (it looks great so far!) and playing with a toy classification problem: I generate a few features, and if the first feature is above a threshold the example is "positive". The neural network, however, seems unable to learn this simple relationship.

The full code is here: https://gist.github.com/tnbredillet/f136c2bc40815517e0aa1139bd2060ee

The problem is that the model seems unable to capture this simple relationship. Of course I'm missing a lot of things (cross-validation, regularization, batch normalization, hyperparameter tuning, and so on), but I would still expect the model to manage to figure this one out, right? Or maybe it's just a bug in my code?

Any insight is welcome :-)

EDIT:

Data generation code:

import numpy as np
import pandas as pd
import tensorflow as tf
from math import ceil
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle

num_examples = 100000
split = 0.2
num_features = 1


def generate_input_data(num_examples, num_features):
    features = []
    labels = []
    for i in xrange(num_examples):
        features.append(np.random.rand(num_features) * np.random.randint(1, 10) + np.random.rand(num_features))
        if np.random.randint(101) > 90:
            features[i-1][np.random.randint(num_features)] = 0

        hard = ceil(np.sum(features[i-1])) % 2
        easy = 0
        if features[i-1][0] > 3:
            easy = 1
        labels.append(easy)

    df = pd.concat(
        [
            pd.DataFrame(features),
            pd.Series(labels).rename('labels')
        ],
        axis=1,
    )
    return df


def one_hot_encoding(train_df): 
    #TODO: handle categorical feature one hot encoding. 
    return 0, 0 


def scale_data(train_df, test_df):
    categorical_columns, encoding = one_hot_encoding(train_df)

    scaler = MinMaxScaler(feature_range=(0, 1))

    scaler.fit(train_df.drop(['labels'], axis=1))

    train_df = pd.concat(
        [
            pd.DataFrame(scaler.transform(train_df.drop('labels', axis=1))),
            train_df['labels']
        ],
        axis=1,
    )
    test_df = pd.concat(
        [
            pd.DataFrame(scaler.transform(test_df.drop('labels', axis=1))),
            test_df['labels']
        ],
        axis=1,
    )

    return train_df, test_df


def preprocess_data(train_df, test_df):
    all_dfs = [train_df, test_df]
    features = set()
    for df in all_dfs:
        features |= set(df.columns)

    for df in all_dfs:
        for f in features:
            if f not in df.columns:
                df[f] = 0.0

    for df in all_dfs:
        df.sort_index(axis=1, inplace=True)

    train_df, test_df = scale_data(train_df, test_df)

    train_df = shuffle(train_df).reset_index(drop=True)

    return train_df, test_df


def get_data(num_examples, split):
    train_df = generate_input_data(num_examples, num_features)
    test_df = generate_input_data(int(ceil(num_examples * split)), num_features)
    return preprocess_data(train_df, test_df)


def get_batch(df, batch_size, epoch):
    start = batch_size * epoch - batch_size
    end = batch_size * epoch
    if end > len(df):
        end = len(df)
    size = end - start
    batch_x = df.drop('labels', axis=1)[start:end].as_matrix()
    batch_y = df['labels'][start:end].as_matrix().reshape(size, 1)
    return batch_x, batch_y
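
As a quick sanity check of the batching helper (a minimal sketch, not part of the original gist; epoch=1 selects rows [0, batch_size)):

sample_df = generate_input_data(10, num_features)
bx, by = get_batch(sample_df, batch_size=4, epoch=1)
print(bx.shape, by.shape)  # both (4, 1): num_features feature columns, one label column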

And the network definition, training, and evaluation:

train_df, test_df = get_data(num_examples, split)

n_hidden_1 = 8
n_hidden_2 = 4
learning_rate = 0.01
batch_size = 500
num_epochs = 200
display_epoch = 50


def neural_net(x):
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer


weights = {
    'h1': tf.Variable(tf.random_normal([num_features, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, 1]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([1]))
}

X = tf.placeholder(tf.float32, shape=(None, num_features))
Y = tf.placeholder(tf.float32, shape=(None, 1))

logits = neural_net(X)

loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

predictions = tf.sigmoid(logits)
predicted_class = tf.greater(predictions, 0.5)
correct = tf.equal(predicted_class, tf.equal(Y, 1.0))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

with tf.Session() as sess:

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    for epoch in range(1, num_epochs + 1):
        batch_x, batch_y = get_batch(train_df, batch_size, epoch)
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if epoch % display_epoch == 0 or epoch == 1:
            loss, acc, pred, fff = sess.run([loss_op, accuracy, predictions, logits],
                                            feed_dict={X: batch_x, Y: batch_y})
            c = ', '.join('{}={}'.format(*t) for t in zip(pred, batch_y))
            print("[{}] Batch loss={:.4f}, Accuracy={:.5f}, Logits vs labels= {}".format(epoch, loss, acc, c))

    print("Optimization Finished!")

    batch_x, batch_y = get_batch(test_df, batch_size, 1)
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={X: batch_x, Y: batch_y}))

Final output:

[1] Batch loss=3.2160, Accuracy=0.41000 
[50] Batch loss=0.6661, Accuracy=0.61800 
[100] Batch loss=0.6472, Accuracy=0.65200 
[150] Batch loss=0.6538, Accuracy=0.64000 
[200] Batch loss=0.6508, Accuracy=0.64400 
Optimization Finished! 
('Testing Accuracy:', 0.63999999) 

Answer


In this case it is not a machine learning problem at all, but a bug in your data generation that scrambles the relationship you intend. In this function:

def generate_input_data(num_examples, num_features):
    features = []
    labels = []
    for i in xrange(num_examples):
        features.append(np.random.rand(num_features) * np.random.randint(1, 10) + np.random.rand(num_features))
        if np.random.randint(101) > 90:
            features[i-1][np.random.randint(num_features)] = 0

        hard = ceil(np.sum(features[i-1])) % 2
        easy = 0
        if features[i-1][0] > 3:
            easy = 1
        labels.append(easy)

    df = pd.concat(
        [
            pd.DataFrame(features),
            pd.Series(labels).rename('labels')
        ],
        axis=1,
    )
    return df

You index features by i-1 when determining the label. But xrange generates numbers starting from 0, so there is no need to subtract 1. In fact, when you do, each label ends up computed from the previous example's features; since the examples are generated independently, the relationship between a row's features and its label becomes close to random and essentially unpredictable, so even if the rest of your model were fine, it could not score well.
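
To see the shift concretely, here is a minimal self-contained illustration (not from the original post) using deterministic "features":

import numpy as np

features = []
labels = []
for i in range(5):
    features.append(np.array([float(i)]))            # example i has feature value i
    labels.append(1 if features[i-1][0] > 2 else 0)  # bug: reads the PREVIOUS example

print([(f[0], l) for f, l in zip(features, labels)])
# [(0.0, 0), (1.0, 0), (2.0, 0), (3.0, 0), (4.0, 1)]
# row 3 has feature 3.0 > 2 yet gets label 0: each label answers the question for the row above it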

So you need to index with i instead throughout, e.g. if features[i][0] > 3.
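
For completeness, here is a corrected sketch of the generation loop (the unused hard variable is omitted; only the indexing changes):

def generate_input_data(num_examples, num_features):
    features = []
    labels = []
    for i in xrange(num_examples):
        features.append(np.random.rand(num_features) * np.random.randint(1, 10) + np.random.rand(num_features))
        if np.random.randint(101) > 90:
            features[i][np.random.randint(num_features)] = 0  # i, not i-1

        easy = 0
        if features[i][0] > 3:  # the label now depends on THIS example's first feature
            easy = 1
        labels.append(easy)

    return pd.concat(
        [pd.DataFrame(features), pd.Series(labels).rename('labels')],
        axis=1,
    )

With the indices aligned, each row's label matches its own features again, and the model should have no trouble picking up the thresholded relationship.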