
I have a two-layer CNN with the following architecture. By my understanding, the dimensionality reduction through this CNN should line up with my calculations:

[figure: table of the expected output dimensions for each layer]

Here is the TensorFlow representation of this architecture:

import os 
import tensorflow as tf 
import sys 
import urllib 
import numpy as np 
import random 
from sklearn.preprocessing import OneHotEncoder 
from PIL import Image 
import glob 
train = [] 
for filename in glob.glob('/Users/madhavthaker/Documents/CSCI63/Final Project/face-emoticon-master/data/ck+_scaled/*.png'):  # load and flatten each scaled CK+ image
    img=np.asarray(Image.open(filename)) 
    img_flat = img.reshape(img.size) 
    train.append(img_flat) 

if sys.version_info[0] >= 3: 
    from urllib.request import urlretrieve 
else: 
    from urllib import urlretrieve 

LOGDIR = 'log3/' 
GITHUB_URL ='https://raw.githubusercontent.com/mamcgrath/TensorBoard-TF-Dev-Summit-Tutorial/master/' 

### CK+ EMOTION LABELS ###
ckp_labels = [5, 0, 3, 5, 4, 0, 1, 3, 5, 4, 0, 3, 5, 0, 1, 5, 4, 0, 0, 0, 2, 1, 3, 5, 0, 3, 5, 1, 3, 5, 0, 3, 5, 4, 0, 3, 5, 3, 1, 1, 0, 4, 5, 2, 1, 5, 3, 5, 1, 5, 3, 1, 5, 1, 5, 0, 1, 5, 3, 5, 1, 3, 0, 1, 5, 2, 3, 1, 5, 3, 1, 3, 1, 5, 3, 2, 5, 3, 1, 5, 3, 4, 0, 5, 0, 3, 1, 3, 2, 5, 1, 3, 5, 1, 5, 4, 0, 3, 1, 5, 1, 2, 5, 1, 3, 5, 3, 5, 1, 3, 5, 5, 3, 1, 1, 3, 4, 1, 5, 4, 1, 5, 0, 1, 3, 5, 2, 3, 5, 5, 3, 5, 1, 0, 1, 5, 3, 0, 5, 1, 0, 3, 5, 0, 3, 5, 3, 1, 4, 5, 1, 3, 5, 1, 3, 1, 3, 5, 1, 5, 0, 3, 5, 1, 1, 4, 1, 5, 1, 4, 1, 0, 1, 3, 5, 5, 0, 1, 0, 5, 4, 0, 5, 3, 5, 3, 5, 1, 3, 5, 2, 0, 5, 2, 0, 5, 2, 3, 4, 3, 2, 5, 1, 5, 0, 3, 0, 1, 3, 5, 0, 1, 3, 5, 0, 4, 3, 3, 1, 4, 2, 1, 3, 5, 5, 3, 0, 3, 1, 5, 5, 0, 3, 5, 3, 2, 5, 3, 4, 7, 7, 7, 7, 7, 7, 7, 7, 0, 2, 4, 0, 7, 2, 0, 7, 0, 7, 2, 4, 4, 0, 2, 4, 7, 2] 
labels_test = np.array(ckp_labels).reshape(-1,1) 

enc = OneHotEncoder() 
enc.fit(labels_test) 
labels_final = enc.transform(labels_test).toarray() 

train = np.asarray(train) 

# Add convolution layer 
def conv_layer(input, size_in, size_out, name="conv"):
    with tf.name_scope(name):
        #w = tf.Variable(tf.zeros([5, 5, size_in, size_out]), name="W")
        #b = tf.Variable(tf.zeros([size_out]), name="B")
        w = tf.Variable(tf.truncated_normal([17, 17, size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
        act = tf.nn.relu(conv + b)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

# Add fully connected layer 
def fc_layer(input, size_in, size_out, name="fc"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        act = tf.nn.relu(tf.matmul(input, w) + b)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        return act


def mnist_model(learning_rate, use_two_conv, use_two_fc, hparam): 

    tf.reset_default_graph() 
    tf.set_random_seed(1) 
    sess = tf.Session() 

    # Setup placeholders, and reshape the data 
    x = tf.placeholder(tf.float32, shape=[None, 256*256], name="x") 
    x_image = tf.reshape(x, [-1, 256, 256, 1]) 
    tf.summary.image('input', x_image, 3) 
    y = tf.placeholder(tf.float32, shape=[None, 7], name="labels") 

    if use_two_conv:
        conv1 = conv_layer(x_image, 1, 32, "conv1")
        conv_out = conv_layer(conv1, 32, 64, "conv2")
    else:
        conv1 = conv_layer(x_image, 1, 64, "conv")
        conv_out = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")  # padding="VALID" would mean no padding

    flattened = tf.reshape(conv_out, [-1, 55 * 55 * 64])

    if use_two_fc:
        fc1 = fc_layer(flattened, 55 * 55 * 64, 40, "fc1")
        embedding_input = fc1
        embedding_size = 40
        logits = fc_layer(fc1, 40, 7, "fc2")
    else:
        embedding_input = flattened
        embedding_size = 7*7*64
        logits = fc_layer(flattened, 7*7*64, 10, "fc")

    with tf.name_scope("xent"):
        xent = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=y), name="xent")
        tf.summary.scalar("xent", xent)

    with tf.name_scope("train"):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(logits, -1), tf.argmax(y, -1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

    summ = tf.summary.merge_all() 


    embedding = tf.Variable(tf.zeros([1024, embedding_size]), name="test_embedding") 
    assignment = embedding.assign(embedding_input) 
    saver = tf.train.Saver() 

    sess.run(tf.global_variables_initializer()) 
    writer = tf.summary.FileWriter(LOGDIR + hparam) 
    writer.add_graph(sess.graph) 

    config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig() 
    embedding_config = config.embeddings.add() 
    embedding_config.tensor_name = embedding.name 
    embedding_config.sprite.image_path = LOGDIR + 'sprite_1024.png' 
    embedding_config.metadata_path = LOGDIR + 'labels_1024.tsv' 
    # Specify the width and height of a single thumbnail. 
    embedding_config.sprite.single_image_dim.extend([256, 256]) 
    tf.contrib.tensorboard.plugins.projector.visualize_embeddings(writer, config) 

    for i in range(300):
        batch_index = random.sample(range(0, 100), 25)

        if i % 5 == 0:
            [train_accuracy, s] = sess.run([accuracy, summ], feed_dict={x: train[batch_index], y: labels_final[batch_index]})
            writer.add_summary(s, i)
            print("train accuracy:", train_accuracy)
        sess.run(train_step, feed_dict={x: train[batch_index], y: labels_final[batch_index]})

def make_hparam_string(learning_rate, use_two_fc, use_two_conv): 
    conv_param = "conv2" if use_two_conv else "conv1" 
    fc_param = "fc2" if use_two_fc else "fc1" 
    return "lr_%.0E%s%s" % (learning_rate, conv_param, fc_param) 

def main():
    # You can try adding some more learning rates
    #for learning_rate in [1E-3, 1E-4, 1E-5]:
    for learning_rate in [1E-4]:

        # Include "False" as a value to try different model architectures
        #for use_two_fc in [True, False]:
        for use_two_fc in [True]:
            #for use_two_conv in [True, False]:
            for use_two_conv in [True]:
                # Construct a hyperparameter string for each one (example: "lr_1E-3fc2conv2")
                hparam = make_hparam_string(learning_rate, use_two_fc, use_two_conv)
                print('Starting run for %s' % hparam)
                sys.stdout.flush()  # this forces print-ed lines to show up.

                # Actually run with the new settings
                mnist_model(learning_rate, use_two_fc, use_two_conv, hparam)


if __name__ == '__main__': 
    main() 

By my math everything checks out, but when I run the code I get the following error:

InvalidArgumentError (see above for traceback): Input to reshape is a tensor with 6553600 values, but the requested shape requires a multiple of 193600 

The error is raised on this line of code:

flattened = tf.reshape(conv_out, [-1, 55 * 55 * 64]) 

I really don't know why this is happening. Is my math off? The error seems to imply that the flattened size of conv_out should be [-1, 64*64*64] instead.
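For what it's worth, the two numbers in the error message can be sanity-checked directly (a quick check, assuming the batch of 25 used in the training loop above):

batch = 25
requested = 55 * 55 * 64         # 193600, the per-row size the reshape asks for
actual = batch * 64 * 64 * 64    # 6553600, the number of values conv_out holds
print(actual % requested)        # 164800 -- not a multiple, hence the error
print(actual % (64 * 64 * 64))   # 0 -- consistent with reshaping to [-1, 64*64*64]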

Any help would be greatly appreciated. Let me know if you need any more information.


Could you be more explicit about where this error occurs and what dimensions you were expecting? Given the large amount of code, I'm having trouble following. –


Sure, I've added the line that raises this error. The expected dimensions are the input in that line of code: I expected [55, 55, 64], but [64, 64, 64] works. Not sure why. – madsthaks
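Spelled out, the fix described in that last comment would look like this (a sketch, assuming the model above stays otherwise unchanged; the fc_layer input size must change to match):

# with SAME padding and two 2x2 pools, 256 -> 128 -> 64, so conv_out is [-1, 64, 64, 64]
flattened = tf.reshape(conv_out, [-1, 64 * 64 * 64])
fc1 = fc_layer(flattened, 64 * 64 * 64, 40, "fc1")  # size_in must match the flatten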

Answer


It looks like you've miscalculated the size of the output of each convolution/pooling layer. Here is how you can work it out. I distilled your code down to just this:

import tensorflow as tf 
import numpy as np 

def conv_layer(input, size_in, size_out, name="conv"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([17, 17, size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
        act = tf.nn.relu(conv + b)
        return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


# Setup placeholders, and reshape the data
x = tf.placeholder(tf.float32, shape=[None, 256*256], name="x")
x_image = tf.reshape(x, [-1, 256, 256, 1])

conv1 = conv_layer(x_image, 1, 32, "conv1")
conv_out = conv_layer(conv1, 32, 64, "conv2")

flattened = tf.reshape(conv_out, [-1, 55 * 55 * 64])

sess = tf.Session()
sess.run(tf.global_variables_initializer())
print(sess.run(tf.shape(conv1), {x: np.zeros([1, 256*256])}))

This code feeds in an input of zeros of the correct shape and uses tf.shape() to compute the shape of conv1's output. I got back:

[ 1 128 128 32]

which doesn't match the numbers from your calculation.

I suspect you've miscalculated the padding, but it's hard to say without knowing how you arrived at the numbers in the table at the top. If nothing else, the first convolution uses SAME padding with stride 1, so its input and output have the same spatial dimensions.
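To check these numbers by hand, you can apply TensorFlow's output-size rules layer by layer (a small sketch of the standard formulas, not code from the original post):

import math

def out_size(n, k, s, padding):
    # TensorFlow's rules: SAME -> ceil(n / s), VALID -> ceil((n - k + 1) / s)
    if padding == "SAME":
        return math.ceil(n / s)
    return math.ceil((n - k + 1) / s)

n = 256
for name in ("conv1", "conv2"):
    n = out_size(n, 17, 1, "SAME")  # 17x17 convolution, stride 1: size unchanged
    n = out_size(n, 2, 2, "SAME")   # 2x2 max-pool, stride 2: size halved
    print(name, n)                  # prints: conv1 128, conv2 64

With two conv/pool blocks this gives a flattened size of 64 * 64 * 64, which matches the [64, 64, 64] reported to work in the comments.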

Hope this helps!
