Python, Theano - ValueError: Input dimension mismatch

I have built a DNN in Theano, based on the mnist.py example from Lasagne. I am trying to train a first network with a single hidden layer, defined as:
def build_first_auto(input_var=None):
    l_input = lasagne.layers.InputLayer(shape=(None, 1, 48, 1), input_var=input_var)
    l_hidden1 = lasagne.layers.DenseLayer(l_input, num_units=256,
                                          nonlinearity=lasagne.nonlinearities.sigmoid,
                                          W=lasagne.init.GlorotUniform())
    return l_hidden1
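For reference, Lasagne's shape inference confirms what this layer produces per sample (a quick sketch; a DenseLayer flattens all trailing input dimensions):

import lasagne

network = build_first_auto()
# The DenseLayer flattens the (None, 1, 48, 1) input and emits
# num_units values per sample.
print(lasagne.layers.get_output_shape(network))  # -> (None, 256)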
I use this network in the following training script:
from load_dataset import load_dataset
from build_DNNs import build_first_auto
import sys
import os
import time
import numpy as np
from numpy import linalg as LA
import theano
import theano.tensor as T
import lasagne
import scipy.io as sio
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]
def train_autoencoder(num_epochs):
    Xtrain, ytrain = load_dataset()
    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.matrix('targets')
    # Create neural network model
    network = build_first_auto(input_var)
    prediction = lasagne.layers.get_output(network)
    params = lasagne.layers.get_all_params(network, trainable=True)
    loss = lasagne.objectives.binary_crossentropy(prediction, target_var)
    loss = loss.mean()
    updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
    np.save('params', params)
    # Monitoring the training
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)
    # Compile
    train_fn = theano.function([input_var, target_var], loss, updates=updates, on_unused_input='ignore')
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
    # Training
    print("Starting training...")
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(Xtrain, ytrain, 30821, shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1
        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(Xtrain, ytrain, 30821, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1
        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))
The loss function here is binary cross-entropy. The problem is that I get an error about the dimensions of the arrays:
ValueError: Input dimension mis-match. (input[1].shape[1] = 1, input[3].shape[1] = 256)
Apply node that caused the error: Elemwise{Composite{(((i0 * i1 * (i2 - scalar_sigmoid(i3)))/i4) - ((i0 * i5 * scalar_sigmoid(i3))/i4))}}(TensorConstant{(1, 1) of -1.0}, targets, TensorConstant{(1, 1) of 1.0}, Elemwise{Add}[(0, 0)].0, Elemwise{mul,no_inplace}.0, Elemwise{sub,no_inplace}.0)
Toposort index: 17
Inputs types: [TensorType(float64, (True, True)), TensorType(float64, matrix), TensorType(float64, (True, True)), TensorType(float64, matrix), TensorType(float64, (True, True)), TensorType(float64, matrix)]
Inputs shapes: [(1, 1), (30821, 1), (1, 1), (30821, 256), (1, 1), (30821, 1)]
Inputs strides: [(8, 8), (8, 8), (8, 8), (2048, 8), (8, 8), (8, 8)]
Inputs values: [array([[-1.]]), 'not shown', array([[ 1.]]), 'not shown', array([[ 30821.]]), 'not shown']
Outputs clients: [[Dot22Scalar(InplaceDimShuffle{1,0}.0, Elemwise{Composite{(((i0 * i1 * (i2 - scalar_sigmoid(i3)))/i4) - ((i0 * i5 * scalar_sigmoid(i3))/i4))}}.0, TensorConstant{0.01}), Sum{axis=[0], acc_dtype=float64}(Elemwise{Composite{(((i0 * i1 * (i2 - scalar_sigmoid(i3)))/i4) - ((i0 * i5 * scalar_sigmoid(i3))/i4))}}.0)]]
As a hint, the inputs have shape (30821, 1, 48, 1) and the targets (30821, 1). I have read several posts on how to solve this error by reshaping, but none of them apply to my case. Defining target_var = T.matrix() instead of T.ivector() did not help either. Giving the hidden layer the matching size does work, but the network should function independently of that number. Thanks for your help.
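For what it's worth, the mismatch can be reproduced in isolation. Here is a minimal sketch (the batch size of 4 is made up; the column counts mirror my prediction and target shapes) that raises the same ValueError:

import numpy as np
import theano
import theano.tensor as T

pred = T.matrix('pred')  # stands in for the (batch, 256) network output
targ = T.matrix('targ')  # stands in for the (batch, 1) targets
loss = T.nnet.binary_crossentropy(pred, targ).mean()
f = theano.function([pred, targ], loss)

# Elementwise op over (4, 256) vs (4, 1): neither dimension is declared
# broadcastable, so Theano raises "Input dimension mis-match" at run time.
f(np.random.rand(4, 256).astype(theano.config.floatX),
  np.random.rand(4, 1).astype(theano.config.floatX))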