I have been trying to create a vanilla 3-layer RNN in Python 3.6, but whenever I train it, the training loss decreases for the first X training iterations and then goes up and down erratically, and I can't figure out what the problem is. I don't want to use TensorFlow, Keras, or any other deep learning framework right now, because I'm trying to get a better understanding of how these NNs actually work.
I'm fairly sure my problem lies either in the way I add the matrices together in my forward prop function, or in the part of my backprop function where I compute the error and update the weights, so I'll post both of them below.
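For reference, the single-timestep update I'm trying to implement is the standard vanilla RNN step with sigmoid activations. The sketch below is just my understanding of it; the variable names are illustrative and not from my actual code:

import numpy as np

def sigmoid(z):
    return 1/(1+np.exp(-z))

# Illustrative only: one timestep of the update I'm aiming for.
# x_t is the input vector, h_prev is the hidden layer from the previous timestep,
# W_xh, W_hh, W_hy are the input->hidden, hidden->hidden and hidden->output weights.
def rnn_step(x_t, h_prev, W_xh, W_hh, W_hy):
    h_t = sigmoid(np.dot(x_t, W_xh) + np.dot(h_prev, W_hh))  # combine input and recurrent terms
    y_t = sigmoid(np.dot(h_t, W_hy))                          # output layer
    return h_t, y_t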
Sorry if my code is a bit messy, but here is the function that forward-propagates through my neural network.
def RNN_forward(self, A):
    #input layer
    self.A = A
    #self.AB is the A->B weight matrix
    self.B1 = np.dot(self.A, self.AB)
    #self.B_t is self.B (layer 2) from the previous timestep
    self.B_t1 = np.dot(self.B_t, self.B_RNN)
    #combining matrices
    self.B = self.sigmoid((self.B1) + (self.B_t1))
    self.C1 = np.dot(self.B, self.BC)
    #output layer
    self.C = self.sigmoid(self.C1)
    return self.C
My backprop and weight-update code is mashed together into one function, sorry that everything is so messy.
def RNN_backprop(self, C_real):
    #did this so I could transpose a 1D matrix
    BT = self.B[np.newaxis]
    #get error of BC weight matrix
    delta3 = np.multiply(-(C_real-self.C), self.sigmoidPrime(self.C1))
    BCp = np.dot(BT.T, delta3)
    BCpT = BCp[np.newaxis]
    #get error of AB weight matrix
    delta2 = np.dot(delta3, self.BC.T)*self.sigmoidPrime(self.B1)
    AT, d2 = self.A[np.newaxis], delta2[np.newaxis]
    ABp = np.dot(AT.T, d2)
    #get error of weight matrix linked to previous time step
    delta2t = np.dot(delta3, self.BC.T)*self.sigmoidPrime(self.B_t1)
    B_RNNT, d2t = self.B_t[np.newaxis], delta2t[np.newaxis]
    B_RNNp = np.dot(B_RNNT.T, d2t)
    #training speed
    weight_multiplier = 10
    #update weight matrices
    self.BC = self.BC - BCpT.T*weight_multiplier
    self.AB = self.AB - ABp*weight_multiplier
    self.B_RNN = self.B_RNN - B_RNNp*weight_multiplier
    #store layer 2 to be used in next timestep
    self.B_t = self.B
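If it helps, this is the kind of finite-difference check I was planning to compare my backprop gradients against. It's only a sketch; forward_loss is a hypothetical helper that would run a forward pass with the given weight matrix substituted in and return the scalar cost:

import numpy as np

# Sketch of a numerical gradient check (not part of my class).
# forward_loss(W) is a hypothetical helper: run the forward pass with W
# swapped in for the weight matrix being checked and return the scalar cost.
def numerical_gradient(forward_loss, W, eps=1e-5):
    grad = np.zeros_like(W)
    for idx in np.ndindex(W.shape):
        original = W[idx]
        W[idx] = original + eps
        loss_plus = forward_loss(W)
        W[idx] = original - eps
        loss_minus = forward_loss(W)
        W[idx] = original                      # restore the original weight
        grad[idx] = (loss_plus - loss_minus) / (2*eps)
    return grad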
I'm happy to post the whole code or any other bits anyone might need to help diagnose my problem.
Thanks in advance for any suggestions or potential solutions you might have!
EDIT: Here is the whole code. It's really not well commented, so it will be hard to read, but it's all here. Some lines are commented out simply because I wasn't sure whether I would end up using them.
import numpy as np

class NN(object):
    def __init__(self):
        self.A_size = 4
        self.B_size = 6
        self.C_size = 1

    def init_weights(self):
        self.AB = np.random.randn(self.A_size, self.B_size)
        self.BC = np.random.randn(self.B_size, self.C_size)
        self.B_t = [0]
        self.B_RNN = np.random.randn(self.B_size, self.B_size)

    def sigmoid(self, X1):
        return 1/(1+np.exp(-X1))

    def sigmoidPrime(self, X1):
        return np.exp(-X1)/((1+np.exp(-X1))**2)

    def print_status(self, NN_type):
        print("---------Status---------")
        print("A: ", self.A)
        print("AB: ", self.AB)
        print("B: ", self.B)
        print("BC: ", self.BC)
        print("C: ", self.C)
        if NN_type == "RNN":
            print("Previous B: ", self.B_t)
        if NN_type == "LSTM":
            pass
        #print("Error: ", self.cost)
        print("---------Done---------")

    def RNN_forward(self, A):
        self.A = A
        self.B1 = np.dot(self.A, self.AB)
        if len(self.B_t) > 2:
            self.B_t1 = np.dot(self.B_t, self.B_RNN)
            self.B = self.sigmoid((self.B1) + (self.B_t1))
            #self.B = self.sigmoid(np.tanh(self.B1) + np.tanh(self.B_t1))
        else:
            self.B = self.sigmoid(self.B1)
            self.B_t = self.B
            self.B_t1 = np.dot(self.B_t, self.B_RNN)
            print('this should only print once')
        self.C1 = np.dot(self.B, self.BC)
        self.C = self.sigmoid(self.C1)
        return self.C

    def skip_backprop(self):
        self.B_t = self.B

    def get_cost(self, C_real):
        #self.cost = 0.5*sum((C_real-self.C)**2)
        self.cost = 0.5*((C_real-self.C)**2)
        return self.cost

    def RNN_backprop(self, C_real):
        BT = self.B[np.newaxis]
        delta3 = np.multiply(-(C_real-self.C), self.sigmoidPrime(self.C1))
        BCp = np.dot(BT.T, delta3)
        BCpT = BCp[np.newaxis]
        delta2 = np.dot(delta3, self.BC.T)*self.sigmoidPrime(self.B1)
        AT, d2 = self.A[np.newaxis], delta2[np.newaxis]
        ABp = np.dot(AT.T, d2)
        delta2t = np.dot(delta3, self.BC.T)*self.sigmoidPrime(self.B_t1)
        B_RNNT, d2t = self.B_t[np.newaxis], delta2t[np.newaxis]
        B_RNNp = np.dot(B_RNNT.T, d2t)
        #Important
        #weight_multiplier = 5 * (np.sum(np.absolute(ABp))+np.sum(np.absolute(BCpT))+np.sum(np.absolute(B_RNNp)))
        weight_multiplier = 0.01
        self.BC = self.BC - BCpT.T*weight_multiplier
        self.AB = self.AB - ABp*weight_multiplier
        self.B_RNN = self.B_RNN - B_RNNp*weight_multiplier
        self.B_t = self.B

NN = NN()
NN.init_weights()
#important
iterations = 100000
for a in range(iterations):
    total_error = 0
    for i in range(50):
        #As and C_reals are my training inputs and targets (defined elsewhere, not shown here)
        NN.RNN_forward(np.array(As[i]))
        NN.RNN_backprop(C_reals[i])
        total_error += NN.get_cost(C_reals[i])
    if a%500 == 0:
        print("Error: ", total_error)
        NN.RNN_forward(np.array([1,1,1,0.2]))
        NN.skip_backprop()
        print("0.4: ", NN.RNN_forward(np.array([1,1,1,0.3])))
        NN.RNN_forward(np.array([1,1,1,0.4]))
        NN.skip_backprop()
        print("0.2: ", NN.RNN_forward(np.array([1,1,1,0.3])))
self.B_RNN is used before it is defined in the first code block. In the second code block, weight_multiplier is supposed to be your learning rate? Learning rates are usually much smaller, e.g. 1e-2 or less. –
self.B_RNN is defined when the class instance is created. I have been playing around with various learning rates, but they all seem to run into the same problem. I'll test some much smaller learning rates and see if that helps! –
Since this isn't fully functional code, it's hard to run it and check what is going wrong. –