
I have been reading Bishop's machine learning book and am trying to implement the backpropagation algorithm for a neural network, but it does not find a solution. The code is below, split into the network code and the testing code.

import numpy as np 
from collections import namedtuple 
import matplotlib.pyplot as plt 
import scipy.optimize as opt 

# Network code 

def tanh(x): 
    return np.tanh(x) 


def dtanh(x): 
    return 1 - np.tan(x)**2 


def identity(x): 
    return x 


def unpack_weights(w, D, M, K): 
    """ 
    len(w) = (D + 1)*M + (M + 1)*K, where 
     D = number of inputs, excluding bias 
     M = number of hidden units, excluding bias 
     K = number of output units 
    """ 
    UnpackedWeights = namedtuple("UpackedWeights", ["wHidden", "wOutput"]) 

    cutoff = M*(D + 1) 
    wHidden = w[:cutoff].reshape(M, D + 1) 
    wOutput = w[cutoff:].reshape(K, M + 1) 
    return UnpackedWeights(wHidden=wHidden, wOutput=wOutput) 


def compute_output(x, weights, fcnHidden=tanh, fcnOutput=identity): 
    NetworkResults = namedtuple("NetworkResults", ["hiddenAct", "hiddenOut", "outputAct", "outputOut"]) 

    xBias = np.vstack((1., x)) 
    hiddenAct = weights.wHidden.dot(xBias) 
    hiddenOut = np.vstack((1., fcnHidden(hiddenAct))) 

    outputAct = weights.wOutput.dot(hiddenOut) 
    outputOut = fcnOutput(outputAct) 
    return NetworkResults(hiddenAct=hiddenAct, hiddenOut=hiddenOut, outputAct=outputAct, 
          outputOut=outputOut) 


def backprop(t, x, M, fcnHidden=tanh, fcnOutput=identity, dFcnHidden=dtanh): 
    maxIter = 10000 
    learningRate = 0.2 
    N, K = t.shape 
    N, D = x.shape 

    nParams = (D + 1)*M + (M + 1)*K 
    w0 = np.random.uniform(-0.1, 0.1, nParams) 

    for _ in xrange(maxIter): 
        sse = 0. 
        for n in xrange(N): 
            weights = unpack_weights(w0, D, M, K) 

            # Compute net output 
            netResults = compute_output(x=x[n].reshape(-1, 1), weights=weights, 
                                        fcnHidden=fcnHidden, fcnOutput=fcnOutput) 

            # Compute derivatives of error function wrt wOutput 
            outputDelta = netResults.outputOut - t[n].reshape(K, 1) 
            outputDerivs = outputDelta.dot(netResults.hiddenOut.T) 

            # Compute derivatives of error function wrt wHidden 
            hiddenDelta = dFcnHidden(netResults.hiddenAct)*(weights.wOutput[:, 1:].T.dot(outputDelta)) 
            xBias = np.vstack((1., x[n].reshape(-1, 1))) 
            hiddenDerivs = hiddenDelta.dot(xBias.T) 

            delErr = np.hstack((np.ravel(hiddenDerivs), np.ravel(outputDerivs))) 
            w1 = w0 - learningRate*delErr 
            w0 = w1 
            sse += np.sum(outputDelta**2) 

    return w0 

# Testing code 

def generate_test_data(): 
    D, M, K, N = 1, 3, 1, 25 
    x = np.sort(np.random.uniform(-1., 1., (N, D)), axis=0) 
    t = 1.0 + x**2 
    return D, M, K, N, x, t 


def test_backprop(): 
    D, M, K, N, x, t = generate_test_data() 
    return backprop(t, x, M) 


def scipy_solution(t, x, D, M, K, N, method="BFGS"): 

    def obj_fn(w): 
        weights = unpack_weights(w, D, M, K) 
        err = 0 
        for n in xrange(N): 
            netOut = compute_output(x[n], weights=weights) 
            err += (netOut.outputOut[0, 0] - t[n])**2 
        return err 

    w0 = np.random.uniform(-1, 1, (D + 1)*M + (M + 1)*K) 
    return opt.minimize(obj_fn, w0, method=method) 

When I use SciPy's optimize module (i.e. the scipy_solution() function) to find the network weights, the sum of squared errors gets very close to zero and the network output looks like the data I generated. When I use the backpropagation function, the sum of squared errors stays between 2.0 and 3.0 and the network output looks almost linear. Moreover, even when I feed the weights from the SciPy solution to the backprop function as starting values, it still fails to find the right solution.
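A finite-difference gradient check is a standard way to localize this kind of discrepancy. The sketch below is not part of the original post; gradient_check is a hypothetical helper that reuses the functions defined above and compares the derivatives computed inside the backprop loop for a single sample against a numerical gradient of that sample's squared error. If the two blocks of the gradient disagree, the corresponding analytic derivative is wrong somewhere.

def gradient_check(eps=1e-6): 
    # Compare the analytic derivatives (mirroring the loop body of backprop) 
    # with a central-difference gradient of the per-sample error 0.5*||y - t||^2. 
    D, M, K, N, x, t = generate_test_data() 
    w = np.random.uniform(-0.1, 0.1, (D + 1)*M + (M + 1)*K) 
    n = 0  # check a single training sample 

    def sample_error(wvec): 
        weights = unpack_weights(wvec, D, M, K) 
        out = compute_output(x[n].reshape(-1, 1), weights).outputOut 
        return 0.5*np.sum((out - t[n].reshape(K, 1))**2) 

    # Analytic gradient, computed exactly as in backprop() 
    weights = unpack_weights(w, D, M, K) 
    res = compute_output(x[n].reshape(-1, 1), weights) 
    outputDelta = res.outputOut - t[n].reshape(K, 1) 
    outputDerivs = outputDelta.dot(res.hiddenOut.T) 
    hiddenDelta = dtanh(res.hiddenAct)*(weights.wOutput[:, 1:].T.dot(outputDelta)) 
    xBias = np.vstack((1., x[n].reshape(-1, 1))) 
    hiddenDerivs = hiddenDelta.dot(xBias.T) 
    analytic = np.hstack((np.ravel(hiddenDerivs), np.ravel(outputDerivs))) 

    # Numerical gradient by central differences 
    numeric = np.zeros_like(w) 
    for i in range(len(w)): 
        wPlus, wMinus = w.copy(), w.copy() 
        wPlus[i] += eps 
        wMinus[i] -= eps 
        numeric[i] = (sample_error(wPlus) - sample_error(wMinus))/(2.*eps) 

    # Should be ~1e-7 or smaller if the analytic derivatives are correct 
    return np.max(np.abs(analytic - numeric)) 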

I've been stuck on this for a couple of days, so I'd appreciate any hints. Thanks.

Answer

def dtanh(x): 
    return 1 - np.tan(x)**2 

should be

def dtanh(x): 
    return 1 - np.tanh(x)**2 
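
As a quick check of the fix (my own snippet, not part of the original answer), the corrected derivative can be compared against a central difference of tanh; the variable names here are just for illustration:

import numpy as np 

# The analytic derivative 1 - tanh(x)**2 should agree with a central-difference 
# approximation of d/dx tanh(x) to roughly eps**2; the np.tan version does not. 
eps = 1e-5 
xs = np.linspace(-2., 2., 9) 
analytic = 1 - np.tanh(xs)**2 
numeric = (np.tanh(xs + eps) - np.tanh(xs - eps))/(2.*eps) 
print(np.max(np.abs(analytic - numeric)))  # on the order of 1e-10 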

Wow, that was it. Thanks! – hahdawg