
I am trying to build recurrent neural networks for prediction (related question: Recurrent neural network in PyBrain for prediction does not learn).

I have created two simple scripts to test ideas and techniques before implementing them in something more complex.

I tried to base my work on proven code as much as possible, i.e. examples from stackoverflow and github.

In the first example, I try to predict values of sine given a timeline of past values:

#!/usr/bin/env python 
# -*- coding: utf-8 -*- 

"""An example of a simple RNN.""" 

import time 
import math 
import matplotlib.pyplot as plt 

from normalizator import Normalizator 

from pybrain.tools.shortcuts import buildNetwork 
from pybrain.structure.modules import LSTMLayer 
from pybrain.structure import LinearLayer, SigmoidLayer 
from pybrain.supervised.trainers import BackpropTrainer 
from pybrain.supervised import RPropMinusTrainer 
from pybrain.datasets import SupervisedDataSet 
from pybrain.datasets import SequentialDataSet 
import pybrain.datasets.sequential 


class Network(object): 
    """Sieć neuronowa.""" 

    def __init__(self, inputs, hidden, outputs): 
     """Just a constructor.""" 
     self.inputs = inputs 
     self.outputs = outputs 
     self.hidden = hidden 
     self.network = self.build_network(inputs, hidden, outputs) 
     self.norm = Normalizator() 

    def build_network(self, inputs, hidden, outputs): 
     """Builds the network.""" 
     network = buildNetwork(inputs, hidden, outputs, 
           hiddenclass=LSTMLayer, 
           #hiddenclass=SigmoidLayer, 
           outclass=SigmoidLayer, 
           bias = True, 
           outputbias=False, recurrent=True) 
     network.sortModules() 
     print "Constructed network:" 
     print network 
     return network 

    def train(self, learning_set, max_iterations=100): 
     """Trains the network.""" 
     print "\nThe network is learning..." 
     time_s = time.time() 
     self.network.randomize() 
     #trainer = RPropMinusTrainer(self.network, dataset=learning_set, 
     #       verbose=True) 
     learning_rate = 0.05 
     trainer = BackpropTrainer(self.network, learning_set, verbose=True, 
            momentum=0.8, learningrate=learning_rate) 
     errors = trainer.trainUntilConvergence(maxEpochs=max_iterations) 
     #print "Last error in learning:", errors[-1] 
     time_d = time.time() - time_s 
     print "Learning took %d seconds." % time_d 
     return errors, learning_rate 

    def test(self, data): 
     """Tests the network.""" 
     print ("X\tCorrect\tOutput\t\tOutDenorm\tError") 
     mse = 0.0 
     outputs = [] 
     #self.network.reset() 
     for item in data: 
      x_val = self.norm.denormalize("x", item[0]) 
      sin_val = self.norm.denormalize("sin", item[1]) 
      #get the output from the network 
      output = self.network.activate(item[0])[0] 
      out_denorm = self.norm.denormalize("sin", output) 
      outputs.append(out_denorm) 
      #compute the error 
      error = sin_val - out_denorm 
      mse += error**2 
      print "%f\t%f\t%f\t%f\t%f" % \ 
       (round(x_val, 2), sin_val, output, out_denorm, error) 
     mse = mse/float(len(data)) 
     print "MSE:", mse 
     return outputs, mse 

    def show_plot(self, correct, outputs, learn_x, test_x, 
        learning_targets, mse): 
     """Plots some useful stuff :)""" 
     #print "learn_x:", learn_x 
     #print "test_x:", test_x 
     #print "output:", outputs 
     #print "correct:", correct 
     fig = plt.figure() 
     ax = fig.add_subplot(111) 
     ax.plot(test_x, outputs, label="Prediction", color="red") 
     ax.plot(test_x, correct, ":", label="Original data") 
     ax.legend(loc='upper left') 
     plt.xlabel('X') 
     plt.ylabel('Sine') 
     plt.title('Sine... (mse=%f)' % mse) 
     #plot a portion of the learning data 
     learning_plt = fig.add_subplot(111) 
     learn_index = int(0.9 * len(learning_targets)) 
     learning_plt.plot(learn_x[learn_index:], learning_targets[learn_index:], 
          label="Learning values", color="blue") 
     learning_plt.legend(loc='upper left') 
     plt.show() 

    def prepare_data(self): 
     """Prepares the data.""" 
     learn_inputs = [round(x, 2) for x in [y * 0.05 for y in range(0, 4001)]] 
     learn_targets = [math.sin(z) for z in learn_inputs] 

     test_inputs = [round(x, 2) for x in [y * 0.05 for y in range(4001, 4101)]] 
     test_targets = [math.sin(z) for z in test_inputs] 

     self.norm.add_feature("x", learn_inputs + test_inputs) 
     self.norm.add_feature("sin", learn_targets + test_targets) 

     #learning_set = pybrain.datasets.sequential.SupervisedDataSet(1, 1) 
     learning_set = SequentialDataSet(1, 1) 
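     # split the data into sequences: start a new one after the target has
     # been near zero twice, i.e. roughly once per full sine period
     # (whenever the 0.05 sampling grid lands close enough to a zero crossing)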
     targ_close_to_zero = 0 
     for inp, targ in zip(learn_inputs, learn_targets): 
      if abs(targ) < 0.01: 
       targ_close_to_zero += 1 
      #if inp % 1 == 0.0: 
      if targ_close_to_zero == 2: 
       print "New sequence at", (inp, targ) 
       targ_close_to_zero = 0 
       learning_set.newSequence() 
      learning_set.appendLinked(self.norm.normalize("x", inp), 
             self.norm.normalize("sin", targ)) 

     testing_set = [] 
     for inp, targ in zip(test_inputs, test_targets): 
      testing_set.append([self.norm.normalize("x", inp), 
           self.norm.normalize("sin", targ), inp, targ]) 
     return learning_set, testing_set, learn_inputs, test_inputs, learn_targets 

if __name__ == '__main__': 
    nnetwork = Network(1, 20, 1) 
    learning_set, testing_set, learning_inputs, testing_inputs, learn_targets = \ 
     nnetwork.prepare_data() 
    errors, rate = nnetwork.train(learning_set, 125) 
    outputs, mse = nnetwork.test(testing_set) 
    correct = [element[3] for element in testing_set] 
    nnetwork.show_plot(correct, outputs, 
         learning_inputs, testing_inputs, learn_targets, mse) 
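
Both scripts import a custom normalizator module that is not included in the question. Below is a minimal sketch of what Normalizator would have to look like, reconstructed purely from how it is used in the two scripts (min-max scaling of named features into [0, 1], plus a ranked=True mode for the (year, month) pairs of the second script); the asker's actual implementation is not shown, so treat this as an assumption:

class Normalizator(object):
    """Min-max normalization of named features into [0, 1] (a sketch)."""

    def __init__(self):
        self.bounds = {}  # feature name -> (min, max)
        self.ranks = {}   # ranked feature name -> (item->rank, rank->item)

    def add_feature(self, name, values, ranked=False):
        """Registers a feature and remembers its value range."""
        if ranked:
            # map each distinct item (e.g. a (year, month) tuple) to its
            # position in sorted order, and normalize over the positions
            ordered = sorted(set(values))
            self.ranks[name] = (
                dict((item, pos) for pos, item in enumerate(ordered)),
                dict((pos, item) for pos, item in enumerate(ordered)))
            self.bounds[name] = (0.0, float(len(ordered) - 1))
        else:
            self.bounds[name] = (float(min(values)), float(max(values)))

    def normalize(self, name, value):
        """Scales a value (or a ranked item) into [0, 1]."""
        if name in self.ranks:
            value = self.ranks[name][0][value]
        low, high = self.bounds[name]
        return (value - low) / (high - low)

    def denormalize(self, name, value):
        """Maps a [0, 1] value back to the original scale or item."""
        low, high = self.bounds[name]
        raw = value * (high - low) + low
        if name in self.ranks:
            # snap to the nearest rank and return the original item,
            # e.g. a (year, month) tuple for the "ym" feature
            return self.ranks[name][1][int(round(raw))]
        return raw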

The results are dismal, to say the least.

(plot omitted: the network's "Prediction" against the "Original data" sine curve)

It's crazy.

The second script is similar, based on sunspot data:

#!/usr/bin/env python 
# -*- coding: utf-8 -*- 

"""An example of a simple RNN.""" 

import argparse 
import sys 
import operator 
import time 

from pybrain.tools.shortcuts import buildNetwork 
from pybrain.structure import FullConnection 
from pybrain.structure.modules import LSTMLayer 
from pybrain.structure import LinearLayer, SigmoidLayer 
from pybrain.supervised.trainers import BackpropTrainer 
from pybrain.supervised import RPropMinusTrainer 
from pybrain.datasets import SupervisedDataSet 
import pybrain.datasets.sequential 

import matplotlib.pyplot as plt 
from matplotlib.ticker import FormatStrFormatter 

from normalizator import Normalizator 


class Network(object): 
    """Neural network.""" 

    def __init__(self, inputs, hidden, outputs): 
     """Constructor.""" 
     self.inputs = inputs 
     self.outputs = outputs 
     self.hidden = hidden 
     self.network = self.build_network(inputs, hidden, outputs) 
     self.norm = Normalizator() 

    def build_network(self, inputs, hidden, outputs): 
     """Builds the network.""" 
     network = buildNetwork(inputs, hidden, outputs, bias=True, 
           hiddenclass=LSTMLayer, 
           #hiddenclass=SigmoidLayer, 
           outclass=SigmoidLayer, 
           outputbias=False, fast=False, recurrent=True) 
     #network.addRecurrentConnection(
     # FullConnection(network['hidden0'], network['hidden0'], name='c3')) 
     network.sortModules() 
     network.randomize() 
     print "Constructed network:" 
     print network 
     return network 

    def train(self, learning_set, max_iterations=100): 
     """Trains the network.""" 
     print "\nThe network is learning..." 
     time_s = time.time() 
     trainer = RPropMinusTrainer(self.network, dataset=learning_set, 
            verbose=True) 
     learning_rate = 0.001 
     #trainer = BackpropTrainer(self.network, learning_set, verbose=True, 
     #   batchlearning=True, momentum=0.8, learningrate=learning_rate) 
     errors = trainer.trainUntilConvergence(maxEpochs=max_iterations) 
     #print "Last error in learning:", errors[-1] 
     time_d = time.time() - time_s 
     print "Learning took %d seconds." % time_d 
     return errors, learning_rate 

    def test(self, data): 
     """Tests the network.""" 
     print ("Year\tMonth\tCount\tCount_norm\t" + 
       "Output\t\tOutDenorm\tError") 
     # do the testing 
     mse = 0.0 
     outputs = [] 
     #print "Test data:", data 
     for item in data: 
      #month = self.norm.denormalize("month", item[1]) 
      #year = self.norm.denormalize("year", item[2]) 
      year, month = self.norm.denormalize("ym", item[5]) 
      count = self.norm.denormalize("count", item[3]) 
      #get the output from the network 
      output = self.network.activate((item[1], item[2])) 
      out_denorm = self.norm.denormalize("count", output[0]) 
      outputs.append(out_denorm) 
      #compute the error 
      error = count - out_denorm 
      mse += error**2 
      print "%d\t%d\t%s\t%f\t%f\t%f\t%f" % \ 
       (year, month, count, item[3], 
       output[0], out_denorm, error) 
     mse /= len(data) 
     print "MSE:", mse 
     #corrects = [self.norm.denormalize("count", item[3]) for item in data] 
     #print "corrects:", len(corrects) 
     return outputs, mse 

    def show_plot(self, correct, outputs, learn_x, test_x, 
        learning_targets, mse): 
     """Rysuje wykres :)""" 
     #print "x_axis:", x_axis 
     #print "output:", output 
     #print "correct:", correct 
     fig = plt.figure() 
     ax = fig.add_subplot(111) 
     ax.plot(test_x, outputs, label="Prediction", color="red") 
     ax.plot(test_x, correct, ":", label="Correct") 
     #            int(201000.0/100) 
     ax.xaxis.set_major_formatter(FormatStrFormatter('%s')) 
     ax.legend(loc='upper left') 
     learn_index = int(0.8 * len(learn_x)) 
     learn_part_x = learn_x[learn_index:] 
     learn_part_vals = learning_targets[learn_index:] 
     learning_plt = fig.add_subplot(111) 
     learning_plt.plot(learn_part_x, learn_part_vals, 
          label="Learning values", color="blue") 
     learning_plt.legend(loc='upper left') 
     plt.xlabel('Year-Month') 
     plt.ylabel('Values') 
     plt.title('... (mse=%f)' % mse) 
     plt.show() 

    def read_data(self, learnfile, testfile): 
     """Wczytuje dane uczące oraz testowe.""" 
     #read learning data 
     data_learn_tmp = [] 
     for line in learnfile: 
      if line[1] == "#": 
       continue 
      row = line.split() 
      year = float(row[0][0:4]) 
      month = float(row[0][4:6]) 
      yearmonth = int(row[0]) 
      count = float(row[2]) 
      data_learn_tmp.append([month, year, count, yearmonth]) 
     data_learn_tmp = sorted(data_learn_tmp, key=operator.itemgetter(1, 0)) 
     # read test data 
     data_test_tmp = [] 
     for line in testfile: 
      if line[0] == "#": 
       continue 
      row = line.split() 
      year = float(row[0][0:4]) 
      month = float(row[0][4:6]) 
      count = float(row[2]) 
      year_month = int(row[0]) 
      data_test_tmp.append([month, year, count, year_month]) 
     data_test_tmp = sorted(data_test_tmp, key=operator.itemgetter(1, 0)) 
     # prepare data for normalization 
     months = [item[0] for item in data_learn_tmp + data_test_tmp] 
     years = [item[1] for item in data_learn_tmp + data_test_tmp] 
     counts = [item[2] for item in data_learn_tmp + data_test_tmp] 
     self.norm.add_feature("month", months) 
     self.norm.add_feature("year", years) 
     ym = [(years[index], months[index]) for index in xrange(0, len(years))] 
     self.norm.add_feature("ym", ym, ranked=True) 
     self.norm.add_feature("count", counts) 
     #build learning data set 
     learning_set = pybrain.datasets.sequential.SequentialDataSet(2, 1) 
     #learning_set = pybrain.datasets.sequential.SupervisedDataSet(2, 1) 
     # add items to the learning dataset proper 
     last_year = -1 
     for item in data_learn_tmp: 
      if last_year != item[1]: 
       learning_set.newSequence() 
       last_year = item[1] 
      year_month = self.norm.normalize("ym", (item[1], item[0])) 
      count = self.norm.normalize("count", item[2]) 
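      # NB: appendLinked gets a single scalar for a 2-input dataset here
      # (numpy broadcasts it to both input units), while test() activates
      # the network with the pair (month, year)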
      learning_set.appendLinked((year_month), (count)) 
     #build testing data set proper 
     words = ["N/A"] * len(data_test_tmp) 
     testing_set = [] 
     # data_test_tmp rows are [month, year, count, year_month]
     for index in range(len(data_test_tmp)): 
      month = self.norm.normalize("month", data_test_tmp[index][0]) 
      year = self.norm.normalize("year", data_test_tmp[index][1]) 
      year_month = self.norm.normalize("ym", 
         (data_test_tmp[index][1], data_test_tmp[index][0])) 
      count = self.norm.normalize("count", data_test_tmp[index][2]) 
      testing_set.append((words[index], month, year, 
           count, data_test_tmp[index][3], year_month)) 
     #learning_set, testing_set, learn_inputs, test_inputs, learn_targets 
     learn_x = [element[3] for element in data_learn_tmp] 
     test_x = [element[3] for element in data_test_tmp] 
     learn_targets = [element[2] for element in data_learn_tmp] 
     test_targets = [element[2] for element in data_test_tmp] 
     return (learning_set, testing_set, learn_x, test_x, 
       learn_targets, test_targets) 


def get_args(): 
    """Buduje parser cli.""" 
    parser = argparse.ArgumentParser(
     description='Trains a simple recurrent neural network.') 

    parser.add_argument('--inputs', type=int, default=2, 
         help='Number of input neurons.') 
    parser.add_argument('--hidden', type=int, default=5, 
         help='Number of hidden neurons.') 
    parser.add_argument('--outputs', type=int, default=1, 
         help='Number of output neurons.') 

    parser.add_argument('--iterations', type=int, default=100, 
       help='Maximum number of iteration epoch in training phase.') 

    parser.add_argument('trainfile', nargs='?', type=argparse.FileType('r'), 
         default=sys.stdin, help="File with learning dataset.") 
    parser.add_argument('testfile', nargs='?', type=argparse.FileType('r'), 
         default=sys.stdin, help="File with testing dataset.") 

    parser.add_argument('--version', action='version', version='%(prog)s 1.0') 

    return parser.parse_args() 

if __name__ == '__main__': 
    args = get_args() 
    nnetwork = Network(args.inputs, args.hidden, args.outputs) 
    learning_set, testing_set, learn_x, test_x, learn_targets, test_targets = \ 
     nnetwork.read_data(args.trainfile, args.testfile) 
    errors, rate = nnetwork.train(learning_set, args.iterations) 
    outputs, mse = nnetwork.test(testing_set) 
    nnetwork.show_plot(test_targets, outputs, 
         learn_x, test_x, learn_targets, mse) 
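
For reference, read_data above expects whitespace-separated rows whose first column fuses year and month as YYYYMM and whose third column is the monthly count; lines starting with "#" are skipped. The following sample rows are hypothetical, but consistent with that parsing and with the test output below (the second column is never read by the code):

# yearmonth  decimal-year  count
200909  2009.708  4.3
200910  2009.792  4.8
200911  2009.875  4.1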

Here, too, all I see is chaos. I cannot show you the plot because I do not have enough reputation points, but basically the predicted function is a periodic sawtooth-like curve that has little to do with the input or with the past data.

Year Month Count Count_norm Output  OutDenorm Error 
2009 9  4.3  0.016942 0.216687 54.995108 -50.695108 
2009 10  4.8  0.018913 0.218810 55.534015 -50.734015 
2009 11  4.1  0.016154 0.221876 56.312243 -52.212243 
2009 12  10.8 0.042553 0.224774 57.047758 -46.247758 
2010 1  13.2 0.052009 0.184361 46.790833 -33.590833 
2010 2  18.8 0.074074 0.181018 45.942258 -27.142258 
2010 3  15.4 0.060678 0.183226 46.502806 -31.102806 

I have tried two different learning algorithms, various numbers of hidden units, learning rates, and kinds of elements added to the learning dataset, all to no avail.

I am completely lost now.


I would kindly suggest reshaping your question into something more specific. Nobody can assure you that a neural network is actually suitable for any given problem. What exactly are you asking? – Pantelis Natsiavas


@PantelisNatsiavas, thanks for your interest in my question. Of course, nobody can guarantee that a neural network will be suitable for any given problem. However, there are papers describing the use of recurrent neural networks for time-series prediction/regression, so simple regression of the sine function should not be that big a problem for an RNN. I am asking for any hints/ideas that might lead me to solving the problem of the NN not converging/learning. – Bartosz


Is the error in the examples from the list, or in an external example? – BartoszKP

Answer


If you use a logistic activation function in the output layer, the outputs are restricted to the range (0, 1), but your sin function produces outputs in the range (-1, 1). I think that is why your sine learning has a hard time converging to a small error. You cannot even predict the sine function correctly on your training data, can you? You probably need to rescale your input/output sets before training and testing.
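A minimal sketch of two ways to act on this advice (an illustration, not the asker's code): rescale the sine targets into the sigmoid's (0, 1) range before training and map the network's outputs back afterwards, or build the network with an output layer whose range already covers (-1, 1), e.g. swapping SigmoidLayer for TanhLayer:

import math

from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import LSTMLayer, TanhLayer


def sin_to_unit(x):
    """Maps sin(x) from [-1, 1] into the sigmoid's (0, 1) output range."""
    return (math.sin(x) + 1.0) / 2.0


def unit_to_sin(y):
    """Maps a network output in [0, 1] back onto the sine's [-1, 1] scale."""
    return 2.0 * y - 1.0


# alternative: let the output activation match the target range directly
network = buildNetwork(1, 20, 1,
                       hiddenclass=LSTMLayer,
                       outclass=TanhLayer,  # outputs in (-1, 1)
                       outputbias=False, recurrent=True)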


Thanks for your comment. Indeed, the scales of the network output and the sine function differ somewhat. That is why I normalize everything I feed into the network to the [0, 1] range, and then denormalize the network's output back to the original values. I have found that plain backpropagation performs worse than RPropMinusTrainer. I am now working on my "real" data, i.e. what I actually want to predict, not just toy problems like sunspots and the sine, and I am getting better results. That is probably down to the nature of the data plus some code cleanup. But what should I do with this question then?... – Bartosz