
Deep learning: how to save a trained model for prediction and how to load it later

I am studying the main concepts of deep learning with the theano library, and I am trying to run the code below, which comes from the tutorial. The code takes several hours to run. How should I save the trained model for later use, and how should I load it again to make predictions?

import cPickle 
import gzip 
import os 
import sys 
import time 

import numpy 

import theano 
import theano.tensor as T 
from theano.tensor.signal import downsample 
from theano.tensor.nnet import conv 

from logistic_sgd import LogisticRegression, load_data 
from mlp import HiddenLayer 


class LeNetConvPoolLayer(object): 
    """Pool Layer of a convolutional network """ 

    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)): 
     """ 
     Allocate a LeNetConvPoolLayer with shared variable internal parameters. 
     :type rng: numpy.random.RandomState 
     :param rng: a random number generator used to initialize weights 
     :type input: theano.tensor.dtensor4 
     :param input: symbolic image tensor, of shape image_shape 
     :type filter_shape: tuple or list of length 4 
     :param filter_shape: (number of filters, num input feature maps, 
           filter height,filter width) 
     :type image_shape: tuple or list of length 4 
     :param image_shape: (batch size, num input feature maps, 
          image height, image width) 
     :type poolsize: tuple or list of length 2 
     :param poolsize: the downsampling (pooling) factor (#rows,#cols) 
     """ 

     assert image_shape[1] == filter_shape[1] 
     self.input = input 

     # there are "num input feature maps * filter height * filter width" 
     # inputs to each hidden unit 
     fan_in = numpy.prod(filter_shape[1:]) 
     # each unit in the lower layer receives a gradient from: 
     # "num output feature maps * filter height * filter width"/
     # pooling size 
     fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:])/
        numpy.prod(poolsize)) 
     # initialize weights with random weights 
     W_bound = numpy.sqrt(6./(fan_in + fan_out)) 
     self.W = theano.shared(numpy.asarray(
      rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), 
      dtype=theano.config.floatX), 
           borrow=True) 

     # the bias is a 1D tensor -- one bias per output feature map 
     b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) 
     self.b = theano.shared(value=b_values, borrow=True) 

     # convolve input feature maps with filters 
     conv_out = conv.conv2d(input=input, filters=self.W, 
       filter_shape=filter_shape, image_shape=image_shape) 

     # downsample each feature map individually, using maxpooling 
     pooled_out = downsample.max_pool_2d(input=conv_out, 
              ds=poolsize, ignore_border=True) 

     # add the bias term. Since the bias is a vector (1D array), we first 
     # reshape it to a tensor of shape (1,n_filters,1,1). Each bias will 
     # thus be broadcasted across mini-batches and feature map 
     # width & height 
     self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')) 

     # store parameters of this layer 
     self.params = [self.W, self.b] 


def evaluate_lenet5(learning_rate=0.1, n_epochs=200, 
        dataset='mnist.pkl.gz', 
        nkerns=[20, 50], batch_size=500): 
    """ Demonstrates lenet on MNIST dataset 
    :type learning_rate: float 
    :param learning_rate: learning rate used (factor for the stochastic 
          gradient) 
    :type n_epochs: int 
    :param n_epochs: maximal number of epochs to run the optimizer 
    :type dataset: string 
    :param dataset: path to the dataset used for training /testing (MNIST here) 
    :type nkerns: list of ints 
    :param nkerns: number of kernels on each layer 
    """ 

    rng = numpy.random.RandomState(23455) 

    datasets = load_data(dataset) 

    train_set_x, train_set_y = datasets[0] 
    valid_set_x, valid_set_y = datasets[1] 
    test_set_x, test_set_y = datasets[2] 

    # compute number of minibatches for training, validation and testing 
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] 
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] 
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] 
    n_train_batches /= batch_size 
    n_valid_batches /= batch_size 
    n_test_batches /= batch_size 

    # allocate symbolic variables for the data 
    index = T.lscalar() # index to a [mini]batch 
    x = T.matrix('x') # the data is presented as rasterized images 
    y = T.ivector('y') # the labels are presented as 1D vector of 
         # [int] labels 

    ishape = (28, 28) # this is the size of MNIST images 

    ###################### 
    # BUILD ACTUAL MODEL # 
    ###################### 
    print '... building the model' 

    # Reshape matrix of rasterized images of shape (batch_size,28*28) 
    # to a 4D tensor, compatible with our LeNetConvPoolLayer 
    layer0_input = x.reshape((batch_size, 1, 28, 28)) 

    # Construct the first convolutional pooling layer: 
    # filtering reduces the image size to (28-5+1,28-5+1)=(24,24) 
    # maxpooling reduces this further to (24/2,24/2) = (12,12) 
    # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12) 
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input, 
      image_shape=(batch_size, 1, 28, 28), 
      filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2)) 

    # Construct the second convolutional pooling layer 
    # filtering reduces the image size to (12-5+1,12-5+1)=(8,8) 
    # maxpooling reduces this further to (8/2,8/2) = (4,4) 
    # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4) 
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output, 
      image_shape=(batch_size, nkerns[0], 12, 12), 
      filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) 

    # the HiddenLayer being fully-connected, it operates on 2D matrices of 
    # shape (batch_size,num_pixels) (i.e matrix of rasterized images). 
    # This will generate a matrix of shape (20,32*4*4) = (20,512) 
    layer2_input = layer1.output.flatten(2) 

    # construct a fully-connected sigmoidal layer 
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4, 
         n_out=500, activation=T.tanh) 

    # classify the values of the fully-connected sigmoidal layer 
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10) 

    # the cost we minimize during training is the NLL of the model 
    cost = layer3.negative_log_likelihood(y) 

    # create a function to compute the mistakes that are made by the model 
    test_model = theano.function([index], layer3.errors(y), 
      givens={ 
       x: test_set_x[index * batch_size: (index + 1) * batch_size], 
       y: test_set_y[index * batch_size: (index + 1) * batch_size]}) 

    validate_model = theano.function([index], layer3.errors(y), 
      givens={ 
       x: valid_set_x[index * batch_size: (index + 1) * batch_size], 
       y: valid_set_y[index * batch_size: (index + 1) * batch_size]}) 

    # create a list of all model parameters to be fit by gradient descent 
    params = layer3.params + layer2.params + layer1.params + layer0.params 

    # create a list of gradients for all model parameters 
    grads = T.grad(cost, params) 

    # train_model is a function that updates the model parameters by 
    # SGD Since this model has many parameters, it would be tedious to 
    # manually create an update rule for each model parameter. We thus 
    # create the updates list by automatically looping over all 
    # (params[i],grads[i]) pairs. 
    updates = [] 
    for param_i, grad_i in zip(params, grads): 
     updates.append((param_i, param_i - learning_rate * grad_i)) 

    train_model = theano.function([index], cost, updates=updates, 
      givens={ 
      x: train_set_x[index * batch_size: (index + 1) * batch_size], 
      y: train_set_y[index * batch_size: (index + 1) * batch_size]}) 

    ############### 
    # TRAIN MODEL # 
    ############### 
    print '... training' 
    # early-stopping parameters 
    patience = 10000 # look as this many examples regardless 
    patience_increase = 2 # wait this much longer when a new best is 
          # found 
    improvement_threshold = 0.995 # a relative improvement of this much is 
            # considered significant 
    validation_frequency = min(n_train_batches, patience/2) 
            # go through this many 
            # minibatche before checking the network 
            # on the validation set; in this case we 
            # check every epoch 

    best_params = None 
    best_validation_loss = numpy.inf 
    best_iter = 0 
    test_score = 0. 
    start_time = time.clock() 

    epoch = 0 
    done_looping = False 

    while (epoch < n_epochs) and (not done_looping): 
     epoch = epoch + 1 
     for minibatch_index in xrange(n_train_batches): 

      iter = (epoch - 1) * n_train_batches + minibatch_index 

      if iter % 100 == 0: 
       print 'training @ iter = ', iter 
      cost_ij = train_model(minibatch_index) 

      if (iter + 1) % validation_frequency == 0: 

       # compute zero-one loss on validation set 
       validation_losses = [validate_model(i) for i 
            in xrange(n_valid_batches)] 
       this_validation_loss = numpy.mean(validation_losses) 
       print('epoch %i, minibatch %i/%i, validation error %f %%' % \ 
         (epoch, minibatch_index + 1, n_train_batches, \ 
         this_validation_loss * 100.)) 

       # if we got the best validation score until now 
       if this_validation_loss < best_validation_loss: 

        #improve patience if loss improvement is good enough 
        if this_validation_loss < best_validation_loss * \ 
         improvement_threshold: 
         patience = max(patience, iter * patience_increase) 

        # save best validation score and iteration number 
        best_validation_loss = this_validation_loss 
        best_iter = iter 

        # test it on the test set 
        test_losses = [test_model(i) for i in xrange(n_test_batches)] 
        test_score = numpy.mean(test_losses) 
        print(('  epoch %i, minibatch %i/%i, test error of best ' 
          'model %f %%') % 
          (epoch, minibatch_index + 1, n_train_batches, 
          test_score * 100.)) 

      if patience <= iter: 
       done_looping = True 
       break 

    end_time = time.clock() 
    print('Optimization complete.') 
    print('Best validation score of %f %% obtained at iteration %i,'\ 
      'with test performance %f %%' % 
      (best_validation_loss * 100., best_iter + 1, test_score * 100.)) 
    print >> sys.stderr, ('The code for file ' + 
          os.path.split(__file__)[1] + 
          ' ran for %.2fm' % ((end_time - start_time)/60.)) 

if __name__ == '__main__': 
    evaluate_lenet5() 


def experiment(state, channel): 
    evaluate_lenet5(state.learning_rate, dataset=state.dataset) 

Have you tried pickling it? Also, I'm not sure what posting all of this code adds to your question in this case... – Julien

Answers


In general, you want to find every place where a shared variable is created (theano.shared) and pickle its value. If you have a shared variable a, you can get its value with a.get_value(), and then pickle that value (or save it with numpy.save or numpy.savez). When you want to load the network later, just load those saved values and assign them back to the shared variables with a.set_value().
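
For example, a minimal sketch of this approach (assuming the layer0 .. layer3 objects built in evaluate_lenet5 above, and a hypothetical file name params.pkl) could look like this:

import cPickle

# after training: pull the raw numpy arrays out of every shared variable,
# in a fixed order (same list as used for the gradient updates)
params = layer3.params + layer2.params + layer1.params + layer0.params
with open('params.pkl', 'wb') as f:
    cPickle.dump([p.get_value() for p in params], f, -1)  # -1 = highest protocol

# later: rebuild the same architecture (the layer0 .. layer3 construction above),
# build the params list in the same order, then push the saved values back in
with open('params.pkl', 'rb') as f:
    values = cPickle.load(f)
for p, v in zip(params, values):
    p.set_value(v)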

In your case, an object-oriented way to do this would be to write save and load methods for LeNetConvPoolLayer. For example, the save method could do

def save(self, filename):
    # assumes "import numpy as np"; writes this layer's parameter values to an .npz file
    np.savez(filename, W=self.W.get_value(), b=self.b.get_value())
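
and a matching load method (again only a minimal sketch, assuming the same W/b keys written by save above) could restore the values with set_value:

def load(self, filename):
    # assumes "import numpy as np"; restores the parameter values written by save()
    data = np.load(filename)
    self.W.set_value(data['W'])
    self.b.set_value(data['b'])

(Note that np.savez appends a .npz extension when the file name does not already have one, so pass the same full name, e.g. 'layer0.npz', to both methods.)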

You can then use these save and load methods to save and load each layer as you wish.

Trying to pickle the whole thing is another option, but some Theano objects will not work properly after being pickled and loaded (I'm not sure exactly which ones, but it may depend, for example, on whether your shared variables are stored on the CPU or the GPU). So it is better to save and load the values individually as described above, especially if you want to store them for a long time or share them between machines.
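
Once the layers have been rebuilt and their parameter values restored this way, you can compile a prediction function from the same symbolic graph. A rough sketch, assuming the symbolic variable x and the layer3 object from the question's code (the tutorial's LogisticRegression exposes its predicted labels as y_pred) and a hypothetical input array new_images:

# compile a function that maps rasterized images to predicted digit labels
predict_model = theano.function([x], layer3.y_pred)

# new_images must be a (batch_size, 28 * 28) array of dtype theano.config.floatX,
# because the graph reshapes x to (batch_size, 1, 28, 28)
predicted_labels = predict_model(new_images)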

# in the evaluate_lenet5 block, save the model after training finishes
import cPickle as pickle

with open('layer0_model.pkl', 'wb') as f0:
    pickle.dump(layer0, f0)
with open('layer1_model.pkl', 'wb') as f1:
    pickle.dump(layer1, f1)
with open('layer2_model.pkl', 'wb') as f2:
    pickle.dump(layer2, f2)
with open('layer3_model.pkl', 'wb') as f3:
    pickle.dump(layer3, f3)



# later: load the saved model (the layer class definitions must still be importable)
layer0 = pickle.load(open('layer0_model.pkl', 'rb'))
layer1 = pickle.load(open('layer1_model.pkl', 'rb'))
layer2 = pickle.load(open('layer2_model.pkl', 'rb'))
layer3 = pickle.load(open('layer3_model.pkl', 'rb'))