
Memory error when loading the CIFAR10 training data

I am working through assignment 2 (link) of Andrej Karpathy's neural network course. The programming environment is an IPython notebook. When I try to load the CIFAR10 data, I repeatedly get a memory error. I have tried googling for a solution, but nothing has worked. Please help me with this.

import numpy as np
from cs231n.data_utils import load_CIFAR10

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the two-layer neural net classifier. These are the same steps as
    we used for the SVM, but condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)

    return X_train, y_train, X_val, y_val, X_test, y_test


# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
print 'Train data shape: ', X_train.shape
print 'Train labels shape: ', y_train.shape
print 'Validation data shape: ', X_val.shape
print 'Validation labels shape: ', y_val.shape
print 'Test data shape: ', X_test.shape
print 'Test labels shape: ', y_test.shape

This is the loading code from cs231n/data_utils.py:

import cPickle as pickle
import numpy as np
import os

def load_CIFAR_batch(filename):
    """ load single batch of cifar """
    with open(filename, 'rb') as f:
        datadict = pickle.load(f)
        X = datadict['data']
        Y = datadict['labels']
        X = X.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
        Y = np.array(Y)
        return X, Y

def load_CIFAR10(ROOT):
    """ load all of cifar """
    xs = []
    ys = []
    for b in range(1, 6):
        f = os.path.join(ROOT, 'data_batch_%d' % (b,))
        X, Y = load_CIFAR_batch(f)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    del X, Y
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte
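
For context, a rough back-of-the-envelope calculation (an illustrative sketch, not part of the original post) shows why this load is memory hungry: load_CIFAR_batch casts every batch to float64, so the 60,000 CIFAR-10 images alone take roughly 1.4 GB, on top of the temporary uint8 copies, which is already close to the 2 GB address-space limit of a 32-bit process.

# Rough estimate of the memory needed by load_CIFAR10 (illustrative only).
num_train = 50000              # 5 batches of 10000 images
num_test = 10000
pixels_per_image = 32 * 32 * 3

# astype("float") in load_CIFAR_batch produces float64 (8 bytes per value)
bytes_float64 = (num_train + num_test) * pixels_per_image * 8
# the raw pickled data is uint8 (1 byte per value) and exists temporarily too
bytes_uint8 = (num_train + num_test) * pixels_per_image * 1

print 'float64 copies: %.2f GB' % (bytes_float64 / 1024.0 ** 3)
print 'uint8 copies:   %.2f GB' % (bytes_uint8 / 1024.0 ** 3)
print 'rough total:    %.2f GB' % ((bytes_float64 + bytes_uint8) / 1024.0 ** 3)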

Anyone, please? –

Did you solve this problem? I have a similar problem... – David

Answer


If anyone is facing the same problem on Windows, install the 64-bit Python distribution. The x86 (32-bit) distribution is limited to about 2 GB of memory.
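
To check whether the interpreter you are running is 32-bit or 64-bit, a minimal sketch using only the standard library:

# Quick check of the interpreter's bitness (illustrative).
import struct
import platform

# struct.calcsize('P') is the pointer size in bytes: 4 on 32-bit, 8 on 64-bit
print 'Python is %d-bit' % (struct.calcsize('P') * 8)
print 'platform.architecture():', platform.architecture()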