2014-02-17 33 views
2

我正在使用 R 的 neuralnet 包来预测(识别)手写数字,并使用 MNIST 数据库对该算法进行训练和测试。下面是我使用的 R 代码(问题标题:在 R 中使用 'neuralnet' 时出现意外输出):

# Importing the data into R
# Data can be downloaded from: http://yann.lecun.com/exdb/mnist/
path <- "path_to_data_folder/MNIST_database_of_handwritten_digits/"

# Reads `nImages` images of nRows x nCols one-byte pixels from the open binary
# connection `con` and returns a data.frame with one row per image (columns
# X1, X2, ...). Pixels are read with signed = FALSE so that bytes above 127
# stay in 0..255 instead of wrapping around to negative values.
readDigitImages <- function(con, nImages, nRows, nCols) {
  nPixels <- nRows * nCols
  pixels <- vapply(
    seq_len(nImages),
    function(i) {
      img <- readBin(con, integer(), n = nPixels, size = 1,
                     signed = FALSE, endian = "big")
      # Same column reversal as the image() calls below use for plotting.
      as.vector(matrix(img, nRows, nCols)[, nCols:1])
    },
    integer(nPixels)
  )
  data.frame(t(pixels), row.names = NULL)
}

# Reads `nLabels` one-byte labels from the open binary connection `con` in a
# single call (avoids growing a vector with c() inside a loop).
readDigitLabels <- function(con, nLabels) {
  readBin(con, integer(), n = nLabels, size = 1, signed = FALSE, endian = "big")
}

#################### Training Data ####################

to.read <- file(paste0(path, "train-images-idx3-ubyte"), "rb")
to.read_Label <- file(paste0(path, "train-labels-idx1-ubyte"), "rb")

# File headers: 4-byte big-endian integers, read in the order they are stored.
magicNumber <- readBin(to.read, integer(), n = 1, endian = "big")
magicNumber_Label <- readBin(to.read_Label, integer(), n = 1, endian = "big")
numberOfImages <- readBin(to.read, integer(), n = 1, endian = "big")
numberOfImages_Label <- readBin(to.read_Label, integer(), n = 1, endian = "big")
rowPixels <- readBin(to.read, integer(), n = 1, endian = "big")
columnPixels <- readBin(to.read, integer(), n = 1, endian = "big")

# image(1:rowPixels, 1:columnPixels, matrix(readBin(to.read, integer(), n=(rowPixels*columnPixels), size=1, signed=FALSE, endian="big"), rowPixels, columnPixels)[,columnPixels:1], col=gray((0:255)/255))

trainDigits <- readDigitImages(to.read, numberOfImages, rowPixels, columnPixels) # Takes a minute
trainDigits_Label <- readDigitLabels(to.read_Label, numberOfImages_Label)

# Release the file handles as soon as the data is in memory.
close(to.read)
close(to.read_Label)

# i <- 1 # Specify the image number to visualize the image
# image(1:rowPixels, 1:columnPixels, matrix(trainDigits[i,], rowPixels, columnPixels), col=gray((0:255)/255))

# appending the labels to the training data (label becomes the last column)
trainDigits <- cbind(trainDigits, trainDigits_Label)

#################### Modelling ####################

library(neuralnet)
# Considering only 500 rows for training due to time and memory constraints.
# The label is the single numeric response on the left-hand side of the formula.
myNnet <- neuralnet(
  formula = as.formula(paste0("trainDigits_Label ~ ",
                              paste0("X", 1:(ncol(trainDigits) - 1), collapse = "+"))),
  data = trainDigits[1:500, ],
  hidden = 10,
  algorithm = 'rprop+',
  learningrate = 0.01
)

#################### Test Data ####################

to.read_test <- file(paste0(path, "t10k-images-idx3-ubyte"), "rb")
to.read_Label_test <- file(paste0(path, "t10k-labels-idx1-ubyte"), "rb")
magicNumber <- readBin(to.read_test, integer(), n = 1, endian = "big")
magicNumber_Label <- readBin(to.read_Label_test, integer(), n = 1, endian = "big")
numberOfImages_test <- readBin(to.read_test, integer(), n = 1, endian = "big")
numberOfImages_Label_test <- readBin(to.read_Label_test, integer(), n = 1, endian = "big")
rowPixels <- readBin(to.read_test, integer(), n = 1, endian = "big")
columnPixels <- readBin(to.read_test, integer(), n = 1, endian = "big")

# Read with the same helpers as the training set so both use the same pixel
# encoding and column layout.
testDigits <- readDigitImages(to.read_test, numberOfImages_test, rowPixels, columnPixels) # Takes a minute
testDigits_Label <- readDigitLabels(to.read_Label_test, numberOfImages_Label_test)

close(to.read_test)
close(to.read_Label_test)

#################### 'neuralnet' Predictions ####################

# Run the test images through the trained network and cross-tabulate the
# rounded network output against the true labels.
predictOut <- compute(myNnet, testDigits)
with(predictOut, table(round(net.result), testDigits_Label))

#################### Random Forest ####################
# Cross-validating NN results with Random Forest

library(randomForest)
# All columns except the last one (the appended label) are predictors.
myRF <- randomForest(
  x = trainDigits[, seq_len(ncol(trainDigits) - 1)],
  y = as.factor(trainDigits_Label),
  ntree = 100
)

predRF <- predict(myRF, newdata = testDigits)
# Build the confusion matrix once and reuse it for the accuracy figure.
confusionRF <- table(predRF, testDigits_Label)
confusionRF # Confusion Matrix
sum(diag(confusionRF)) / sum(confusionRF) # % of correct predictions

训练集共有 60000 幅图像(每幅 28 × 28 像素),数字 0 到 9 在整个数据集中的分布(几乎)均匀。与上面“Modelling”部分仅使用 500 幅图像不同,我实际使用了整个训练数据集来训练 myNnet 模型(28 * 28 = 784 个输入和 10 个输出),然后预测测试数据集中 10,000 幅图像的输出。(由于内存限制,我在隐藏层中只使用了 10 个神经元。)

我得到的预测结果很奇怪:输出呈现某种类似高斯分布的形态,大部分情况下预测值为 4,从 4 向 0 或向 9 两侧的预测数量(近似)呈指数下降。你可以在下面的混淆矩阵中看到这一点(我对输出做了四舍五入,因为它们不是整数):

> table(round(predictOut$net.result), testDigits_Label) 
    testDigits_Label 
     0 1 2 3 4 5 6 7 8 9 
    -2 1 1 4 1 1 3 0 4 1 2 
    -1 8 17 12 9 7 8 8 12 7 10 
    0 38 50 44 45 35 28 36 40 30 39 
    1 77 105 86 80 71 69 68 75 67 77 
    2 116 163 126 129 101 97 111 101 99 117 
    3 159 205 196 174 142 140 153 159 168 130 
    4 216 223 212 183 178 170 177 169 181 196 
    5 159 188 150 183 183 157 174 176 172 155 
    6 119 111 129 125 143 124 144 147 129 149 
    7 59 53 52 60 74 52 51 91 76 77 
    8 22 14 18 14 32 36 28 38 35 41 
    9 6 5 3 7 15 8 8 16 9 16 

我想一定是我的做法有什么问题,所以我又尝试用 R 的 randomForest 包进行预测。然而 randomForest 工作正常,准确率超过 95%。下面是 randomForest 预测的混淆矩阵:

> table(predRF, testDigits_Label) 
     testDigits_Label 
predRF 0 1 2 3 4 5 6 7 8 9 
    0 967 0 6 1 1 7 11 2 5 5 
    1 0 1123 0 0 0 1 3 7 0 5 
    2 1 2 974 9 3 1 3 25 4 2 
    3 0 3 5 963 0 21 0 0 9 10 
    4 0 0 12 0 940 1 4 2 7 15 
    5 4 0 2 16 0 832 6 0 11 4 
    6 6 5 5 0 7 11 929 0 3 2 
    7 1 1 14 7 2 2 0 979 4 6 
    8 1 1 12 7 5 11 2 1 917 10 
    9 0 0 2 7 24 5 0 12 14 950 
  • 问题1:为什么 neuralnet 在这个数据集上会有这种奇怪的行为?有人能解释一下吗?(顺便说一句,我检查过,neuralnet 在 iris 数据集上工作正常。)

    • 编辑:我想我明白了使用 neuralnet 时输出呈类高斯分布的原因。使用 neuralnet 时,只有一个输出节点(或者该叫神经元?),而不是每个输出类(这里有 10 个类)各有一个节点。因此,在计算用于反向传播的 delta 时,算法计算“期望输出”与“计算输出”之间的差异;对所有实例汇总后,当输出为 4 或 5 时这一误差最小。因此,权重会在反向传播过程中被调整,以使输出误差(Error)最小化。这可能就是 neuralnet 给出类高斯输出的原因。
  • 问题2:同时,我想知道如何纠正 neuralnet 的这种行为,从而得到与 randomForest 结果相当的预测。

+0

见[此示例](http://www.parallelr.com/r-dnn-并行加速度/)DNN的本地R与MNIST数据集。 – Patric

回答

10

先提一点初步意见:你可以像下面这样更高效地加载数据:

# Read in data.
# signed = FALSE keeps every one-byte pixel in 0..255; with the readBin default
# (signed = TRUE) bytes above 127 would come back as negative numbers.
trainDigits <- replicate(
  numberOfImages,
  c(matrix(readBin(to.read, integer(), n = (rowPixels * columnPixels),
                   size = 1, signed = FALSE, endian = "big"),
           rowPixels, columnPixels)[, columnPixels:1])
)
# replicate() returns one image per column; transpose to one image per row.
trainDigits <- data.frame(t(trainDigits), row.names = NULL)
# All labels are consecutive single bytes, so one readBin call reads them all.
trainDigits_Label <- readBin(to.read_Label, integer(), n = numberOfImages,
                             size = 1, signed = FALSE, endian = "big")

你的第一个问题是:你没有让 neuralnet 进行多类预测。你现在做的是预测一个 0 到 9 之间的实数,这就是为什么只有一个输出,而不是 10 个输出。

如果查看 ?neuralnet 的帮助,其中有一个多类预测的例子。你必须把每个类别放在一个单独的变量中,并把这些变量放在 formula 的左侧。其他软件包(如 nnet)会自动检测 factor 并替你完成这一步。你可以使用 nnet 包的 class.ind 函数把因子拆分成多个指示变量:

# appending the labels to the training data
# class.ind() lives in the nnet package; it is called through the namespace so
# this snippet also works when nnet has not been attached with library().
# It produces one 0/1 indicator column per distinct label value.
output <- nnet::class.ind(trainDigits_Label)
colnames(output) <- paste0('out.', colnames(output))
output.names <- colnames(output)
# Capture the predictor names before the indicator columns are prepended.
input.names <- colnames(trainDigits)
trainDigits <- cbind(output, trainDigits)

现在你可以粘贴在一起的公式:

# Considering only 500 rows
trainsize <- 500
# neuralnet:::varify.variables (sic) does not pass "data" when calling "terms".
# If it did, you wouldn't have to construct the formula like this.
library(neuralnet)
# Formula text has the form "out.0+out.1+... ~ X1+X2+...": every indicator
# column is a response, every pixel column a predictor.
myNnet <- neuralnet(
  formula = paste0(
    paste(output.names, collapse = '+'),
    ' ~ ',
    paste(input.names, collapse = '+')
  ),
  data = trainDigits[1:trainsize, ],
  hidden = 10,
  algorithm = 'rprop+',
  learningrate = 0.01,
  rep = 1
)

这个修正仍然没有让神经网络表现良好。要了解神经网络的表现有多差,可以先看它在训练数据上的结果。它在训练数据上本应表现得非常好,因为这些数据它全都见过:

# Accuracy on training data
# Feed the rows used for fitting back through the network, pick the output
# column with the largest activation per row and map its position to a digit.
fitRows <- seq_len(trainsize)
res <- compute(myNnet, trainDigits[fitRows, input.names])
picks <- (0:9)[apply(res$net.result, 1, which.max)]
prop.table(table(trainDigits_Label[fitRows] == picks))
# FALSE TRUE 
# 0.376 0.624 

在训练数据上 62% 的准确率很糟糕。正如你所预料的,它在其余数据上的表现只比随机猜测略好:

# Accuracy on test data
# Every row that was not used for fitting serves as held-out data. The upper
# bound comes from the data itself rather than a hard-coded 60000.
heldOutRows <- (trainsize + 1):nrow(trainDigits)
res <- compute(myNnet, trainDigits[heldOutRows, input.names])
# Position of the strongest output column per row, mapped to the digit 0..9.
picks <- (0:9)[apply(res$net.result, 1, which.max)]
prop.table(table(trainDigits_Label[heldOutRows] == picks))
# FALSE   TRUE 
# 0.8612268908 0.1387731092 
# 14% accuracy 

随机森林在完全相同的数据上确实表现得非常好。它最近如此受欢迎是有充分理由的。

trainsize <- 500
library(randomForest)
# Fit a classification forest: the label is converted to a factor and only the
# pixel columns (input.names) of the first `trainsize` rows are used.
myRF <- randomForest(trainDigits_Label ~ .,
                     data = data.frame(trainDigits_Label = as.factor(trainDigits_Label),
                                       trainDigits[input.names])[1:trainsize, ],
                     ntree = 100)

# Train
# NOTE: predict() without newdata returns the out-of-bag predictions of the
# forest, so this figure is an OOB estimate rather than a plain re-fit score.
p <- as.numeric(as.character(predict(myRF)))
prop.table(table(trainDigits_Label[1:trainsize] == p))
# Accuracy: 79%  

# Test
# Held-out rows run up to the actual number of rows instead of a hard-coded
# 60000, and only the predictor columns are handed to predict().
heldOutRows <- (trainsize + 1):nrow(trainDigits)
p <- as.numeric(as.character(predict(myRF, trainDigits[heldOutRows, input.names])))
prop.table(table(trainDigits_Label[heldOutRows] == p))
# Accuracy: 76% 

因此,对于第二个问题,我的反问题是:为什么你会期望神经网络和随机森林一样好?他们可能有一些模糊的结构相似性,但拟合过程是完全不同的。我想你可以对神经网络中的节点进行细化,并将它们与随机森林模型中最重要的变量进行比较。但是,在这一点上,它更像是一个统计问题,而不是一个编程问题。

+0

非常感谢有关如何使用'neuralnet'进行多类分类的见解。是的,看到RF是如此简单的算法如何在这项任务中表现如此出色,这是非常有趣的。关于你的反问题,我认为神经网络是一个非常强大的算法,它可以学习识别其他ML算法几乎看不到的任何模式。即使在简单模式的情况下,我也期望神经网络能够调整其权重,以便模仿其他可以检测这些模式的算法的行为。 – StrikeR

+2

我同意神经网络很*灵活*,这使它们能够识别其他算法看不到的模式。但这并不意味着它们*更强大*;灵活性使它们更难拟合,更容易陷入局部最小值,也更容易对训练数据过拟合(如本例所示)。 – nograpes

2

我要感谢此讨论中之前的所有作者,因为这是网络上关于 neuralnet 包用法信息量最大的来源!这个讨论对我研究 neuralnet R 包很有帮助。

问题2:借助以下提示,可以用 neuralnet 以更高的准确率预测数字标签:

  • 使用更多的神经元。隐藏层中的10个神经元是不够的。至少应该使用30个神经元。
  • 在训练前对输入进行归一化和中心化。参见 Max Kuhn 的《Applied Predictive Modeling》第 3 章。
  • learningrate 参数仅用于“backprop”算法。对于其他算法(rprop+、sag、slr 等),请使用 learningrate.limit 和 learningrate.factor 参数。
  • 使用更多的训练数据。

使用 30 个神经元的神经网络(直接预测标签数值)给出如下结果:

[1] "NN to predict Labels." 
[1] "Confusion matrix for training set:" 
     Expected 
Predicted 0 1 2 3 4 5 6 7 8 9 
     0 96 0 0 0 0 0 0 0 0 0 
     1 1 116 0 0 0 0 0 0 0 0 
     2 0 0 99 0 0 0 0 0 0 0 
     3 0 0 0 93 0 0 0 0 0 0 
     4 0 0 0 0 104 1 0 0 0 0 
     5 0 0 0 0 1 91 0 0 0 0 
     6 0 0 0 0 0 0 94 0 0 0 
     7 0 0 0 0 0 0 0 117 0 0 
     8 0 0 0 0 0 0 0 0 87 0 
     9 0 0 0 0 0 0 0 0 0 100 
[1] "Model accuracy on training set is 99.7%" 

[1] "Confusion matrix for test set:" 
     Expected 
Predicted 0 1 2 3 4 5 6 7 8 9 
     0 337 380 257 160 87 85 67 25 45 30 
     1 134 169 97 77 60 64 70 32 41 16 
     2 121 179 112 109 59 79 69 31 55 27 
     3 119 136 138 114 99 102 96 67 66 55 
     4 87 102 91 135 106 102 104 86 87 54 
     5 84 75 95 114 114 91 142 104 82 66 
     6 48 41 80 98 106 116 144 138 104 92 
     7 22 28 55 82 103 78 100 146 104 124 
     8 16 9 42 56 80 60 65 123 93 125 
     9 12 16 65 65 168 115 101 276 297 420 
[1] "Model accuracy on test set is 17.32%" 

测试结果还远远谈不上好,但混淆矩阵呈现出对角线形状,这说明模型正朝着正确的方向努力。通过调整训练集大小和阈值(threshold),可以提高模型的准确率,我曾得到 30% 的准确率。但这种模型是有局限的,使用预测标签类别(而不是直接预测标签数值)的模型可以获得最好的结果。我用 neuralnet 包通过这种模型获得了约 80% 的准确率。

使用 30 个隐藏层神经元和 1000 个训练样本时,预测标签类别的神经网络给出:

[1] "NN to predict Label Classes." 
[1] "Confusion matrix for training set:" 
     Expected 
Predicted 0 1 2 3 4 5 6 7 8 9 
     0 95 0 0 0 0 0 0 0 0 1 
     1 0 113 0 0 0 0 0 1 0 0 
     2 0 0 98 0 0 2 0 1 0 0 
     3 1 2 0 93 1 0 0 1 0 0 
     4 0 0 0 0 104 0 0 0 0 1 
     5 1 1 0 0 0 90 0 1 0 0 
     6 0 0 0 0 0 0 93 0 0 0 
     7 0 0 0 0 0 0 0 112 0 0 
     8 0 0 0 0 0 0 0 0 86 0 
     9 0 0 1 0 0 0 1 1 1 98 
[1] "Model accuracy on training set is 98.2%" 
[1] "Confusion matrix for test set:" 
     Expected 
Predicted 0 1 2 3 4 5 6 7 8 9 
     0 791 0 32 28 11 62 12 25 20 22 
     1 1 1050 13 4 10 10 2 13 31 35 
     2 24 2 580 59 8 13 39 73 26 24 
     3 42 14 105 607 79 112 74 68 106 124 
     4 10 12 40 28 495 62 59 20 83 83 
     5 39 31 25 126 35 444 71 6 54 22 
     6 13 3 45 7 22 15 554 3 18 13 
     7 4 4 31 11 37 10 7 732 11 66 
     8 21 7 92 79 51 96 50 19 518 21 
     9 35 12 69 61 234 68 90 69 107 599 
[1] "Model accuracy on test set is 63.7%" 
#################### Importing the data into R ##########
#path <- "path_to_data_folder/MNIST_database_of_handwritten_digits/" # Data can be downloaded from: http://yann.lecun.com/exdb/mnist/
path <- "../MNIST_DATA/UNZIP/"
to.read <- file(paste0(path, "train-images.idx3-ubyte"), "rb")
to.read_Label <- file(paste0(path, "train-labels.idx1-ubyte"), "rb")

# File headers: 4-byte big-endian integers, read in the order they are stored.
magicNumber <- readBin(to.read, integer(), n = 1, endian = "big")
magicNumber_Label <- readBin(to.read_Label, integer(), n = 1, endian = "big")
numberOfImages <- readBin(to.read, integer(), n = 1, endian = "big")
numberOfImages_Label <- readBin(to.read_Label, integer(), n = 1, endian = "big")
rowPixels <- readBin(to.read, integer(), n = 1, endian = "big")
columnPixels <- readBin(to.read, integer(), n = 1, endian = "big")

#Trick #1: read unsigned data
# (signed = FALSE is spelled out in full; the shorthand F can be reassigned.)
trainDigits <- replicate(
  numberOfImages,
  c(matrix(readBin(to.read, integer(), n = (rowPixels * columnPixels),
                   size = 1, endian = "big", signed = FALSE),
           rowPixels, columnPixels)[, columnPixels:1])
)
# replicate() returns one image per column; transpose to one image per row.
trainDigits <- data.frame(t(trainDigits), row.names = NULL)
# The labels are consecutive single bytes, so a single readBin call reads them
# all instead of issuing one call per label.
trainDigits_Label <- readBin(to.read_Label, integer(), n = numberOfImages,
                             size = 1, endian = "big", signed = FALSE)
close(to.read)
close(to.read_Label)

#################### Test Data ####################

to.read_test <- file(paste0(path, "t10k-images.idx3-ubyte"), "rb")
to.read_Label_test <- file(paste0(path, "t10k-labels.idx1-ubyte"), "rb")

# File headers: 4-byte big-endian integers, read in the order they are stored.
magicNumber <- readBin(to.read_test, integer(), n = 1, endian = "big")
magicNumber_Label <- readBin(to.read_Label_test, integer(), n = 1, endian = "big")
numberOfImages_test <- readBin(to.read_test, integer(), n = 1, endian = "big")
numberOfImages_Label_test <- readBin(to.read_Label_test, integer(), n = 1, endian = "big")
rowPixels <- readBin(to.read_test, integer(), n = 1, endian = "big")
columnPixels <- readBin(to.read_test, integer(), n = 1, endian = "big")

#read unsigned data
# The pixels must come from the test connection (to.read_test); the training
# connection `to.read` is a different file and not the one opened above.
testDigits <- replicate(
  numberOfImages_test,
  c(matrix(readBin(to.read_test, integer(), n = (rowPixels * columnPixels),
                   size = 1, endian = "big", signed = FALSE),
           rowPixels, columnPixels)[, columnPixels:1])
)
# replicate() returns one image per column; transpose to one image per row.
testDigits <- data.frame(t(testDigits), row.names = NULL)
# One readBin call reads all single-byte labels.
testDigits_Label <- readBin(to.read_Label_test, integer(), n = numberOfImages_test,
                            size = 1, endian = "big", signed = FALSE)
close(to.read_test)
close(to.read_Label_test)

#################### Modelling ####################

library(neuralnet)

# Add the label as the first column of the training data.frame
trainData <- cbind(trainDigits_Label, trainDigits)
names(trainData)[1] <- "Label"

# Reduce training data for speedup
trainSample <- 1000 # use more than 500 rows to get better model accuracy (slow!)
trainData <- trainData[1:trainSample, ]
myThreshold <- trainSample / 5000 # use smaller threshold to get better model accuracy (slow!)

# Trick #2: normalize and center pixel data before training and testing
# The scaling factor is taken from the pixel columns only (column 1 is the
# label), so the label values can never influence the input normalization.
normFactor <- max(trainData[, -1]) # =255
trainData[, -1] <- trainData[, -1] / normFactor # normalize inputs
# One global mean over all training pixels is used for centering.
# NOTE(review): the original comment claimed a value of 0.5 and asked whether a
# per-column mean should be used instead; the actual value depends on the data.
centerFactor <- mean(as.matrix(trainData[, -1]))
trainData[, -1] <- trainData[, -1] - centerFactor # center inputs
# Apply exactly the same scaling and shift to the test images.
testDigits <- testDigits / normFactor - centerFactor

# Trick #3: use more neurons in the hidden layer to raise the model accuracy
nHidden <- 30

# Train the model that predicts the label as one numeric output.
# The right-hand side lists every pixel column: X1 + X2 + ... + X<ncol>.
pixelTerms <- paste0("X", seq_len(ncol(trainDigits)), collapse = "+")
myFormula <- as.formula(paste0("Label ~ ", pixelTerms))
myNnet <- neuralnet(
  formula = myFormula,
  data = trainData,
  hidden = nHidden,
  algorithm = 'rprop+', # learningrate=0.01,
  learningrate.limit = list(min = 1e-10, max = 0.01),  # default values min/max = 1e-10/0.1
  learningrate.factor = list(minus = 0.5, plus = 1.2), # default values minus/plus = 0.5/1.2
  err.fct = "sse",  # sum of squared errors as the error function
  act.fct = "tanh", # hyperbolic tangent activation function
  threshold = myThreshold,
  lifesign = "full",
  lifesign.step = 500,
  stepmax = 3e05
)

# Trick #4: get rid of negative predictions. consider them to be equal to zero.
# The same with too big predictions (>9)
# pmax()/pmin() clamp element-wise and keep the matrix shape of the result.
myNnet$net.result[[1]] <- pmin(pmax(myNnet$net.result[[1]], 0), 9)

#################### 'neuralnet' Predictions ####################

# Predict the test images and clamp the outputs to the same 0..9 range.
predictOut <- compute(myNnet, testDigits)
predictOut$net.result <- pmin(pmax(predictOut$net.result, 0), 9)

#################### Result analysis ####################

# Both table margins are pinned to the digits 0..9. Without fixed levels a digit
# that is never predicted would drop out of the table, diag() would no longer
# line up predicted and expected digits, and the accuracy would be wrong.
digitLevels <- 0:9

# Model accuracy on training data
trainPredicted <- as.vector(round(myNnet$net.result[[1]]))
trainExpected <- trainData[, "Label"]
confTrain <- table(Predicted = factor(trainPredicted, levels = digitLevels),
                   Expected = factor(trainExpected, levels = digitLevels))
# Share of exact matches; missing predictions count as misses.
accTrain <- sum(trainPredicted == trainExpected, na.rm = TRUE) / length(trainExpected) * 100
print("NN to predict Labels.")
print("Confusion matrix for training set:")
print(confTrain)
print(paste0("Model accuracy on training set is ", round(accTrain, 4), "%"))

# Model accuracy on test data
testPredicted <- as.vector(round(predictOut$net.result))
confTest <- table(Predicted = factor(testPredicted, levels = digitLevels),
                  Expected = factor(testDigits_Label, levels = digitLevels))
accTest <- sum(testPredicted == testDigits_Label, na.rm = TRUE) / length(testDigits_Label) * 100
print("Confusion matrix for test set:")
print(confTest)
print(paste0("Model accuracy on test set is ", round(accTest, 4), "%"))



#########################################################################################
# Trick #5: Predict digit Class instead of predicting digit Label
# Replace each Label with a vector of 10 bits "Label classes"
library(nnet)

# Prepend one indicator column per label value ("out.<label>") to the training
# data; the pixel columns are everything except the first column (Label).
output <- class.ind(trainData[, "Label"])
colnames(output) <- paste0('out.', colnames(output))
output.names <- colnames(output)
input.names <- colnames(trainData)[-1]
trainData <- cbind(output, trainData)

# Train the model that predicts the label classes: all indicator columns on the
# left-hand side of the formula, all pixel columns on the right-hand side.
myFormula <- as.formula(paste(
  paste0(output.names, collapse = '+'),
  paste0(input.names, collapse = "+"),
  sep = " ~ "
))
myNnetClass <- neuralnet(
  formula = myFormula,
  data = trainData,
  hidden = nHidden,
  algorithm = 'sag', # learningrate=0.01,
  learningrate.limit = list(min = 1e-10, max = 0.01),  # default values min/max = 1e-10/0.1
  learningrate.factor = list(minus = 0.5, plus = 1.2), # default values minus/plus = 0.5/1.2
  err.fct = "sse",  # sum of squared errors as the error function
  act.fct = "tanh", # hyperbolic tangent activation function
  threshold = myThreshold,
  lifesign = "full",
  lifesign.step = 500,
  stepmax = 3e05
)


# Convert binary output to categorical output (labels)
# Keep the raw per-class outputs in nnres, then replace the stored training
# result by the digit whose output column is largest in each row.
nnres <- myNnetClass$net.result[[1]]
myNnetClass$net.result[[1]] <- (0:9)[apply(nnres, 1, which.max)]


#################### 'neuralnet' Predictions ####################

# Same decoding for the test images: name the ten output columns Cl0..Cl9 and
# take the position of the row-wise maximum as the predicted digit.
predictOutClass <- compute(myNnetClass, testDigits)
colnames(predictOutClass$net.result) <- paste0("Cl", 0:9)
winningColumn <- apply(predictOutClass$net.result, 1, which.max)
predictedLabel <- (0:9)[winningColumn]

#################### Result analysis ####################

# Both table margins are pinned to the digits 0..9. Without fixed levels a digit
# that is never predicted would drop out of the table, diag() would no longer
# line up predicted and expected digits, and the accuracy would be wrong.
digitLevels <- 0:9

# Model accuracy on training data
trainPredictedClass <- as.vector(myNnetClass$net.result[[1]])
trainExpectedClass <- trainData[, "Label"]
confTrain <- table(Predicted = factor(trainPredictedClass, levels = digitLevels),
                   Expected = factor(trainExpectedClass, levels = digitLevels))
# Share of exact matches; missing predictions count as misses.
accTrain <- sum(trainPredictedClass == trainExpectedClass, na.rm = TRUE) /
  length(trainExpectedClass) * 100
print("NN to predict Label Classes.")
print("Confusion matrix for training set:")
print(confTrain)
print(paste0("Model accuracy on training set is ", round(accTrain, 4), "%"))

# Model accuracy on test data
confTest <- table(Predicted = factor(predictedLabel, levels = digitLevels),
                  Expected = factor(testDigits_Label, levels = digitLevels))
accTest <- sum(predictedLabel == testDigits_Label, na.rm = TRUE) /
  length(testDigits_Label) * 100
print("Confusion matrix for test set:")
print(confTest)
print(paste0("Model accuracy on test set is ", round(accTest, 4), "%"))
+0

非常感谢ASH的这些见解。我目前从这个问题上移开,但想尝试一下你的建议。 – StrikeR