0

我目前正在尝试用 BLB(Bag of Little Bootstraps)自助法来计算模型估计值,并希望并行执行。不使用并行时,我的代码工作正常;但并行计算时,每个核心返回的结果都包含 NA 值。我不明白这些 NA 值是怎么来的,因为 iris 数据集本身根本不含 NA。下面是我正在使用的代码(R 中的并行计算,实现自助法):

library(doParallel) 
library(itertools) 

# Start one PSOCK worker per detected core and register the cluster as the
# backend used by foreach's %dopar%.
num_of_cores <- detectCores()
if (is.na(num_of_cores)) {
  # detectCores() is documented to return NA when the count is unknown.
  num_of_cores <- 1L
}
cl <- makePSOCKcluster(num_of_cores)
registerDoParallel(cl)
# NOTE: `cl` is left running because later snippets in this file still use
# %dopar%; call stopCluster(cl) once all parallel work is finished.

# No attach(iris): every model below receives its columns through glm()'s
# `data` argument, so nothing needs iris on the search path.
data <- iris

# Empty accumulator kept for the later snippets in this file, which append
# coefficient rows to `coeftmp` inside their own foreach bodies.
coeftmp <- data.frame()

# Split the rows of `data` into one chunk per core. Each worker runs 400
# subsample-then-bootstrap fits on its chunk and returns the mean of each
# coefficient; the per-chunk mean vectors are bound together as columns of `r`.
system.time(
  r <- foreach(
    dat = isplitRows(data, chunks = num_of_cores),
    .combine = cbind
  ) %dopar% {
    # The response is a bare column name so that it is looked up in `data`,
    # exactly like the predictors.
    model_formula <- Petal.Width ~ Petal.Length + Sepal.Length + Sepal.Width
    n_boot <- 400L

    # Subsample size b = n^0.6, where n is the number of rows in this chunk.
    blb_size <- round(nrow(dat)^0.6)

    # One coefficient vector per replicate, collected in a list instead of
    # growing a data frame with rbind() on every iteration.
    boot_coefs <- lapply(seq_len(n_boot), function(i) {
      # Same seed sequence on every worker, as in the original loop.
      set.seed(i)

      # sampling B(n) data points from the chunk without replacement
      subsample <- dat[sample(nrow(dat), size = blb_size, replace = FALSE), ]

      # sampling from the subsample with replacement
      resample_rows <- sample(
        nrow(subsample),
        size = nrow(subsample),
        replace = TRUE
      )

      fit <- glm(model_formula, data = subsample[resample_rows, ])
      coef(fit)
    })

    # Mean of each coefficient over all replicates (names are preserved).
    # NOTE(review): NA is deliberately not removed here. With many chunks,
    # b is small, so a resample can hold fewer distinct rows than there are
    # coefficients; glm() then reports NA for the coefficients it cannot
    # estimate, and that NA propagates into this mean. This is the likely
    # source of NA in `r` -- confirm by inspecting `boot_coefs`.
    colMeans(do.call(rbind, boot_coefs))
  }
)
+0

因为我不知道你有多少个内核,所以不确定下面的内容能否解决这个问题:

你可以通过逐步排查来定位你的问题;下面这个链接也许会有帮助:http://stackoverflow.com/questions/33221779/ –

+0

另外,如果你是在做自助法(bootstrap),我不清楚为什么抽取 'sample_BOFN' 时不采用有放回抽样。而且看起来你并没有真正用到 'sample_BOFN',所以你可能希望把它从(示例)代码中删除。 –

+0

我正在尝试实现 BLB 自助法,它要求先以无放回的方式抽取子样本,然后再对子样本做有放回抽样,所以才这样写。 – navri

回答

0

我认为你得用调试器逐步跟踪其中几次迭代才能解决这个问题。不过,你得到的 NA 来自这一行:

# Fits a GLM (glm()'s default gaussian family) of Petal.Width on the three
# other measurements, using the bootstrap resample as `data`. The response
# is referenced as `sample_bootstrap$Petal.Width` rather than as a bare
# column name, while the predictors are looked up through `data`.
bootstrapModel <- glm(sample_bootstrap$Petal.Width ~ Petal.Length + Sepal.Length + Sepal.Width, data = sample_bootstrap) 

我猜测是某些 sample_bootstrap 样本产生了奇异(singular)的设计矩阵,因为奇异性会让对应的系数变成 NA。不过也可能是别的原因导致了这个问题,尽管它肯定出自这行代码……你需要用调试器把它定位出来。

……也就是说,这不是一个完整的答案。但下面的代码应该足以让你自己解决这个问题:

# Debugging run with a single chunk: the whole of `data` goes to one worker.
r2 <- foreach(chunk = isplitRows(data, chunks = 1)) %dopar% {
  subsample_size <- round(nrow(chunk)^0.6)

  for (iter in seq_len(400)) {
    set.seed(iter)

    # draw the B(n)-sized subsample from the chunk, without replacement
    subsample_rows <- sample(
      nrow(chunk),
      size = subsample_size,
      replace = FALSE
    )
    subsample <- chunk[subsample_rows, ]

    # bootstrap the subsample: same size, with replacement
    boot_rows <- sample(
      nrow(subsample),
      size = nrow(subsample),
      replace = TRUE
    )
    boot_sample <- subsample[boot_rows, ]

    boot_fit <- glm(
      boot_sample$Petal.Width ~ Petal.Length + Sepal.Length + Sepal.Width,
      data = boot_sample
    )
    # `coeftmp` starts as the accumulator defined outside this loop
    coeftmp <- rbind(coeftmp, boot_fit$coefficients)
  }

  # Hand back every coefficient row together with the column means, so the
  # individual fits can be inspected while debugging.
  list(coeftmp = coeftmp, result = colMeans(coeftmp))
}

# Number of missing coefficients in the single-chunk run
# (original note: no missing coefficients with 1 core).
sum(is.na(r2[[1]]$coeftmp))

# Debugging run with one chunk per core; results come back as a plain list
# with one element per chunk.
r <- foreach(chunk = isplitRows(data, chunks = num_of_cores)) %dopar% {
  subsample_size <- round(nrow(chunk)^0.6)

  for (iter in seq_len(400)) {
    set.seed(iter)

    # draw the B(n)-sized subsample from the chunk, without replacement
    subsample_rows <- sample(
      nrow(chunk),
      size = subsample_size,
      replace = FALSE
    )
    subsample <- chunk[subsample_rows, ]

    # bootstrap the subsample: same size, with replacement
    boot_rows <- sample(
      nrow(subsample),
      size = nrow(subsample),
      replace = TRUE
    )
    boot_sample <- subsample[boot_rows, ]

    boot_fit <- glm(
      boot_sample$Petal.Width ~ Petal.Length + Sepal.Length + Sepal.Width,
      data = boot_sample
    )
    # `coeftmp` starts as the accumulator defined outside this loop
    coeftmp <- rbind(coeftmp, boot_fit$coefficients)
  }

  # Hand back every coefficient row together with the column means, so the
  # individual fits can be inspected while debugging.
  list(coeftmp = coeftmp, result = colMeans(coeftmp))
}

# Per-chunk count of missing coefficients
# (original note: lots of missing values in your coeftmp results).
lapply(r, function(chunk_result) sum(is.na(chunk_result$coeftmp)))