2017-09-21 49 views
0

dplyr::count() 多列

我有以下数据集:

# Example data: ten rows, six character columns of place-name strings.
# Missing entries are NA; the literal string "Not known/missing" is an
# ordinary (non-NA) value. The list is already named, so no separate
# names attribute needs to be supplied to structure().
dat <- structure(
  list(
    C86_1981 = c(
      "Outer London", "Buckinghamshire", NA, "Ross and Cromarty",
      "Cornwall and Isles of Scilly", NA, "Kirkcaldy", "Devon", "Kent",
      "Renfrew"
    ),
    C96_1981 = c(
      "Outer London", "Buckinghamshire", NA, "Ross and Cromarty",
      "Not known/missing", NA, "Kirkcaldy", NA, NA, NA
    ),
    C00_1981 = c(
      "Outer London", "Inner London", "Lancashire", "Ross and Cromarty", NA,
      "Humberside", "Kirkcaldy", NA, NA, NA
    ),
    C04_1981 = c(
      "Kent", NA, NA, "Ross and Cromarty", NA, "Humberside",
      "Not known/missing", NA, NA, "Renfrew"
    ),
    C08_1981 = c(
      "Kent", "Oxfordshire", NA, "Ross and Cromarty",
      "Cornwall and Isles of Scilly", "Humberside", "Dunfermline", NA, NA,
      "Renfrew"
    ),
    C12_1981 = c(
      "Kent", NA, NA, "Ross and Cromarty", "Cornwall and Isles of Scilly",
      "Humberside", "Dunfermline", NA, NA, "Renfrew"
    )
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)

我想对每一列执行 dplyr::count()。预期结果:

# A tibble: 10 x 3 
         C86_1981 dat86_n dat96_n ... 
          <chr> <int> <int> 
1    Buckinghamshire  1  1 
2 Cornwall and Isles of Scilly  1  NA 
3      Devon  1  NA 
4       Kent  1  NA 
5     Kirkcaldy  1  1 
6     Outer London  1  1 
7      Renfrew  1  NA 
8   Ross and Cromarty  1  1 
9       <NA>  2  5 
10   Not known/missing  NA  1 

目前我是手动完成这一步,然后用 dplyr::full_join() 合并结果:

library("tidyverse") 

# Tally each column on its own and give the count column a
# column-specific name so the tallies can sit side by side after joining.
dat86_n <- rename(count(dat, C86_1981), dat86_n = n)
dat96_n <- rename(count(dat, C96_1981), dat96_n = n)
# ...

# Combine the per-column tallies, matching the values of C96_1981 against
# those of C86_1981. A full join keeps values that occur in only one of the
# two columns; the count from the other column is then NA.
dat_counts <- full_join(
  dat86_n,
  dat96_n,
  by = c("C86_1981" = "C96_1981")
)
# ...

这样可行,但如果以后我的数据有任何变化,就不够稳健了。我希望能以编程方式做到这一点。

我试过一个循环:

# Attempt 1: lapply() hands each column of `dat` to count() as a bare
# character vector, which is what the error below complains about.
lapply(dat, count) 
# Error in UseMethod("groups") : 
# no applicable method for 'groups' applied to an object of class "character" 

(purrr::map() 给出了同样的错误。)我认为出现这个错误是因为 count() 需要一个 tbl,并把变量作为单独的参数传入,所以我还尝试了:

# Attempt 2: pass the data frame explicitly and use the lapply() argument as
# the variable to count. Judging by the error below, count() looks for a
# column literally named `x` in `dat` instead of using the vector bound to `x`.
lapply(dat, function(x) { 
    count(dat, x) 
}) 
# Error in grouped_df_impl(data, unname(vars), drop) : 
# Column `x` is unknown 

同样,purrr::map() 给出了相同的错误。我也试过 summarise_all() 的变体:

# Attempt 3: apply count() to every column through summarise_all(). This
# fails with the same 'groups' dispatch error on a character vector as the
# first lapply() attempt (error text reproduced below).
dat %>% 
    summarise_all(count) 
    # Error in summarise_impl(.data, dots) : 
    # Evaluation error: no applicable method for 'groups' applied to an object of class "character". 

我觉得我漏掉了某些显而易见的东西,解决方案应该很简单。特别欢迎 dplyr 的解决方案,因为那是我最常用的工具。

回答

2

再配合 tidyr 包,下面的代码就能解决这个问题:

# Stack every column into (name, city) pairs, tally each pair, then spread
# the tallies back out so each original column becomes one count column.
# Values that never occur in a given column end up as NA in that column.
dat %>%
  tidyr::gather(name, city) %>%
  dplyr::group_by(name, city) %>%
  dplyr::count() %>%
  dplyr::ungroup() %>%
  tidyr::spread(name, n)

结果:

# A tibble: 15 x 7 
          city C00_1981 C04_1981 C08_1981 C12_1981 C86_1981 C96_1981 
*      <chr> <int> <int> <int> <int> <int> <int> 
1    Buckinghamshire  NA  NA  NA  NA  1  1 
2 Cornwall and Isles of Scilly  NA  NA  1  1  1  NA 
3      Devon  NA  NA  NA  NA  1  NA 
4     Dunfermline  NA  NA  1  1  NA  NA 
5     Humberside  1  1  1  1  NA  NA 
6     Inner London  1  NA  NA  NA  NA  NA 
7       Kent  NA  1  1  1  1  NA 
8     Kirkcaldy  1  NA  NA  NA  1  1 
9     Lancashire  1  NA  NA  NA  NA  NA 
10   Not known/missing  NA  1  NA  NA  NA  1 
11     Outer London  1  NA  NA  NA  1  1 
12     Oxfordshire  NA  NA  1  NA  NA  NA 
13      Renfrew  NA  1  1  1  1  NA 
14   Ross and Cromarty  1  1  1  1  1  1 
15       <NA>  4  5  3  4  2  5 
+0

是的!谢谢!一个小调整:可以省去 'ungroup()',因为 'count()' 会替你完成这一步,除此之外堪称完美。 – Phil

2

@友leee 刚好抢在我前面回答了 ;)

使用 tidyverse:

library(tidyverse) 

# Reshape to long (year, county) pairs, count the rows in each pair, then
# widen again so that every original column holds its own counts.
df <- dat %>%
  gather(year, county) %>%
  group_by(year, county) %>%
  summarise(no = n()) %>%
  spread(year, no)

# A tibble: 15 x 7 
         county C00_1981 C04_1981 C08_1981 C12_1981 C86_1981 C96_1981 
*      <chr> <int> <int> <int> <int> <int> <int> 
1    Buckinghamshire  NA  NA  NA  NA  1  1 
2 Cornwall and Isles of Scilly  NA  NA  1  1  1  NA 
3      Devon  NA  NA  NA  NA  1  NA 
4     Dunfermline  NA  NA  1  1  NA  NA 
5     Humberside  1  1  1  1  NA  NA 
6     Inner London  1  NA  NA  NA  NA  NA 
7       Kent  NA  1  1  1  1  NA 
8     Kirkcaldy  1  NA  NA  NA  1  1 
9     Lancashire  1  NA  NA  NA  NA  NA 
10   Not known/missing  NA  1  NA  NA  NA  1 
11     Outer London  1  NA  NA  NA  1  1 
12     Oxfordshire  NA  NA  1  NA  NA  NA 
13      Renfrew  NA  1  1  1  1  NA 
14   Ross and Cromarty  1  1  1  1  1  1 
15       <NA>  4  5  3  4  2  5 
+1

仍值得赞赏。谢谢:) – Phil

+0

谢谢 @Phil,总是需要一些分数来提高声望! ;) – sorearm