大和优雅的答案。我对你的问题的第二部分感兴趣。具体说明你将如何使用第一部分来创建你提到的新的9列。我不知道我是否缺少某些东西,但不是每次检查第一个元素是否与1,2,3等相匹配,都可以简单地捕获第一个元素。事情是这样的:
library(dplyr)
library(tidyr)
set.seed(123)
A <- 1:100
type.a <- rnorm(100, mean=5000, sd=1433)
type.b <- rnorm(100, mean=5000, sd=1425)
type.c <- rnorm(100, mean=5000, sd=1125)
type.d <- rnorm(100, mean=5000, sd=1233)
df1 <- data.frame(A, type.a, type.b, type.c, type.d)
df1 %>%
group_by(A) %>%
mutate_each(funs(substr(.,1,1))) %>% # keep first digit
ungroup %>%
gather(variable, type, -A) %>% # create combinations of rows and digits
select(-variable) %>%
mutate(type = paste0("type_",type),
value = 1) %>%
group_by(A,type) %>%
summarise(value = sum(value)) %>% # count how many times the row belongs to each type
ungroup %>%
spread(type, value, fill=0) %>% # create the new columns
inner_join(df1, by="A") %>% # join back initial info
select(A, starts_with("type."), starts_with("type_")) # order columns
# A type.a type.b type.c type.d type_1 type_2 type_3 type_4 type_5 type_6 type_7 type_8 type_9
# 1 1 4196.838 3987.671 7473.662 4118.106 0 0 1 2 0 0 1 0 0
# 2 2 4670.156 5366.059 6476.465 4071.935 0 0 0 2 1 1 0 0 0
# 3 3 7233.629 4648.464 4701.712 3842.782 0 0 1 2 0 0 1 0 0
# 4 4 5101.039 4504.752 5611.093 3702.251 0 0 1 1 2 0 0 0 0
# 5 5 5185.269 3643.944 4533.868 4460.982 0 0 1 2 1 0 0 0 0
# 6 6 7457.688 4935.835 4464.222 5408.344 0 0 0 2 1 0 1 0 0
# 7 7 5660.493 3881.511 4112.822 2516.478 0 1 1 1 1 0 0 0 0
# 8 8 3187.167 2623.183 4331.056 5261.372 0 1 1 1 1 0 0 0 0
# 9 9 4015.740 4458.177 6857.271 6524.820 0 0 0 2 0 2 0 0 0
# 10 10 4361.366 6309.570 4939.218 7512.329 0 0 0 2 0 1 1 0 0
# .. .. ... ... ... ... ... ... ... ... ... ... ... ... ...
例如,当我们有列A和B开头:
library(dplyr)
library(tidyr)
set.seed(123)
A <- 1:100
B <- 101:200
type.a <- rnorm(100, mean=5000, sd=1433)
type.b <- rnorm(100, mean=5000, sd=1425)
type.c <- rnorm(100, mean=5000, sd=1125)
type.d <- rnorm(100, mean=5000, sd=1233)
df1 <- data.frame(A,B, type.a, type.b, type.c, type.d)
# work by grouping on A and B
df1 %>%
group_by(A,B) %>%
mutate_each(funs(substr(.,1,1))) %>%
ungroup %>%
gather(variable, type, -c(A,B)) %>%
select(-variable) %>%
mutate(type = paste0("type_",type),
value = 1) %>%
group_by(A,B,type) %>%
summarise(value = sum(value)) %>%
ungroup %>%
spread(type, value, fill=0) %>%
inner_join(df1, by=c("A","B")) %>%
select(A,B, starts_with("type."), starts_with("type_"))
# A B type.a type.b type.c type.d type_1 type_2 type_3 type_4 type_5 type_6 type_7 type_8 type_9
# 1 1 101 4196.838 3987.671 7473.662 4118.106 0 0 1 2 0 0 1 0 0
# 2 2 102 4670.156 5366.059 6476.465 4071.935 0 0 0 2 1 1 0 0 0
# 3 3 103 7233.629 4648.464 4701.712 3842.782 0 0 1 2 0 0 1 0 0
# 4 4 104 5101.039 4504.752 5611.093 3702.251 0 0 1 1 2 0 0 0 0
# 5 5 105 5185.269 3643.944 4533.868 4460.982 0 0 1 2 1 0 0 0 0
# 6 6 106 7457.688 4935.835 4464.222 5408.344 0 0 0 2 1 0 1 0 0
# 7 7 107 5660.493 3881.511 4112.822 2516.478 0 1 1 1 1 0 0 0 0
# 8 8 108 3187.167 2623.183 4331.056 5261.372 0 1 1 1 1 0 0 0 0
# 9 9 109 4015.740 4458.177 6857.271 6524.820 0 0 0 2 0 2 0 0 0
# 10 10 110 4361.366 6309.570 4939.218 7512.329 0 0 0 2 0 1 1 0 0
# .. .. ... ... ... ... ... ... ... ... ... ... ... ... ... ...
然而,在这种情况下,你应该注意到,您有一个每行的值。所以,为了定义你的行(以一种独特的方式),B并不是真的需要。因此,您可以准确地工作,像以前那样(当B是不存在)和刚刚加入B到你的结果:
df1 %>%
select(-B) %>%
group_by(A) %>%
mutate_each(funs(substr(.,1,1))) %>%
ungroup %>%
gather(variable, type, -A) %>%
select(-variable) %>%
mutate(type = paste0("type_",type),
value = 1) %>%
group_by(A,type) %>%
summarise(value = sum(value)) %>% # count how many times the row belongs to each type
ungroup %>%
spread(type, value, fill=0) %>%
inner_join(df1, by="A") %>%
mutate(B=B) %>%
select(A,B, starts_with("type."), starts_with("type_"))
如何只'$ DF1 TYPE_1 < - rowSums((DF1 <2000)(DF1> 999))而不是那个巨大且不必要的'ifelse'语句? (或'+(!! rowSums((df1 <2000)&(df1> 999)))'如果在同一行内有多个列匹配条件) –
我从来没有使用过它。我想要一个字符串,当TRUE/FALSE矢量全部等于FALSE时等于0,当TRUE/FALSE至少有一个真值时,字符串为1 TRUE – lukeg
你可以用'any',即'lapply(yourdf [-1],函数x)+(any(substr(x,1,1)== 1)))' – akrun