来自 tidyverse 的解决方案。dt3 是第一个需要的输出,而 dt5 是第二个需要的输出。这里不需要使用循环(loops)。
# Build the three-row example input: one opening year and one closing year
# per record. The last record has no closing year, so CLOSE is NA there.
dt <- data.frame(
  OPENING = c(2007L, 2005L, 2004L),
  CLOSE = c(2008L, 2008L, NA),
  stringsAsFactors = FALSE
)
# Load package
library(tidyverse)
# Expand every record into one indicator column per year it was open.
# Result: one row per input record, keeping OPENING, CLOSE, ID and EndYear,
# plus one "Y<year>" column per year that appears in any record's range.
# A cell is 1 when the year lies in OPENING..EndYear and NA otherwise.
dt2 <- dt %>%
  mutate(
    # row_number() stays correct on a zero-row input, where 1:n() would
    # produce c(1, 0).
    ID = row_number(),
    # Records without a closing year are treated as open through 2010.
    EndYear = ifelse(is.na(CLOSE), 2010, CLOSE)
  ) %>%
  # Create year range list: one integer sequence OPENING:EndYear per row
  mutate(YearRange = map2(OPENING, EndYear, `:`)) %>%
  # Unnest the list column. The column is named explicitly because calling
  # unnest() without it is deprecated in tidyr >= 1.0.0 and emits a warning.
  unnest(YearRange) %>%
  mutate(
    YearRange = paste0("Y", YearRange),
    Value = 1
  ) %>%
  # Spread based on YearRange and Value: back to one row per ID
  spread(YearRange, Value)
# Desired output 1: the wide year-indicator table in ID order, with the
# helper columns ID and EndYear removed.
dt3 <- arrange(dt2, ID) %>%
  select(-c(ID, EndYear))
# Build the "S<year>" indicator columns for 2007-2009, one row per ID.
# S<year> is 1 when the record is open (Value == 1) both in that year and
# in the year two rows earlier within the same ID; otherwise it is 0.
dt4 <- dt2 %>%
  # Back to long format. The year columns are selected by pattern instead of
  # a fixed Y2004:Y2010 range, so other year spans in the data still work.
  gather(YearRange, Value, matches("^Y[0-9]{4}$")) %>%
  # lag() below depends on the years being in ascending order within each
  # ID, so sort on both keys explicitly.
  arrange(ID, YearRange) %>%
  group_by(ID) %>%
  # Set the lag here. An offset of 2 pairs each year with the value two
  # years earlier.
  # NOTE(review): the original comment described this as "3 years ago";
  # confirm whether the intended offset is 2 or 3.
  mutate(Value2 = lag(Value, 2)) %>%
  # Evaluate the condition between a year and its lagged year. %in% is used
  # so that NA (year not in range, or no earlier row) counts as FALSE.
  mutate(Value3 = ifelse(Value %in% 1 & Value2 %in% 1, 1, 0)) %>%
  # Grouping is only needed for lag(); drop it so it does not leak into dt4.
  ungroup() %>%
  mutate(YearRange = sub("Y", "S", YearRange)) %>%
  select(ID, YearRange, Value3) %>%
  # Filter for S2007 to S2009
  filter(YearRange %in% paste0("S", 2007:2009)) %>%
  spread(YearRange, Value3)
# Desired output 2: the year-indicator table joined with the S2007-S2009
# columns on ID, in ID order, with the helper columns ID and EndYear removed.
dt5 <- left_join(dt2, dt4, by = "ID") %>%
  arrange(ID) %>%
  select(-c(ID, EndYear))
来源
2017-07-19 04:41:31
www
这是我见过的最棒的答案之一!谢谢。另外,我想在代码中再加一个过滤条件(filter(YearRange %in% ~))。我想取 Y2007~Y2009 的子集,于是写了下面的代码,但它不起作用。应该如何修改代码,才能同时使用这两种过滤? –