2016-05-14 154 views
1

我有这样一个数据框,想绘制其中的数值:

> dput(df) 
structure(list(OBBLIGATORIO = structure(c(2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("no", 
"yes"), class = "factor"), COUNTRY = structure(c(16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L), .Label = c("Austria", "Belgium", "Bulgaria", 
"Croatia", "Cyprus", "Czech Republic", "Denmark", "Estonia", 
"Finland", "France", "Germany", "Greece", "Hungary", "Iceland", 
"Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg", "Malta", 
"Norway", "Poland", "Portugal", "Romania", "Slovakia", "Slovenia", 
"Spain", "Sweden", "United Kingdom of Great Britain and Northern Ireland" 
), class = "factor"), YEAR = c(2003L, 2006L, 2007L, 2008L, 2009L, 
2010L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 
2003L, 2006L, 2007L, 2008L, 2009L, 2010L, 1995L, 1996L, 1997L, 
1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2006L, 2007L, 2008L, 
2009L, 2010L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 
2002L, 2003L, 2006L, 2007L, 2008L, 2009L, 2010L, 1995L, 1996L, 
1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2006L, 2007L, 
2008L, 2009L, 2010L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 
2001L, 2002L, 2003L, 2006L, 2007L, 2008L, 2009L, 2010L, 1995L, 
1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2006L, 
2007L, 2008L, 2009L, 2010L, 1995L, 1996L, 1997L, 1998L, 1999L, 
2000L, 2001L, 2002L), AGE = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Total", class = "factor"), 
    `CAUSE OF DEATH` = c("Acute poliomyelitis", "Acute poliomyelitis", 
    "Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis", 
    "Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis", 
    "Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis", 
    "Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis", 
    "Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", 
    "Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", 
    "Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", "Measles", 
    "Measles", "Measles", "Measles", "Measles", "Measles", "Measles", 
    "Measles", "Measles", "Measles", "Measles", "Measles", "Measles", 
    "Measles", "Tetanus", "Tetanus", "Tetanus", "Tetanus", "Tetanus", 
    "Tetanus", "Tetanus", "Tetanus", "Tetanus", "Tetanus", "Tetanus", 
    "Tetanus", "Tetanus", "Tetanus", "Tuberculosis", "Tuberculosis", 
    "Tuberculosis", "Tuberculosis", "Tuberculosis", "Tuberculosis", 
    "Tuberculosis", "Tuberculosis", "Tuberculosis", "Tuberculosis", 
    "Tuberculosis", "Tuberculosis", "Tuberculosis", "Tuberculosis", 
    "Viral hepatitis", "Viral hepatitis", "Viral hepatitis", 
    "Viral hepatitis", "Viral hepatitis", "Viral hepatitis", 
    "Viral hepatitis", "Viral hepatitis", "Viral hepatitis", 
    "Viral hepatitis", "Viral hepatitis", "Viral hepatitis", 
    "Viral hepatitis", "Viral hepatitis", "Whooping cough", "Whooping cough", 
    "Whooping cough", "Whooping cough", "Whooping cough", "Whooping cough", 
    "Whooping cough", "Whooping cough", "Whooping cough", "Whooping cough", 
    "Whooping cough", "Whooping cough", "Whooping cough", "Whooping cough" 
    ), VALUE = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 4L, 2L, 2L, 2L, 1L, 1L, 6L, 7L, 7L, 1L, 2L, 
    3L, 2L, 5L, 12L, 9L, 13L, 9L, 13L, 8L, 17L, 14L, 16L, 18L, 
    15L, 19L, 11L, 10L, 25L, 24L, 21L, 22L, 23L, 20L, 34L, 32L, 
    31L, 30L, 29L, 28L, 27L, 26L, 41L, 42L, 43L, 45L, 46L, 47L, 
    33L, 35L, 36L, 37L, 38L, 39L, 40L, 44L, 1L, 2L, 1L, 1L, 1L, 
    2L, 2L, 2L, 1L, 3L, 1L, 1L, 1L, 1L), .Label = c("0", "1", 
    "2", "3", "6", "7", "9", "17", "18", "19", "21", "22", "27", 
    "28", "30", "31", "37", "41", "42", "301", "329", "333", 
    "344", "350", "396", "413", "415", "460", "517", "558", "597", 
    "609", "622", "647", "681", "1087", "1349", "1413", "1448", 
    "1499", "1576", "1654", "1725", "1948", "2531", "2665", "2757" 
    ), class = "factor"), ID = 1:98), .Names = c("OBBLIGATORIO", 
"COUNTRY", "YEAR", "AGE", "CAUSE OF DEATH", "VALUE", "ID"), row.names = c(NA, 
-98L), class = "data.frame") 

我想得到这样一张图:

  • x 轴上是 YEAR 列的值
  • y 轴上是 VALUE 列的值,并按 CAUSE OF DEATH 列划分

因此,像: enter image description here

我尝试:

# Attempt from the question: open a new graphics device, then map YEAR to x,
# VALUE to y and `CAUSE OF DEATH` to both fill and colour, draw density
# curves, and limit the x axis to 1995-2010.
# NOTE(review): per the dput() output above, VALUE is a factor, not numeric.
# NOTE(review): geom_density() draws a density estimate rather than the raw
# values per year; a per-year line chart presumably needs geom_line() - confirm.
x11() 
ggplot(df, aes(x = df$`YEAR`, y = df$`VALUE`, fill = df$`CAUSE OF DEATH`, colour = df$`CAUSE OF DEATH`)) + 
    geom_density(alpha = 0.1) + 
    xlim(1995, 2010) 

但结果与我想要的完全不同。

感谢

+1

您需要运行 `ggplot(df, aes(x = df$YEAR, y = df$VALUE, group = df$`CAUSE OF DEATH`, color = df$`CAUSE OF DEATH`)) + geom_line()` – Divi

+0

您的问题不够清楚。CAUSE OF DEATH 是分类变量。您想如何用 VALUE 除以一个分类变量?请调整数据示例,确保它包含您要用到的所有变量。 – Thierry

回答

1

我不确定你的实际问题是什么,但你的数据框有一个问题:VALUE 列目前是因子(factor),而不是数值。我认为修正这一点后,你的问题很快就能解决。我是在事后(即数据框已经创建之后)做这个转换的;但如果你是通过 read.table() 或类似函数把数据读入 R,则可以在创建数据框时就指定各列的类型,这很可能是更好的做法。

在我的代码中,我使用 dplyr 包来处理数据框。它非常强大,但就这个具体例子而言,它所做的事情用基础 R(base R)同样可以完成。

# Plot the total VALUE per year as one line per cause of death.
#
# Expects `df` to be the data frame from the question (the dput() output),
# with columns YEAR, `CAUSE OF DEATH` and VALUE.

# library() stops with an error if a package is missing; require() would
# only return FALSE and let the script fail later with a confusing message.
library(ggplot2)
library(dplyr)
library(magrittr)

# Paste the dput() output from the question here, e.g.
#   df <- structure(list(...), class = "data.frame")
# Fail early if that step was skipped: without a user-defined `df`, the name
# normally resolves to the function stats::df, not to a data frame.
stopifnot(is.data.frame(df))

# Fix the problem with the `VALUE` column: it is stored as a factor, so go
# through character to parse the labels rather than the internal level codes.
df <- df %>%
  mutate(VALUE = as.numeric(as.character(VALUE)))

# equivalent in base R:
# df$VALUE <- as.numeric(as.character(df$VALUE))

# Sum VALUE per year and cause of death; ungroup so that no grouping leaks
# into later steps.
yearly_totals <- df %>%
  group_by(YEAR, `CAUSE OF DEATH`) %>%
  summarize(value = sum(VALUE)) %>%
  ungroup()

# Make a graph (is it the one you want?): one line with points per cause.
deaths_plot <- ggplot(
  yearly_totals,
  aes(x = YEAR, y = value, color = `CAUSE OF DEATH`)
) +
  geom_line() +
  geom_point() +
  theme_bw()

# Print explicitly so the plot is also drawn when the script is source()d.
print(deaths_plot)

# Save graph for uploading to SO; pass the plot explicitly instead of
# relying on whichever plot happens to be the most recent one.
ggsave("SO37230266.png", plot = deaths_plot)

结果是这样的图:

enter image description here