2012-09-10 36 views
3

我一直在群集我的一些值,然后对它们进行分组。然后我使用ggplot2绘制一些密度图并覆盖这些群。示例图像如下: enter image description hereggplot2中的颜色密度图通过群集组

对于群集中的每个组,我绘制一个密度图并覆盖它们。密度图中的着色对应于聚类中的分组。

我的问题是,我已经根据分组手动分割数据,并将它们放在它们自己的单独文本表中(请参阅下面的代码)。这是非常低效的,对于大数据集可能会非常繁琐。我如何动态绘制ggplot2中的密度图而不将它们分离到各自的文本表中?

原来的输入表看起来像这样它被分裂之前:

scores <- read.table(textConnection(" 
file  max  min  avg    lowest 
132   5112.0  6520.0  5728.0   5699.0 
133   4720.0  6064.0  5299.0   5277.0 
5   4617.0  5936.0  5185.0   5165.0 
1   4384.0  5613.0  4917.0   4895.0 
1010  5008.0  6291.0  5591.0   5545.0 
104   4329.0  5554.0  4858.0   4838.0 
105   4636.0  5905.0  5193.0   5165.0 
35   4304.0  5578.0  4842.0   4831.0 
36   4360.0  5580.0  4891.0   4867.0 
37   4444.0  5663.0  4979.0   4952.0 
31   4328.0  5559.0  4858.0   4839.0 
39   4486.0  5736.0  5031.0   5006.0 
32   4334.0  5558.0  4864.0   4843.0 
"), header=TRUE) 

我用来生成剧情代码: 请注意结合基本图形与电网仍无法正常工作

library(ggplot2) 
library(grid) 

layout(matrix(c(1,2,3,1,4,5), 2, 3, byrow = TRUE)) 

# define function to create multi-plot setup (nrow, ncol) 
vp.setup <- function(x,y){ 
grid.newpage() 
pushViewport(viewport(layout = grid.layout(x,y))) 
} 

# define function to easily access layout (row, col) 
vp.layout <- function(x,y){ 
viewport(layout.pos.row=x, layout.pos.col=y) 
} 

vp.setup(2,3) 

file_vals <- read.table(textConnection(" 
file  avg_vals 
133   1.5923 
132   1.6351 
1010  1.6532 
104   1.6824 
105   1.6087 
39   1.8694 
32   1.9934 
31   1.9919 
37   1.8638 
36   1.9691 
35   1.9802 
1   1.7283 
5   1.7637 
"), header=TRUE) 

red <- read.table(textConnection(" 
file  max  min  avg    lowest 
31   4328.0  5559.0  4858.0   4839.0 
32   4334.0  5558.0  4864.0   4843.0 
36   4360.0  5580.0  4891.0   4867.0 
35   4304.0  5578.0  4842.0   4831.0 
"), header=TRUE) 

blue <- read.table(textConnection(" 
file  max  min  avg    lowest 
133   4720.0  6064.0  5299.0   5277.0 
105   4636.0  5905.0  5193.0   5165.0 
104   4329.0  5554.0  4858.0   4838.0 
132   5112.0  6520.0  5728.0   5699.0 
1010  5008.0  6291.0  5591.0   5545.0 
"), header=TRUE) 

green <- read.table(textConnection(" 
file  max  min  avg    lowest 
39   4486.0  5736.0  5031.0   5006.0 
37   4444.0  5663.0  4979.0   4952.0 
5   4617.0  5936.0  5185.0   5165.0 
1   4384.0  5613.0  4917.0   4895.0 
"), header=TRUE) 


# Perform Cluster 
d <- dist(file_vals$avg_vals, method = "euclidean") 
fit <- hclust(d, method="ward") 
plot(fit, labels=file_vals$file) 
groups <- cutree(fit, k=3) 

cols = c('red', 'blue', 'green', 'purple', 'orange', 'magenta', 'brown', 'chartreuse4','darkgray','cyan1') 
rect.hclust(fit, k=3, border=cols) 


# Desnity plots 
dat = rbind(data.frame(Cluster='Red', max_vals = red$max), data.frame(Cluster='Blue', max_vals = blue$max), data.frame(Cluster='Green', max_vals = green$max)) 
max = (ggplot(dat,aes(x=max_vals))) 
max = max + geom_density(aes(fill=factor(Cluster)), alpha=.3) + xlim(c(3500, 5500)) + scale_fill_manual(values=c("red",'blue',"green")) 
max = max + labs(fill = 'Clusters') 
print(max, vp=vp.layout(1,2)) 

dat = rbind(data.frame(Cluster='Red', min_vals = red$min), data.frame(Cluster='Blue', min_vals = blue$min), data.frame(Cluster='Green', min_vals = green$min)) 
min = (ggplot(dat,aes(x=min_vals))) 
min = min + geom_density(aes(fill=factor(Cluster)), alpha=.3) + xlim(c(5000, 7000)) + scale_fill_manual(values=c("red",'blue',"green")) 
min = min + labs(fill = 'Clusters') 
print(min, vp=vp.layout(1,3)) 

dat = rbind(data.frame(Cluster='Red', avg_vals = red$avg), data.frame(Cluster='Blue', avg_vals = blue$avg), data.frame(Cluster='Green', avg_vals = green$avg)) 
avg = (ggplot(dat,aes(x=avg_vals))) 
avg = avg + geom_density(aes(fill=factor(Cluster)), alpha=.3) + xlim(c(4000, 6000)) + scale_fill_manual(values=c("red",'blue',"green")) 
avg = avg + labs(fill = 'Clusters') 
print(avg, vp=vp.layout(2,2)) 

dat = rbind(data.frame(Cluster='Red', lowest_vals = red$lowest), data.frame(Cluster='Blue', lowest_vals = blue$lowest), data.frame(Cluster='Green', lowest_vals = green$lowest)) 
lowest = (ggplot(dat,aes(x=lowest_vals))) 
lowest = lowest + geom_density(aes(fill=factor(Cluster)), alpha=.3) + xlim(c(4000, 6000)) + scale_fill_manual(values=c("red",'blue',"green")) 
lowest = lowest + labs(fill = 'Clusters') 
print(lowest, vp=vp.layout(2,3)) 

回答

1

通过这种方式,您可以使用4个面板自动创建所需的绘图。

首先,数据:

scores <- read.table(textConnection(" 
file  max  min  avg    lowest 
132   5112.0  6520.0  5728.0   5699.0 
133   4720.0  6064.0  5299.0   5277.0 
5   4617.0  5936.0  5185.0   5165.0 
1   4384.0  5613.0  4917.0   4895.0 
1010  5008.0  6291.0  5591.0   5545.0 
104   4329.0  5554.0  4858.0   4838.0 
105   4636.0  5905.0  5193.0   5165.0 
35   4304.0  5578.0  4842.0   4831.0 
36   4360.0  5580.0  4891.0   4867.0 
37   4444.0  5663.0  4979.0   4952.0 
31   4328.0  5559.0  4858.0   4839.0 
39   4486.0  5736.0  5031.0   5006.0 
32   4334.0  5558.0  4864.0   4843.0 
"), header=TRUE) 

file_vals <- read.table(textConnection(" 
file  avg_vals 
            133   1.5923 
            132   1.6351 
            1010  1.6532 
            104   1.6824 
            105   1.6087 
            39   1.8694 
            32   1.9934 
            31   1.9919 
            37   1.8638 
            36   1.9691 
            35   1.9802 
            1   1.7283 
            5   1.7637 
            "), header=TRUE) 

两个数据帧可以被合并成一个单一的一个:

dat <- merge(scores, file_vals, by = "file") 

拟合:

d <- dist(dat$avg_vals, method = "euclidean") 
fit <- hclust(d, method="ward") 
groups <- cutree(fit, k=3) 
cols <- c('red', 'blue', 'green', 'purple', 'orange', 'magenta', 'brown', 'chartreuse4','darkgray','cyan1') 

添加列与颜色名(基于拟合):

dat$group <- cols[groups] 

重塑从广角到长格式的数据:

dat_re <- reshape(dat, varying = c("max", "min", "avg", "lowest"), direction = "long", drop = c("file", "avg_vals"), v.names = "value", idvar = "group", times = c("max", "min", "avg", "lowest"), new.row.names = seq(nrow(scores) * 4)) 

简介:

p <- (ggplot(dat_re ,aes(x = value))) + 
geom_density(aes(fill = group), alpha=.3) + 
scale_fill_manual(values=cols) + 
labs(fill = 'Clusters') + 
facet_wrap(~ time) 

print(p) 

enter image description here

+0

感谢您的回答,但我要如何扩展和x轴添加到图?这些图不一定适用于相同的x轴比例。 – Harpal

+0

您可以使用'facet_wrap'的另一个参数,以允许基于每个面板中数据范围的x轴范围:'facet_wrap(〜time,scales =“free_x”)'。请注意,使用ggplot2不能为面板指定不同的x轴。如果所有面板都具有相同的x轴就足够了,则可以将以下内容添加到您的图中:'+ coord_cartesian(xlim = c(3500,7000))'。 –