2016-08-25 62 views
1

使用下面的 data.frame(来源:http://eric.univ-lyon2.fr/~ricco/tanagra/fichiers/en_Tanagra_PLSR_Software_Comparison.pdf ),如何用 ggplot2 绘制偏最小二乘回归(PLSR)双标图?

数据

# Car data set taken from the Tanagra PLSR software-comparison tutorial.
# The text holds one header line with 14 variable names followed by 20
# whitespace-separated observations; the leading blank line is skipped by
# read.table().
df <- read.table(
  text = "
diesel twodoors sportsstyle wheelbase length width height curbweight enginesize horsepower horse_per_weight conscity price symboling
0 1 0 97 172 66 56 2209 109 85 0.0385 8.7 7975 2
0 0 0 100 177 66 54 2337 109 102 0.0436 9.8 13950 2
0 0 0 116 203 72 57 3740 234 155 0.0414 14.7 34184 -1
0 1 1 103 184 68 52 3016 171 161 0.0534 12.4 15998 3
0 0 0 101 177 65 54 2765 164 121 0.0438 11.2 21105 0
0 1 0 90 169 65 52 2756 194 207 0.0751 13.8 34028 3
1 0 0 105 175 66 54 2700 134 72 0.0267 7.6 18344 0
0 0 0 108 187 68 57 3020 120 97 0.0321 12.4 11900 0
0 0 1 94 157 64 51 1967 90 68 0.0346 7.6 6229 1
0 1 0 95 169 64 53 2265 98 112 0.0494 9.0 9298 1
1 0 0 96 166 64 53 2275 110 56 0.0246 6.9 7898 0
0 1 0 100 177 66 53 2507 136 110 0.0439 12.4 15250 2
0 1 1 94 157 64 51 1876 90 68 0.0362 6.4 5572 1
0 0 0 95 170 64 54 2024 97 69 0.0341 7.6 7349 1
0 1 1 95 171 66 52 2823 152 154 0.0546 12.4 16500 1
0 0 0 103 175 65 60 2535 122 88 0.0347 9.8 8921 -1
0 0 0 113 200 70 53 4066 258 176 0.0433 15.7 32250 0
0 0 0 95 165 64 55 1938 97 69 0.0356 7.6 6849 1
1 0 0 97 172 66 56 2319 97 68 0.0293 6.4 9495 2
0 0 0 97 172 66 56 2275 109 85 0.0374 8.7 8495 2",
  header = TRUE  # spelled out: `T` is an ordinary variable that can be reassigned
)

代码

library(pls)

# Response is column 14 (symboling); predictors are columns 1-11
# (diesel ... horse_per_weight). Both are handed to mvr() as matrices.
Y <- as.matrix(df[, 14])
X <- as.matrix(df[, 1:11])

# Fit a 3-component PLS regression using the orthogonal-scores algorithm,
# asking mvr() to scale the predictors, then draw the package's own biplot.
# `TRUE` is spelled out because `T` is a reassignable variable.
df.pls <- mvr(Y ~ X, ncomp = 3, method = "oscorespls", scale = TRUE)
plot(df.pls, plottype = "biplot")

我得到这个

双标图

enter image description here

如能帮助我使用 ggplot2 绘制 pls 双标图,将不胜感激。

+0

看看 GitHub 上的 ggvegan。它可能无法直接做到,但能展示实现的思路(简而言之,就是从 pls 对象中提取得分) –

+0

@RichardTelford 感谢您的时间,帮助和建议 – aelwan

回答

1
# Read data
# One header line with 14 variable names followed by 20 whitespace-separated
# observations; the leading blank line is skipped by read.table().
df <- read.table(
  text = "
diesel twodoors sportsstyle wheelbase length width height curbweight enginesize horsepower horse_per_weight conscity price symboling
          0 1 0 97 172 66 56 2209 109 85 0.0385 8.7 7975 2
          0 0 0 100 177 66 54 2337 109 102 0.0436 9.8 13950 2
          0 0 0 116 203 72 57 3740 234 155 0.0414 14.7 34184 -1
          0 1 1 103 184 68 52 3016 171 161 0.0534 12.4 15998 3
          0 0 0 101 177 65 54 2765 164 121 0.0438 11.2 21105 0
          0 1 0 90 169 65 52 2756 194 207 0.0751 13.8 34028 3
          1 0 0 105 175 66 54 2700 134 72 0.0267 7.6 18344 0
          0 0 0 108 187 68 57 3020 120 97 0.0321 12.4 11900 0
          0 0 1 94 157 64 51 1967 90 68 0.0346 7.6 6229 1
          0 1 0 95 169 64 53 2265 98 112 0.0494 9.0 9298 1
          1 0 0 96 166 64 53 2275 110 56 0.0246 6.9 7898 0
          0 1 0 100 177 66 53 2507 136 110 0.0439 12.4 15250 2
          0 1 1 94 157 64 51 1876 90 68 0.0362 6.4 5572 1
          0 0 0 95 170 64 54 2024 97 69 0.0341 7.6 7349 1
          0 1 1 95 171 66 52 2823 152 154 0.0546 12.4 16500 1
          0 0 0 103 175 65 60 2535 122 88 0.0347 9.8 8921 -1
          0 0 0 113 200 70 53 4066 258 176 0.0433 15.7 32250 0
          0 0 0 95 165 64 55 1938 97 69 0.0356 7.6 6849 1
          1 0 0 97 172 66 56 2319 97 68 0.0293 6.4 9495 2
          0 0 0 97 172 66 56 2275 109 85 0.0374 8.7 8495 2",
  header = TRUE  # spelled out: `T` is an ordinary variable that can be reassigned
)

# Run OP's code
library(pls)
library(ggplot2)

# Response is column 14 (symboling); predictors are columns 1-11.
Y <- as.matrix(df[, 14])
X <- as.matrix(df[, 1:11])

# 3-component PLS regression (orthogonal-scores algorithm) with mvr() asked
# to scale the predictors. `TRUE` is spelled out because `T` is reassignable.
df.pls <- mvr(Y ~ X, ncomp = 3, method = "oscorespls", scale = TRUE)

# Extract information from the mvr object

# Scores on the first two components, one row per observation.
# Row names are kept because the scores plot uses them as point labels.
df2 <- data.frame(
  comp1a = df.pls$scores[, 1],
  comp2a = df.pls$scores[, 2]
)

# Loadings on the first two components, one row per predictor.
# data.frame() takes the row names from the named loading vectors, so no
# zero-column `names` helper (which also shadowed base::names) is needed.
df1 <- data.frame(
  comp1 = df.pls$loadings[, 1],
  comp2 = df.pls$loadings[, 2]
)

# Generate two plots and overlay
# Plot 1: loadings of the first two components, drawn as red text labels
# (one label per row of df1) on a fixed-ratio panel spanning -0.6 .. 0.6.
p1 <- ggplot(data = df1, mapping = aes(x = comp1, y = comp2)) +
  geom_text(mapping = aes(label = rownames(df1)), colour = "red") +
  scale_x_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) +
  scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) +
  coord_fixed(xlim = c(-0.6, 0.6), ylim = c(-0.6, 0.6)) +
  labs(x = "", y = "", title = "X scores and X Loadings") +
  theme_bw() +
  theme(
    # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size`
    # for rectangle borders; kept as `size` to match the file's ggplot2 usage.
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    # Red ticks visually tie this axis to the red loading labels
    axis.ticks = element_line(colour = "red"),
    axis.text.y = element_text(
      angle = 90, hjust = 0.65,
      margin = margin(10, 10, 10, 5, "pt")
    )
  )

# Plot 2: scores of the first two components, labelled with the row names
# of df2.
#
# The original chain also added xlim(-4, 4) + ylim(-4, 4), but both were
# replaced by the scale_*_continuous() calls that followed them, so they only
# produced "Scale ... is already present" messages. They are dropped here; the
# rendered plot is unchanged.
# NOTE(review): as a result the x range is data-driven while the y range is
# fixed to -4 .. 4, so the x origin need not line up with the symmetric
# loadings panel when the two plots are overlaid. If alignment is wanted,
# consider coord_cartesian(xlim = ..., ylim = ...) with symmetric limits wide
# enough for every score.
p2 <- ggplot(data = df2, mapping = aes(x = comp1a, y = comp2a)) +
  geom_text(mapping = aes(label = rownames(df2))) +
  scale_x_continuous(breaks = c(-4, -2, 0, 2)) +
  scale_y_continuous(breaks = c(-4, -2, 0, 2)) +
  coord_cartesian(ylim = c(-4, 4)) +
  labs(x = "Comp 1", y = "Comp 2", title = "X scores and X Loadings") +
  theme_bw() +
  theme(
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    plot.title = element_text(face = "bold"),
    axis.text.y = element_text(angle = 90, hjust = 0.65)
  )

#Function to overlay plots in order to get two graphs with different axes on same plot 
library(grid) 
library(gtable) 
#' Overlay the panel of one ggplot on another and add the second plot's axis
#'
#' Builds both plots into gtables, places the panel grob of `plot2` on top of
#' the panel of `plot1`, copies one axis of `plot2` into a new row or column of
#' the combined table, and draws the result on a fresh grid page.
#'
#' @param plot1 ggplot used as the base of the combined figure.
#' @param plot2 ggplot whose panel is overlaid (its panel background is made
#'   transparent) and whose axis is copied.
#' @param which.axis `"x"` to copy the bottom axis of `plot2`; any other value
#'   copies its left axis. Only `"y"` additionally widens the right margin of
#'   `plot1`.
#' @return The combined gtable, invisibly. Called for its side effect of
#'   starting a new grid page and drawing the table.
ggplot_dual_axis <- function(plot1, plot2, which.axis = "x") {
  # Transparent panel so plot1 stays visible underneath plot2's panel
  plot2 <- plot2 + theme(panel.background = element_rect(fill = NA))
  grid.newpage()

  # Increase right margin if which.axis == "y"
  if (which.axis == "y") {
    plot1 <- plot1 + theme(plot.margin = unit(c(0.7, 1.5, 0.4, 0.4), "cm"))
  }

  # Extract gtables
  g1 <- ggplot_gtable(ggplot_build(plot1))
  g2 <- ggplot_gtable(ggplot_build(plot2))

  # Overlap the panel of the second plot on that of the first.
  # `select` is spelled out (the original relied on partial matching of `se`).
  pp <- c(subset(g1$layout, name == "panel", select = t:r))
  g <- gtable_add_grob(
    g1, g2$grobs[[which(g2$layout$name == "panel")]],
    pp$t, pp$l, pp$b, pp$l
  )

  # Steal the axis from the second plot
  axis.lab <- if (which.axis == "x") "axis-b" else "axis-l"
  ia <- which(g2$layout$name == axis.lab)
  ga <- g2$grobs[[ia]]
  # NOTE(review): assumes the second child of the axis grob holds the
  # ticks/labels table; this depends on ggplot2's internal grob layout.
  ax <- ga$children[[2]]

  # Switch position of ticks and labels so the axis reads from the other side
  if (which.axis == "x") {
    ax$heights <- rev(ax$heights)
  } else {
    ax$widths <- rev(ax$widths)
  }
  ax$grobs <- rev(ax$grobs)
  if (which.axis == "x") {
    ax$grobs[[2]]$y <- ax$grobs[[2]]$y - unit(1, "npc") + unit(0.15, "cm")
  } else {
    ax$grobs[[1]]$x <- ax$grobs[[1]]$x - unit(1, "npc") + unit(0.15, "cm")
  }

  if (which.axis == "x") {
    # Modify existing row to be tall enough for the axis
    g$heights[[2]] <- g$heights[g2$layout[ia, ]$t]
    # Add the axis, then a new row holding a grob taken from plot2
    g <- gtable_add_grob(g, ax, 2, 4, 2, 4)
    g <- gtable_add_rows(g, g2$heights[1], 1)
    # NOTE(review): hard-coded grob index 6 (presumably an axis title) and
    # cell positions depend on the ggplot2 version's gtable layout - confirm.
    g <- gtable_add_grob(g, g2$grobs[[6]], 2, 4, 2, 4)
  } else {
    # Add a new column for the axis, then place the axis and a grob from plot2
    g <- gtable_add_cols(g, g2$widths[g2$layout[ia, ]$l], length(g$widths) - 1)
    g <- gtable_add_grob(g, ax, pp$t, length(g$widths) - 1, pp$b)
    # NOTE(review): hard-coded grob index 7 depends on the ggplot2 version's
    # gtable layout - confirm.
    g <- gtable_add_grob(g, g2$grobs[[7]], pp$t, length(g$widths), pp$b - 1)
  }

  # Draw it, and hand the table back so callers can save or modify it
  grid.draw(g)
  invisible(g)
}

# Draw the combined biplot: the scores plot (p2) is the base, the loadings
# plot (p1) is overlaid and contributes the extra y axis
ggplot_dual_axis(plot1 = p2, plot2 = p1, which.axis = "y")

enter image description here

+1

没问题,我的荣幸!请考虑给我的回答点赞 :) –

+0

我想为变量添加箭头,并把因变量(y)**symboling** 也加到图中,类似于这张[**图**](https://www.dropbox.com/s/06f2owvmq77pjmp/Rplot11.png?dl=0)。该图是我用以下代码生成的:`library(plsdepot); df.plsdepot = plsreg1(df[, 1:11], df[, 14, drop = FALSE], comps = 3); plot(df.plsdepot, comps = c(1, 2))`。任何帮助将不胜感激。如果您认为这应该作为一个单独的问题提出,请告诉我。 – aelwan

+1

我很乐意这样做。不幸的是,我今天的投票次数已经用完了 :-( 明天我保证会给它点赞 :-) – aelwan