这里是一种可能性:
# Work on plain character strings rather than factor codes.
df1$pages <- as.character(df1$pages)

# Canonicalise every description: split at commas, sort the words
# alphabetically and glue them back together, so rows that list the same
# pages in a different order end up with an identical key.
# strsplit() is already vectorised, so one call handles the whole column.
# vapply() always yields a character vector, and USE.NAMES = FALSE keeps
# element names off the rebuilt column.
df1$pages <- vapply(
  strsplit(df1$pages, ",", fixed = TRUE),
  function(words) paste(sort(words), collapse = ","),
  character(1),
  USE.NAMES = FALSE
)

# Sum sum.num. over rows whose canonicalised 'pages' are identical.
df1 <- aggregate(sum.num. ~ ., df1, sum)
# pages sum.num.
#1 Badezimmer,Baumarkt,Büromöbel 22
#2 Badezimmer,Baumarkt,Dekoration 14
#3 Badezimmer,Baumarkt,Flur 70
#4 Badezimmer,Baumarkt,Garten 18
#5 Badezimmer,Baumarkt,Heimtextilien 100
#6 Badezimmer,Baumarkt,Kinder 28
数据:
# Sample data: eight comma-separated page descriptions (stored as a factor)
# with an integer count per row. Some descriptions list the same pages in a
# different order.
page_labels <- c(
  "Badezimmer,Baumarkt,Büromöbel",
  "Badezimmer,Baumarkt,Dekoration",
  "Badezimmer,Baumarkt,Flur",
  "Badezimmer,Baumarkt,Garten",
  "Badezimmer,Baumarkt,Heimtextilien",
  "Badezimmer,Baumarkt,Kinder",
  "Badezimmer,Büromöbel,Baumarkt",
  "Badezimmer,Flur,Baumarkt"
)
df1 <- data.frame(
  # Explicit levels keep the level order as listed, independent of locale.
  pages = factor(page_labels, levels = page_labels),
  sum.num. = c(6L, 14L, 30L, 18L, 100L, 28L, 16L, 40L)
)
是的，这很有帮助，很高兴知道这个包，谢谢 – PSraj