我有一个已经阻止了我2天的问题,希望我能在这里找到一个解决方案:错误与列数据框有R
我创建一个包含单词列表的数据帧和其正,负极性为情感分析任务。
word positive.polarity negative.polarity
1 interesting 1 0
2 boring 0 1
对于每个字I提取它的上下文,其是一组3分以上的字句。
我一直的助推器单词的列表:
-booster_words <- c("more","enough", "a lot", "as", "so")
-negative_words <- c("not", "rien", "ni", "aucun", "nul", "jamais", "pas", "non plus", "sans")
我想创建一个新的列positive.ponderate.polarity其中包含如果有一个助推器和否定词,每3 devided的正极性值上下文,如果在上下文中只有助推词(在上下文中有ni个否定词),则每3个字符相乘。
当我用这句话运行:
"The course was so interesting, but the professor was not boring"
我得到这个数据帧:
word positive.polarity negative.polarity positive.ponderate.polarity
1 interesting 1 0 0.3333333
2 boring 0 1 0.0000000
但我已经找到,因为结果这个数据帧:
word positive.polarity negative.polarity positive.ponderate.polarity
1 interesting 1 0 3
2 boring 0 1 0.0000000
这里代码:
calcPolarity <- function(sentiment_DF,sentences){
booster_words <- c("more","enough", "a lot", "as", "so")
negative_words <- c("not", "rien", "ni", "aucun", "nul", "jamais", "pas", "non plus", "sans")
reduce_words <- c("peu", "presque", "moins", "seulement")
# pre-allocate the polarity result vector with size = number of sentences
polarity <- rep.int(0,length(sentences))
# loop per sentence
for(i in 1:length(polarity)){
sentence <- sentences[i]
# separate each sentence in words using regular expression
wordsOfASentence <- unlist(regmatches(sentence,gregexpr("[[:word:]]+",sentence,perl=TRUE)))
# get the rows of sentiment_DF corresponding to the words in the sentence using match
# N.B. if a word occurs twice, there will be two equal rows
# (but I think it's correct since in this way you count its polarity twice)
subDF <- sentiment_DF[match(wordsOfASentence,sentiment_DF$word,nomatch = 0),]
# Find (number) of matching word.
wordOfInterest <- wordsOfASentence[which(wordsOfASentence %in% levels(sentiment_DF$word))] # No multigrepl, so working with duplicates instead. eg interesting
regexOfInterest <- paste0("([^\\s]+\\s){0,3}", wordOfInterest, "(\\s[^\\s]+){0,3}")
# extract a context of 3 words before the word in the dataframe
context <- stringr::str_extract(sentence, regexOfInterest)
names(context) <- wordOfInterest # Helps in forloop
print(context)
if(any(unlist(strsplit(context, " ")) %in% booster_words))
{
print(booster_words)
if(any(unlist(strsplit(context, " ")) %in% negative_words))
{
subDF$positive.ponderate.polarity <- subDF$positive.polarity/3
}
else
{
subDF$positive.ponderate.polarity <- subDF$positive.polarity * 3
}
}
# Debug option
print(subDF)
# calculate the total polarity of the sentence and store in the vector
polarity[i] <- sum(subDF$positive.ponderate.polarity) - sum(subDF$negative.ponderate.polarity)
}
return(polarity)
}
sentiment_DF <- data.frame(word=c('interesting','boring','pretty'),
positive.polarity=c(1,0,1),
negative.polarity=c(0,1,0))
sentences <- c("The course was so interesting, but the professor was not boring")
result <- calcPolarity(sentiment_DF,sentences)
用法:
result <- calcPolarity(sentiment_DF,sentences)
interesting boring
"course was so interesting" "professor was not boring"
[1] "more" "enough" "a lot" "as" "so"
word positive.polarity negative.polarity positive.ponderate.polarity
1 interesting 1 0 0.3333333
2 boring 0 1 0.0000000
编辑:
calcPolarity <- function(sentiment_DF,sentences){
booster_words <- c("more","enough", "a lot", "as", "so")
negative_words <- c("not", "rien", "ni", "aucun", "nul", "jamais", "pas", "non plus", "sans")
reduce_words <- c("peu", "presque", "moins", "seulement")
# pre-allocate the polarity result vector with size = number of sentences
polarity <- rep.int(0,length(sentences))
# loop per sentence
for(i in 1:length(polarity)){
sentence <- sentences[i]
# separate each sentence in words using regular expression
wordsOfASentence <- unlist(regmatches(sentence,gregexpr("[[:word:]]+",sentence,perl=TRUE)))
# get the rows of sentiment_DF corresponding to the words in the sentence using match
# N.B. if a word occurs twice, there will be two equal rows
# (but I think it's correct since in this way you count its polarity twice)
subDF <- sentiment_DF[match(wordsOfASentence,sentiment_DF$word,nomatch = 0),]
# Find (number) of matching word.
wordOfInterest <- wordsOfASentence[which(wordsOfASentence %in% levels(sentiment_DF$word))] # No multigrepl, so working with duplicates instead. eg interesting
regexOfInterest <- paste0("([^\\s]+\\s){0,3}", wordOfInterest, "(\\s[^\\s]+){0,3}")
# extract a context of 3 words before the word in the dataframe
context <- stringr::str_extract(sentence, regexOfInterest)
names(context) <- wordOfInterest # Helps in forloop
print(context)
for(i in 1:length(context)){
if(any(unlist(strsplit(context[i], " ")) %in% booster_words))
{
print(booster_words)
if(any(unlist(strsplit(context[i], " ")) %in% negative_words))
{
subDF$positive.ponderate.polarity <- subDF$positive.polarity + 4
}
else
{
subDF$positive.ponderate.polarity <- subDF$positive.polarity + 9
}
}
}
# Debug option
print(subDF)
# calculate the total polarity of the sentence and store in the vector
polarity[i] <- sum(subDF$positive.ponderate.polarity) - sum(subDF$negative.ponderate.polarity)
}
return(polarity)
}
sentiment_DF <- data.frame(word=c('interesting','boring','pretty'),
positive.polarity=c(1,0,1),
negative.polarity=c(0,1,0))
sentences <- c("The course was interesting, but the professor was not so boring")
result <- calcPolarity(sentiment_DF,sentences)
我得到这样的结果:
word positive.polarity negative.polarity positive.ponderate.polarity
1 interesting 1 0 5
2 boring 0 1 4
但它是incorrest,我必须有这样的结果
word positive.polarity negative.polarity positive.ponderate.polarity
1 interesting 1 0 1
2 boring 0 1 4
有什么想法吗?
我不知道为什么你的排名我的-1的问题?我尊重所有规则,我发布我的代码,我的问题,。:/ – Poisson
为什么-1指向我的帖子?我尊重SO规则......? – Poisson