2013-04-30 34 views
0

这可能是PICNIC,但是当我尝试构建一个函数时,我会比在控制台中执行操作时收到一组相当奇怪的行为。R-在函数外部和内部分配不同的变量类别?

我试图使用cut2来产生一组间隔并将它们分配给一个变量供以后重用。在函数外部使用会产生合适的向量,但在函数内部,分配会强制使用不同的方式。我试着围绕它包装as.vector,但是生成了一个char [210]。

谁能告诉我我做错了什么?

感谢, 斯蒂芬 PS全局分配是这样的间隔可以被修改,后来重用

设置

library("Hmisc") 
library("caret") 
# functions in use ---------------------------------------------------------------- 
# functions for splitting data according to Max Kuhn's preferences in caret vignette, code condensed to save space 
splitDataset<-function(dataset=rawdata, nPrimaryKeyCol=1, nOutcomeCol=1) { 
    end<-as.numeric(ncol(dataset)) 
    stopifnot(is.numeric(nPrimaryKeyCol),nPrimaryKeyCol<=end,is.numeric(nOutcomeCol),nOutcomeCol<=end,(nPrimaryKeyCol+nOutcomeCol)<=end) 
    predstart<-nPrimaryKeyCol+1 
    predend<-ncol(dataset)-nOutcomeCol 
    assign(x="keys",value=dataset[, 0:nPrimaryKeyCol],envir = parent.frame()) 
    assign(x="outcomes",value=dataset[,(predend+1):end],envir = parent.frame()) 
    assign(x="predictors",value=dataset[,predstart:predend], envir = parent.frame()) 
} 
    partitionDataset<-function(proportion=0.7){ 
    require("caret") 
    assign(x="inTrain", value=createDataPartition(outcomes,p=proportion, list=FALSE), envir = parent.frame()) 
    assign(x="trainKeys", value=keys[inTrain], envir = parent.frame()) 
    assign(x="trainPredictors",value=predictors[inTrain,],envir = parent.frame()) 
    assign(x="trainOutcomes", value=outcomes[inTrain],envir = parent.frame()) 
    assign(x="testKeys",value=keys[-inTrain],envir = parent.frame()) 
    assign(x="testPredictors", value=predictors[-inTrain,], envir = parent.frame()) 
    assign(x="testOutcomes",value=outcomes[-inTrain], envir = parent.frame()) 
    assign(x="trainPredictors.Bad", value=subset(trainPredictors,trainOutcomes=="bad"), envir = parent.frame()) 
} 

# this is the problem function 
equalFreqBins.derive<-function(characteristic,deriveDataset,g=20){ 

    stopifnot(is.numeric(deriveDataset[,characteristic]),is.numeric(g)) 
    dnam<-paste0("interval.",characteristic) 
# this is the assignment 
    intervals<-cut2(deriveDataset[,characteristic],g,onlycuts=TRUE) 
    assign(
    x=dnam, 
    value=intervals, 
    envir=as.environment(".GlobalEnv") 
    ) 
# return to make it easier to look at variable 
    return(str(intervals)) 
    } 

# data loadup---------------------------------------------------------------- 
data(GermanCredit) 
GermanCredit$outcome<-GermanCredit$Class 
GermanCredit$Class<-NULL 
levels(GermanCredit$outcome)<-c("bad","good") 
basedata<-GermanCredit 
splitDataset(basedata,nPrimaryKeyCol=0,nOutcomeCol=1) 
partitionDataset(proportion=0.7) 

问题再现

# this is outputting a factor of 210 
equalFreqBins.derive(characteristic="Age",deriveDataset=trainPredictors.Bad,g=20) 
# this is outputting a num [1:20] 
intervals<-cut2(trainPredictors.Bad[,"Age"],g=20,onlycuts=TRUE) 
str(intervals) 

回答

1

的简介cut2

cut2(x, cuts, m, g, levels.mean, digits, minmax=TRUE, oneval=TRUE, onlycuts=FALSE) 

所以当你打电话

intervals <- cut2(deriveDataset[,characteristic], g, onlycuts = TRUE) 

g变量,是第二个未命名参数,则取为cuts说法。解决方案:用命名参数调用你的函数:

intervals <- cut2(deriveDataset[,characteristic], g = g, onlycuts = TRUE) 
+0

非常感谢! – 2013-05-01 07:04:56

相关问题