2014-09-10 35 views
-1

我已经写出了一个适用于年复一年的季度增长率的代码。但是,我的代码只适用于我用来编写代码的数据。我希望能够使用不同长度的数据运行整个代码,而不必更改任何内容。推广我的同比季度增长代码以适应不同的数据

这里是我的代码:

>lastyr<-tail(datan,horiz) #selects the last values from the original data 

>percentf<-((Arimab2f/lastyr)-1)*100 #finds the percentage growth of the forecasted data 

>percentfr<-round(percentf,digits=2) #rounds the results to 3d.p 

>percentf_percent<-paste(percentfr,"%",sep="") 

>plot.ts(percentf,xaxt="n",ylab="Percentage growth rate",xlab="Quarter",main="Percentage Growth of the Forecasts") #plots the percentage growth of the forecasts and removes the x-axis values 

>axis(1,at=seq(1,horiz,by=1),las=1) #adds the choosen values onto the x-axis at the points 

>points(percentf,col="blue") #puts a blue circle around each of the points 

>for(i in 1:length(percentfr)) 
+{text(x=i,y=percentfr[i],labels=percentf_percent[i],cex=0.5,pos=1) 
+text(x=i,y=percentfr[i],labels=percentf_percent[i],cex=0.5,pos=3)} #adds the percentage values next to the points on both the left and right side 

>fulldata<-c(datan,Arimab2f,0,0) #this makes a vector of all the data with the forecasts, the added zeros make it easier to seperate the years if it doesnt finish in the 4th quarter 

>fullmat<-matrix(fulldata,nrow=freqdata) #produces a matrix of the full data with the years seperated into columns 

>full1mat<-fullmat[,-1] #removes the first column from the matrix 

>full2mat<-matrix(c(full1mat,0,0,0,0),nrow=freqdata) #makes a matrix with a zero column at the end to account for the one removed in the last line 

>percent1<-((full2mat[,1]/fullmat[,1])-1)*100 
>percent2<-((full2mat[,2]/fullmat[,2])-1)*100 
>percent3<-((full2mat[,3]/fullmat[,3])-1)*100 
>percent4<-((full2mat[,4]/fullmat[,4])-1)*100 
>percent5<-((full2mat[,5]/fullmat[,5])-1)*100 
>percent6<-((full2mat[,6]/fullmat[,6])-1)*100 
>percent7<-((full2mat[,7]/fullmat[,7])-1)*100 
>percent8<-((full2mat[,8]/fullmat[,8])-1)*100 
#percent9<-((full2mat[,9]/fullmat[,9])-1)*100 #add as many percents as there is years in the data 
#percent10<-((full2mat[,10]/fullmat[,10])-1)*100 
#percent11<-((full2mat[,11]/fullmat[,11])-1)*100 
#percent12<-((full2mat[,12]/fullmat[,12])-1)*100 
#percent13<-((full2mat[,13]/fullmat[,13])-1)*100 
#percent14<-((full2mat[,14]/fullmat[,14])-1)*100 

>percentagegrowth<-c(percent1,percent2,percent3,percent4,percent5,percent6,percent7,percent8)#,percent9,percent10,percent11,percent12,percent13,percent14) #puts the percentage growths for each year in the same vector 

>percentagegrowth1<-head(percentagegrowth,-(length(fullmat)-length(datan))) #removes the unnecessary values from the end of the matrix 

>zero<-matrix(,nrow=(length(percentagegrowth1)-length(percentf))) #creates a matrix with no values 

>percentf1<-c(zero,percentf) #creates a vector with the NA values and the percantage growth of the forecast data 

>percentagegrowth1r<-round(percentagegrowth1,1) 

>names(percentagegrowth1)<-c("09 q1","09 q2","09 q3","09 q4","10 q1","10 q2","10 q3","10 q4","11 q1","11 q2","11 q3","11 q4","12 q1","12 q2","12 q3","12 q4","13 q1","13 q2","13 q3","13 q4","14 q1","14 q2","14 q3","14 q4","15 q1","15 q2") 

>percentagegrowth1_percent<-paste(percentagegrowth1r,"%",sep="") 

>plot.ts(percentagegrowth1,xaxt="n",xlab="Quarter",ylab="Percentage growth(%)",main="Year-over-Year Quarterly Percentage Growth") #plots all the percentage growth points 

>for(i in 1:length(percentagegrowth1r)) 
+{text(x=i,y=percentagegrowth1r[i],labels=percentagegrowth1_percent[i],cex=0.5,font=2,pos=1) 
+ text(x=i,y=percentagegrowth1r[i],labels=percentagegrowth1_percent[i],cex=0.5,font=2,pos=3)} 

>lines(percentf1,col="red") #adds the forecasted data as a red line 

>points(percentagegrowth1r,col="blue") #circles each of the points making them easier to see 

>axis(1,at=seq(1,length(percentagegrowth1),by=1),labels=names(percentagegrowth1),las=2,cex.axis=0.6) 

>legend("topright",c("Red=Forecasted data")) 

有什么办法缩短它具有percent1,percent2 ...#percent14的代码?并且在名称(百分比增长1)中考虑到数据的长度可能会改变,因此名称会改变?

下面是在这段代码中使用的数据,所以你可以看到我做了什么:

http://s21.postimg.org/t6nldfo13/datan.png(打探)

http://s14.postimg.org/vmn2kjatp/arimab2f.png(Arimab2f(使用ARIMA的forcasted数据))

HORIZ = 4

freqdata = 4

您可以通过复制运行我的整个代码粘贴了这一切(包括数据):

datan<-c(79160.56,91759.73,91186.48,106353.82,70346.47,80279.15,82611.60,131392.72,93798.99,105944.78,103913.13,154530.69,110157.40,117416.09,127423.42,156752.00,120097.81,121307.75,115021.12,150657.83,113711.53,115353.14,112701.98,154319.18,116803.54,118352.54) 
freqdata<-4 #set frequency of the data (e.g. 4 for quarterly, 12 for monthly etc) 
startdata<-c(8,1) #where the first interval is ((8,1) means first quarter in 2008) 
horiz<-4 #how many predictions to be made (4 predicts a year for the quarterly data)  
datats<-ts(datan,frequency=freqdata,start=startdata) #turns the data into a time series with the frequency of data and where it starts 
plot(datats,ylab="Total",xlab="Time",main="Original Time Series Plot") #plots a time series graph of the original data 
seasonplot(datats,ylab="Total",xlab="Time",main="Seasonal plot",year.labels=TRUE,year.labels.left=TRUE,col=1:20,pch=19) 
fit<-stl(datats,s.window=5) 
lines(fit$time.series[,1],col="red",ylab="Trend") 
plot(fit) 
force.log<-"log" 
datadates<-as.character(data[,1]) #creates a character vector of the data column 
dataMAT<-matrix(0,ncol=freqdata,nrow=(length(datats)+freqdata),byrow=TRUE) #creates a zero matrix of size specified and 'byrow=TRUE' specifies you want it to fill the matrix row-wise (column-wise is the default) 
for(i in 1:freqdata) 
{dataMAT[,i]<-c(rep(0,length=i-1),lag(datats,k=-i+1),rep(0,length=freqdata-i+1))} #for every i in 1 to freqdata creates a vector entry in the i column of the zero matrix. 'rep(0,length=i-1)' is 0 repeated i-1 times, 'lag(datan,k=-i+1)' shifts the time space of datan back by -i+1 observations 
dataind<-dataMAT[c(-1:(-freqdata+1),-(length(dataMAT[,1])-freqdata+1):-(length(dataMAT[,1]))),] #fills in the zero matrix diagonally with the values from the above input 
dataind2<-data.frame(dataind) #creates a data frame of the matrix making it easier for R 
lm1<-lm(X1~.,data=dataind2) #creates a full linear model with x1 being the dependant variable using data from dataind2 
lm2<-lm(X1~X2+dataind2[,length(dataind2[1,])],data=dataind2) #creates a linear model with dependant variable x1 with x2 and 'dataind2[,length(dataind2[1,])]' being variables in the model (includes 1 lag and 1y lag) using data from dataind2. 
library(lmtest) #activates the lmtest package 
library(car) #activates the car package 
bptest1<-bptest(lm1) #does a Breusch-Pagan test on lm1 to test for heteroscedasticity (see http://en.wikipedia.org/wiki/Heteroscedasticity for description) 
bptest2<-bptest(lm2) #does a Breusch-Pagan test on lm2 to test for heteroscedasticity 
gqtest1<-gqtest(lm1) #does a Goldfeld-Quandt test on lm1 to test for homoscedasticity (if all random variable in model have the same finite variance) 
ncvtest1<-ncvTest(lm1) #a non-constant error variance test on lm1 (much like the Breusch-Pagan). add depending on model 
ncvtest2<-ncvTest(lm2) #a non-constant error variance test on lm2. add depending on model 
if(force.log=="level") 
{aslog<-"n"}else 
{{if(force.log=="log") 
    {aslog<-"y"}else 
     {if(bptest1$p.value<0.1|bptest2$p.value<0.1|gqtest1$p.value<0.1|ncvtest1$p<0.1|ncvtest2$p<0.1) #if the p-value of bptest1/bptest2/gqtest1 is < 0.1 name aslog 'y' 
     {aslog<-"y"}else 
      {aslog<-"n"}}}} 
if(aslog=="y") 
{dataa<-log(datats)}else 
{dataa<-datats} #if there is evidence to show that the data should be transformed then it makes a log of the data if not then it remains the same 
startLa<-startdata[1]+trunc((1/freqdata)*(length(dataa)-horiz)) #finds a start year 
startLb<-1+((1/freqdata)*(length(dataa)-horiz)-trunc((1/freqdata)*(length(dataa)-horiz)))*freqdata #finds a start quarter 
startL<-c(startLa,startLb) #creates a vector of the date 
K<-ts(rep(dataa,length=length(dataa)-horiz),frequency=freqdata,start=startdata) #split series into two, K is in sample (the original sample) 
L<-ts(dataa[-1:-(length(dataa)-horiz)],frequency=freqdata,start=startL) #split series into two, L is out sample (predictions) 
library(strucchange) #activates strucchange package 
efp1rc<-efp(lm1,data=dataind2,type="Rec-CUSUM") #returns a one-dimensional empirical process of cumulative sums of residuals from lm1 
efp2rc<-efp(lm2,data=dataind2,type="Rec-CUSUM") #returns a one-dimensional empirical process of cumulative sums of residuals from lm2 
efp1rm<-efp(lm1,data=dataind2,type="Rec-MOSUM") #returns a one-dimensional empirical process of moving sums of residuals from lm1 
efp2rm<-efp(lm2,data=dataind2,type="Rec-MOSUM") #returns a one-dimensional empirical process of moving sums of residuals from lm2 
plot(efp2rc) #plots the recursive cumulative sum of residuals for lm2 
lines(efp1rc$process,col ="darkblue") #plots the recursive cumulative sum of residuals for lm1 on the same graph 
plot(efp2rm) 
lines(efp1rm$process,col="darkblue") 
gefp2<-gefp(lm2,data=dataind2) #plots a M-fluctuation of lm2 
plot(gefp2) 
plot(dataa) #plots a graph of dataa 
pacf(dataa) #plots a correlogram of dataa 
sctest(efp2rc) #tests for structural change in regression model 
cat("log series,y/n?:",aslog) #shows if series has been logged or not 

########## ARIMA ######## 

library(tseries) #activates tseries package 
library(forecast) #activates forecast package 
max.sdiff<-3 #set the maximum seasonal differences allowed 
arima.force.seasonality<-"n" 
kpssW<-kpss.test(dataa,null="Level") #computes the Kwiatkowski-Phillips-Schmidt-Shin test for the null-hypothesis that dataa is level 
#kpssW<-ndiffs(dataa,alpha=0.05,test="kpss") #if the above doesn't work 
ppW<-tryCatch({ppW<-pp.test(dataa,alternative="stationary")},error=function(ppW){ppW<-list(error="TRUE",p.value=0.99)}) #performs a Phillips-Perron Unit Root test for the null hypothesis that dataa has a unit root instead of a stationary alternative. if p.value>0.05 then assume unit root 
adfW<-adf.test(dataa,alternative="stationary",k=trunc((length(dataa)-1)^(1/3))) #performs the Augmented Dickey-Fuller test that the null of dataa has unit root 
if(kpssW$p.value<0.05|ppW$p.value>0.05|adfW$p.value>0.05) 
{ndiffsW=1}else 
{ndiffsW=0} #finds the estimate of the number of differences required to make time series stationary 
aaW<-auto.arima(dataa,max.D=max.sdiff,d=ndiffsW,seasonal=TRUE,allowdrift=FALSE,stepwise=FALSE,trace=TRUE,seasonal.test="ch") #fits the best ARIMA model 
orderWA<-c(aaW$arma[1],aaW$arma[6],aaW$arma[2]) 
orderWS<-c(aaW$arma[3],aaW$arma[7],aaW$arma[4]) 
if(sum(aaW$arma[1:2])==0) 
{orderWA[1]<-1}else 
{NULL} 
if(arima.force.seasonality=="y") 
{if(sum(aaW$arma[3:4])==0) 
{orderWS[1]<-1}else 
    {NULL}}else 
    {NULL} 
Arimab<-Arima(dataa,order=orderWA,seasonal=list(order=orderWS),method="ML") 
fArimab<-forecast(Arimab,h=8,simulate=TRUE,fan=TRUE) #returns the forecasts for the Arima model. h=number of periods for forecasting 
if(aslog=="y") 
{fArimabF<-exp(fArimab$mean[1:horiz])}else 
{fArimabF<-fArimab$mean[1:horiz]} #if data was logged then its converted back by using the exponetial function as only interested in original data not transformed data 
plot(fArimab,main="ARIMA Forecast",sub="blue=fitted,red=actual") #plots the forecast 
lines(dataa,col="red",lwd=2) #changes colour and size of dataa 
lines(ts(append(fitted(Arimab),fArimab$mean[1]),frequency=freqdata,start=startdata),col="blue",lwd=2) #shows the fitted arima on the same graph 
if(aslog=="y") 
{Arimab2f<-exp(fArimab$mean[1:horiz])}else 
{Arimab2f<-fArimab$mean[1:horiz]} #if data was logged then its converted back by using the exponetial function as only interested in original data not transformed data 
start(fArimab$mean)->startARIMA 
ArimaALTf<-ts(prettyNum(Arimab2f,big.interval=3L,big.mark=","),frequency=freqdata,start=startARIMA) #puts forecasts in table 
View(ArimaALTf,title="ARIMA2 final forecast") #brings up table of the forecasts 
summary(Arimab) 

######Percentage growth for Arima###### 

###when using this feature you will need to change lines 118-132, 137 to suit your data### 

lastyr<-tail(datan,horiz) #selects the last values from the original data 
percentf<-((Arimab2f/lastyr)-1)*100 #finds the percentage growth of the forecasted data 
percentfr<-round(percentf,digits=2) #rounds the results to 3d.p 
percentf_percent<-paste(percentfr,"%",sep="") 
plot.ts(percentf,xaxt="n",ylab="Percentage growth rate",xlab="Quarter",main="Percentage Growth of the Forecasts") #plots the percentage growth of the forecasts and removes the x-axis values 
axis(1,at=seq(1,horiz,by=1),las=1) #adds the choosen values onto the x-axis at the points 
points(percentf,col="blue") #puts a blue circle around each of the points 
for(i in 1:length(percentfr)) 
{text(x=i,y=percentfr[i],labels=percentf_percent[i],cex=0.5,pos=1) 
text(x=i,y=percentfr[i],labels=percentf_percent[i],cex=0.5,pos=3)} #adds the percentage values next to the points on both the left and right side 
fulldata<-c(datan,Arimab2f,0,0) #this makes a vector of all the data with the forecasts, the added zeros make it easier to seperate the years if it doesnt finish in the 4th quarter 
fullmat<-matrix(fulldata,nrow=freqdata) #produces a matrix of the full data with the years seperated into columns 
full1mat<-fullmat[,-1] #removes the first column from the matrix 
full2mat<-matrix(c(full1mat,0,0,0,0),nrow=freqdata) #makes a matrix with a zero column at the end to account for the one removed in the last line 
percent1<-((full2mat[,1]/fullmat[,1])-1)*100 
percent2<-((full2mat[,2]/fullmat[,2])-1)*100 
percent3<-((full2mat[,3]/fullmat[,3])-1)*100 
percent4<-((full2mat[,4]/fullmat[,4])-1)*100 
percent5<-((full2mat[,5]/fullmat[,5])-1)*100 
percent6<-((full2mat[,6]/fullmat[,6])-1)*100 
percent7<-((full2mat[,7]/fullmat[,7])-1)*100 
percent8<-((full2mat[,8]/fullmat[,8])-1)*100 
#percent9<-((full2mat[,9]/fullmat[,9])-1)*100 #add as many percents as there is years in the data 
#percent10<-((full2mat[,10]/fullmat[,10])-1)*100 
#percent11<-((full2mat[,11]/fullmat[,11])-1)*100 
#percent12<-((full2mat[,12]/fullmat[,12])-1)*100 
#percent13<-((full2mat[,13]/fullmat[,13])-1)*100 
#percent14<-((full2mat[,14]/fullmat[,14])-1)*100 
percentagegrowth<-c(percent1,percent2,percent3,percent4,percent5,percent6,percent7,percent8)#,percent9,percent10,percent11,percent12,percent13,percent14) #puts the percentage growths for each year in the same vector 
percentagegrowth1<-head(percentagegrowth,-(length(fullmat)-length(datan))) #removes the unnecessary values from the end of the matrix 
zero<-matrix(,nrow=(length(percentagegrowth1)-length(percentf))) #creates a matrix with no values 
percentf1<-c(zero,percentf) #creates a vector with the NA values and the percantage growth of the forecast data 
percentagegrowth1r<-round(percentagegrowth1,1) 
names(percentagegrowth1)<-c("09 q1","09 q2","09 q3","09 q4","10 q1","10 q2","10 q3","10 q4","11 q1","11 q2","11 q3","11 q4","12 q1","12 q2","12 q3","12 q4","13 q1","13 q2","13 q3","13 q4","14 q1","14 q2","14 q3","14 q4","15 q1","15 q2") 
percentagegrowth1_percent<-paste(percentagegrowth1r,"%",sep="") 
plot.ts(percentagegrowth1,xaxt="n",xlab="Quarter",ylab="Percentage growth(%)",main="Year-over-Year Quarterly Percentage Growth") #plots all the percentage growth points 
for(i in 1:length(percentagegrowth1r)) 
{text(x=i,y=percentagegrowth1r[i],labels=percentagegrowth1_percent[i],cex=0.5,font=2,pos=1) 
text(x=i,y=percentagegrowth1r[i],labels=percentagegrowth1_percent[i],cex=0.5,font=2,pos=3)} 
lines(percentf1,col="red") #adds the forecasted data as a red line 
points(percentagegrowth1r,col="blue") #circles each of the points making them easier to see 
axis(1,at=seq(1,length(percentagegrowth1),by=1),labels=names(percentagegrowth1),las=2,cex.axis=0.6) 
legend("topright",c("Red=Forecasted data")) 
+0

还有就是你的键盘上的空格键。编写代码时使用它。这是不可读的。此外,您可以包含换行符和适当的缩进(例如,在for循环中)也有助于可靠性。 – Roland 2014-09-10 08:52:27

+1

谢谢你的建议,我如何为'for'循环做适当的缩进? – 2014-09-10 08:55:29

+0

你如何获得你的freqdata? – 2014-09-10 09:14:52

回答

3

你举的例子是远远最好的,但尝试以此为开端

lastyr<-tail(datan, horiz) #selects the last values from the original data 
percentf <- ((arimab2f/lastyr) - 1) * 100 #finds the percentage growth of the forecasted data 
percentfr<-round(percentf,digits = 2) #rounds the results to 3d.p 
percentf_percent<-paste(percentfr, "%", sep="") 

plot.ts(percentf, xaxt="n", ylab="Percentage growth rate", 
     xlab="Quarter", 
     main="Percentage Growth of the Forecasts") #plots the percentage growth of the forecasts and removes the x-axis values 
axis(1, at = seq(1, horiz, by=1), las=1) #adds the choosen values onto the x-axis at the points 
points(percentf, col="blue") #puts a blue circle around each of the points 

for(i in 1:length(percentfr)){ 
    text(x=i,y=percentfr[i], labels=percentf_percent[i], cex=0.5, pos=1) 
    text(x=i,y=percentfr[i], labels=percentf_percent[i], cex=0.5, pos=3) 
} #adds the percentage values next to the points on both the left and right side 

fulldata<-c(datan, arimab2f) #this makes a vector of all the data with the forecasts, the added zeros make it easier to seperate the years if it doesnt finish in the 4th quarter 
fullmat <- matrix(0, ncol=floor(length(fulldata)/4)+1, nrow = 4) #produces a matrix of the full data with the years seperated into columns 
fullmat[1:length(fulldata)] <- fulldata 
full1mat <- fullmat[ ,-1] #removes the first column from the matrix 
full2mat <- cbind(full1mat, 0) #makes a matrix with a zero column at the end to account for the one removed in the last line 
percent1 <- list() 
for(i in 1:ncol(full2mat)){ 
    percent1[[i]] <- ((full2mat[ ,i]/fullmat[ ,i])-1) * 100 
} 
percentagegrowth <- unlist(percent1) 
#removes the unnecessary values from the end of the matrix 
percentagegrowth1<-head(percentagegrowth,-(length(fullmat)-length(datan))) 
#creates a matrix with no values 
zero <- matrix(,nrow = (length(percentagegrowth1) - length(percentf))) 
#creates a vector with the NA values and the percantage growth of the forecast data 
percentf1 <- c(zero, percentf) 
percentagegrowth1r <- round(percentagegrowth1, 1) 
names(percentagegrowth1) <- paste('yq', 1:length(percentagegrowth1)) # alternative names 
percentagegrowth1_percent <- paste(percentagegrowth1r, "%", sep = "") 

#plots all the percentage growth points 
plot.ts(percentagegrowth1, xaxt = "n", xlab = "Quarter", 
     ylab="Percentage growth(%)", 
     main = "Year-over-Year Quarterly Percentage Growth") 

for(i in 1:length(percentagegrowth1r)){ 
    text(x = i,y = percentagegrowth1r[i],labels=percentagegrowth1_percent[i], 
     cex = 0.5, font = 2, pos = 1) 
    text(x=i,y=percentagegrowth1r[i],labels=percentagegrowth1_percent[i], 
     cex=0.5,font=2,pos=3) 
} 
lines(percentf1,col="red") #adds the forecasted data as a red line 
points(percentagegrowth1r,col="blue") #circles each of the points making them easier to see 
axis(1, at = seq(1, length(percentagegrowth1), by = 1), 
    labels = names(percentagegrowth1), 
    las = 2,cex.axis = 0.6) 
legend("topright", c("Red=Forecasted data")) 

plot ts

+1

你有没有改变代码中的任何东西?或者只是添加在图表中? – 2014-09-10 10:40:13

+0

您是否尝试过使用其他数据集?现在我刚刚删除了你的任意(手动)矩阵建筑物。没有其他办法。 – 2014-09-10 10:46:09

+1

啊好的,但我希望能够运行任何数据,而无需更改代码(通过添加百分比或带走percent1:percent14或必须更改名称(percentagegrowth1))是否有特定的循环我可以使用或事情就像我尝试过的所有东西一直出错 – 2014-09-10 10:48:54