2011-06-29 42 views
1

我对R相当陌生,一直试图让我的头脑更加优雅地使用许多基于矢量的函数。许多其他答案提供了一个解决方案的一部分,但我似乎无法得到一个组合工作。一些指导会很好。在多个折叠的数据帧上计算多个函数

我有股票的交易结果与多个列的数据帧,我只显示了两个位置:

structure(list(Trade.Profit.Loss.Perc = c(4.25, 0.32, 3.57, -14.06, 
-11.68, 0, 3.03, -22.76, 19.25, 0, 7.2, 1.56, 4.1, -2.27, -9.68, 
7.14, 1.41, -4.96, -7.95, 17.59, -9.09, 4.41, -4.92, -3.75, -13.58, 
-2.44, -3.79, -6.08, -6.63, 7.06, 74.29, -7.64, 45.53, -11.42, 
13.6, -6.47, 18.77, 20.69, -9.67, -4.64, 93.88, 11.96, 0, 3.33, 
-18.27, 0, -8.5, 7.92, 50.86, 10, 24.39, -9.68, -3.91, -14.6, 
3.03, -13.43, -2.09, -0.5, 16.67, 14.52, 0.62, 6, -7.28, -0.47, 
-4.01, 17.4, 12.41, 24.63, 4.71, -15.02, -1.68, 41.13, 0.33, 
0, -6.49, 8.51, -4.29, -2.28, 7.51, 32.27, -9.7, 151.57, 2.31, 
0.48, 31.49, 2.46, 6.61, -0.22, 11.19, -7.63, -4.56, 2.48, -0.62, 
-6.25, 50, -3.2, 27.33, -6.25, 36.03, 29, 11.69, -2.34, -3.77, 
-1.45, -9.76, -5.68, 17.62, -2.7, -1.6, 42.36, -5.41, 0, 7.69, 
15.6, -2.6, -9.4, -4.75, 5.36, 1.07, 24.32, 14.58, 3.64, -29.17, 
-8.27, 8.68, -11.56, -3.57, -2.4, -1.36, -16.98, 3.75, -4.2, 
-8.62, -6.19, -6.31, -11.38, -4.65, 35.74, -6.59, -3.5, -12.53, 
18.18, 1.77, -8.14, -1.75, -0.75, 5.56, 58.54, -6.19, -7.31, 
4.39, 3.25, 2.52, -2.33, -5.3, 27.09, -1.11, -37.14, -7.26, -1.02, 
0, -3.14, 6.56, -7.81, -13.04, 22.38, -1.36, -1.2, -7.49, -9.09, 
11.68, -8.46, -4.42, -1.54, -2.32, 1.2, 0, 4.65, -5.75, -2.5, 
11.74, 7.52, 12.37, 1.22, 14.18, -10.47, 20.63, -6.28, -0.01, 
16.67, 3.12, 5.9, 9.96, 3.45, 38.64, -1.98, 0.71, 6.1, 29.41), 
    Entry.ATRVE = c(2.022, 2.031, 3.979, 3.478, 2.75, 1.784, 
    1.971, 2.904, 1.69, 1.268, 2.624, 1.37, 2.531, 3.379, 1.871, 
    1.989, 3.235, 1.26, 2.423, 0.907, 2.406, 3.459, 1.328, 2.885, 
    3.803, 3.506, 1.738, 2.245, 2.279, 3.661, 2.256, 1.633, 1.777, 
    3.558, 2.336, 1.746, 1.896, 3.075, 3.812, 2.489, 3.372, 3.295, 
    2.022, 1.215, 2.007, 2.644, 3.605, 1.576, 2.596, 3.212, 1.4, 
    3.031, 2.916, 2.254, 2.19, 3.661, 2.046, 2.537, 3.246, 2.091, 
    2.762, 2.742, 1.345, 1.91, 1.293, 1.348, 2.842, 2, 2.672, 
    1.779, 2.068, 3.433, 3.006, 3.525, 3.575, 3.36, 2.168, 1.688, 
    2.567, 0.835, 1.652, 1.422, 3.11, 2.454, 2.27, 1.199, 2.571, 
    1.85, 3.092, 1.932, 1.397, 1.859, 3.141, 2.821, 3.549, 1.594, 
    1.941, 2.526, 2.274, 1.723, 2.058, 3.126, 2.941, 1.953, 2.55, 
    1.909, 2.715, 2.394, 3.152, 2.691, 2.063, 3.163, 1.034, 1.059, 
    0.362, 1.979, 2.075, 1.087, 0.893, 1.56, 2.121, 3.507, 2.813, 
    2.56, 1.459, 2.314, 3.137, 1.798, 2.551, 3.36, 1.41, 1.083, 
    2.641, 2.703, 3.341, 3.831, 2.336, 2.856, 1.906, 2.14, 3.138, 
    1.047, 2.043, 2.467, 1.328, 1.092, 1.821, 1.972, 3.94, 2.412, 
    2.314, 1.668, 0.626, 1.862, 3.442, 2.479, 2.598, 3.307, 1.433, 
    3.45, 3.47, 3.082, 1.978, 2.764, 3.775, 1.908, 2.695, 2.571, 
    1.934, 3.324, 2.411, 1.573, 2.579, 2.34, 3.684, 2.008, 3.225, 
    1.435, 3.283, 1.993, 1.754, 1.063, 1.208, 1.903, 1.312, 2.875, 
    2.676, 2.426, 0.997, 2.654, 1.323, 1.717, 2.547, 1.346, 2.055, 
    1.551, 2.215, 1.396, 3.314), X = c(0.177371087, 0.906809191, 
    0.208469544, 0.151138531, 0.345978372, 0.574636442, 0.385996104, 
    0.861229906, 0.141104879, 0.561812589, 0.340836813, 0.756585548, 
    0.058332616, 0.349425684, 0.932898281, 0.004165203, 0.41053952, 
    0.077378432, 0.058095648, 0.497867816, 0.216763889, 0.571999138, 
    0.800967707, 0.147823619, 0.568139526, 0.952335278, 0.414131303, 
    0.930967507, 0.951035818, 0.655865856, 0.363502228, 0.319418729, 
    0.803576908, 0.302248103, 0.374374328, 0.306277968, 0.562535965, 
    0.45865619, 0.528323919, 0.228827088, 0.090731023, 0.810054676, 
    0.040312554, 0.936651623, 0.613018811, 0.323526959, 0.938253046, 
    0.59613313, 0.096626581, 0.295683059, 0.691093101, 0.370450864, 
    0.953446954, 0.002676696, 0.036867464, 0.304420348, 0.108535358, 
    0.642157095, 0.683799239, 0.751778295, 0.394137427, 0.888028432, 
    0.008651793, 0.41384267, 0.004815106, 0.25080828, 0.022183931, 
    0.598519722, 0.867941507, 0.987371256, 0.56446045, 0.0576671, 
    0.32560138, 0.835096526, 0.585997663, 0.028029888, 0.982674378, 
    0.135163749, 0.717039075, 0.764402586, 0.648966357, 0.48131708, 
    0.364586775, 0.664295882, 0.594879264, 0.131346724, 0.153489409, 
    0.396639421, 0.240334462, 0.970626753, 0.872674761, 0.965853941, 
    0.018889313, 0.500850411, 0.020251346, 0.477868178, 0.441027497, 
    0.464538719, 0.650548595, 0.113164376, 0.953159401, 0.828417591, 
    0.909501277, 0.974955387, 0.005544144, 0.796553436, 0.82135113, 
    0.196713654, 0.161114125, 0.040137343, 0.60080166, 0.731497271, 
    0.484325649, 0.195530412, 0.133857393, 0.052481523, 0.52277393, 
    0.258780518, 0.305323357, 0.609780934, 0.873767561, 0.897915264, 
    0.803924978, 0.844458949, 0.551162924, 0.707426453, 0.756850677, 
    0.782465398, 0.034789665, 0.722527887, 0.430711179, 0.312993878, 
    0.04175475, 0.661157221, 0.377607192, 0.293913794, 0.208493996, 
    0.58177576, 0.828698155, 0.057039434, 0.211224526, 0.269573861, 
    0.169065249, 0.672225791, 0.673528094, 0.258476807, 0.027798473, 
    0.739185813, 0.77504509, 0.76611552, 0.60618332, 0.215831874, 
    0.660387201, 0.529556218, 0.948005457, 0.99885867, 0.774597927, 
    0.885260589, 0.947632843, 0.624328215, 0.253942724, 0.721110859, 
    0.74616445, 0.042866232, 0.166422973, 0.473853092, 0.411103767, 
    0.284882459, 0.672563947, 0.367742347, 0.947790843, 0.981166871, 
    0.646502822, 0.130204379, 0.069492267, 0.115210676, 0.126707369, 
    0.247698717, 0.757196762, 0.607707261, 0.621710909, 0.859949287, 
    0.437920926, 0.245296529, 0.645008787, 0.564204912, 0.800645722, 
    0.767176007, 0.874495379, 0.037927782, 0.595771062, 0.073163803, 
    0.518055909, 0.765729075, 0.646506897, 0.077860654, 0.990334821, 
    0.220478109, 0.196541822), profitable = c(1, 1, 1, 0, 0, 
    1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 
    0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 
    1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 
    0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 
    1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 
    1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 
    1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 
    0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 
    0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 
    1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 
    0, 1, 1, 1), ATRVE.cut = structure(c(3L, 3L, 4L, 4L, 3L, 
    2L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 4L, 2L, 2L, 4L, 2L, 3L, 1L, 
    3L, 4L, 2L, 3L, 4L, 4L, 2L, 3L, 3L, 4L, 3L, 2L, 2L, 4L, 3L, 
    2L, 2L, 4L, 4L, 3L, 4L, 4L, 3L, 2L, 3L, 3L, 4L, 2L, 3L, 4L, 
    2L, 4L, 3L, 3L, 3L, 4L, 3L, 3L, 4L, 3L, 3L, 3L, 2L, 2L, 2L, 
    2L, 3L, 3L, 3L, 2L, 3L, 4L, 4L, 4L, 4L, 4L, 3L, 2L, 3L, 1L, 
    2L, 2L, 4L, 3L, 3L, 2L, 3L, 2L, 4L, 2L, 2L, 2L, 4L, 3L, 4L, 
    2L, 2L, 3L, 3L, 2L, 3L, 4L, 3L, 2L, 3L, 2L, 3L, 3L, 4L, 3L, 
    3L, 4L, 2L, 2L, 1L, 2L, 3L, 2L, 1L, 2L, 3L, 4L, 3L, 3L, 2L, 
    3L, 4L, 2L, 3L, 4L, 2L, 2L, 3L, 3L, 4L, 4L, 3L, 3L, 2L, 3L, 
    4L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 4L, 3L, 3L, 2L, 1L, 2L, 4L, 
    3L, 3L, 4L, 2L, 4L, 4L, 4L, 2L, 3L, 4L, 2L, 3L, 3L, 2L, 4L, 
    3L, 2L, 3L, 3L, 4L, 3L, 4L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 
    3L, 3L, 3L, 1L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 4L), .Label = c("[0,1)", 
    "[1,2)", "[2,3)", "[3,4)", "[4,5)", "[5,6)", "[6,7)", "[7,8)", 
    "[8,9)", "[9,32)"), class = "factor")), .Names = c("Trade.Profit.Loss.Perc", 
"Entry.ATRVE", "X", "profitable", "ATRVE.cut"), row.names = c(NA, 
-199L), class = "data.frame") 

我想它崩溃后,多种功能应用到这个数据帧,也适用同样的功能到这个数据帧的子集;特别是利润交易和损失交易。到目前为止,我已经采取很多重复的蛮力方法:

Trades = read.csv("Trades.csv") 

#Bin the data according the breaks. 
breaks = seq(0,9,by=1) 
breaks = c(breaks,32) 

#Break Trades into Profits and Losses 
logP = Trades$Trade.Profit.Loss.Perc >= 0 
Profits = Trades[logP,] 
logL = Trades$Trade.Profit.Loss.Perc < 0 
Losses = Trades[logL,] 

#Collapse the dataframes by the ATRVE bins. 
T_ATRVE.cut = cut(Trades$Entry.ATRVE, breaks, right=FALSE) 
P_ATRVE.cut = cut(Profits$Entry.ATRVE, breaks, right=FALSE) 
L_ATRVE.cut = cut(Losses$Entry.ATRVE, breaks, right=FALSE) 

#Using Trades, on the specified column vector, use a bin list to calculate each bin statistic. 
#Number of Trades 
Trade_Num <- with (Trades, tapply(Trades$Trade.Profit.Loss.Perc,list(T_ATRVE.cut),length)) 
Profits_Num <- with (Profits, tapply(Profits$Trade.Profit.Loss.Perc,list(P_ATRVE.cut),length)) 
Losses_Num <- with (Losses, tapply(Losses$Trade.Profit.Loss.Perc,list(L_ATRVE.cut),length)) 


# Mean Return 
Trades_Mean <- with (Trades, tapply(Trades$Trade.Profit.Loss.Perc,list(T_ATRVE.cut),mean)) 
Profits_Mean <- with (Profits, tapply(Profits$Trade.Profit.Loss.Perc,list(P_ATRVE.cut),mean)) 
Losses_Mean <- with (Losses, tapply(Losses$Trade.Profit.Loss.Perc,list(L_ATRVE.cut),mean)) 

#Combine the results into one table. 
Results = cbind(Trade_Num, Profits_Num, Losses_Num, Trades_Mean, Profits_Mean, Losses_Mean) 

我看过使用plyr包以及排列的lapply功能与vector lists和功能不同的问题,但不能使他们的工作。我看到组件,但不能将它们粘合在一起! .......

从Ramnath将冷凝代码后我得到以下输出:

profitable ATRVE.cut Num  Mean 
1   0  [0,1) 2 -1.305000 
2   0  [1,2) 30 -5.135667 
3   0  [2,3) 40 -6.994250 
4   0  [3,4) 27 -8.513333 
5   1  [0,1) 4 13.362500 
6   1  [1,2) 39 15.673846 
7   1  [2,3) 36 15.483056 
8   1  [3,4) 21 15.055714 

我想输出到这个样子,有或没有额外的列:

profitable ATRVE.cut Num Mean Profitable Num Mean 
1   0  [0,1) 2 -1.305000 1 4 13.362500 
2   0  [1,2) 30 -5.135667 1 39 15.673846 
3   0  [2,3) 40 -6.994250 1 36 15.483056 
4   0  [3,4) 27 -8.513333 1 21 15.055714 

回答

1

这是我的方法。它使用Hadley Wickham出色的plyr图书馆。

library(plyr) 
Trades = read.csv("Trades.csv") 

# define breaks to bin the data 
breaks = c(0:9, 32) 

# define profitable and ATRVE.cut variables 
Trades = transform(Trades, 
    profitable = ifelse(Trade.Profit.Loss.Perc >=0, 1, 0), 
    ATRVE.cut = cut(Entry.ATRVE, breaks, right = FALSE) 
) 

# split data frame by ATRVE.cut and profitable and apply functions 
Trades_Summary = ddply(Trades, .(ATRVE.cut, profitable), summarize, 
    Num = length(Trade.Profit.Loss.Perc), 
    Mean = mean(Trade.Profit.Loss.Perc) 
) 

print(Trades_Summary) 

这将产生输出

ATRVE.cut profitable Num  Mean 
1  [0,1)   0 6 -5.383333 
2  [1,2)   0 3 -13.883333 

编辑:为了通过OP为所需格式的输出,这里有几个附加的行

p0 = subset(Trades_Summary, profitable == 0) 
p1 = subset(Trades_Summary, profitable == 1) 
merge(p0, p1, by = 'ATRVE.cut') 

这将产生输出

ATRVE.cut profitable.x Num.x Mean.x  profitable.y 
1  [0,1)   0  2 -1.305000   1 
2  [1,2)   0 30 -5.135667   1 
3  [2,3)   0 40 -6.994250   1 
4  [3,4)   0 27 -8.513333   1 
    Num.y Mean.y 
1  4 13.36250 
2 39 15.67385 
3 36 15.48306 
4 21 15.05571 
+0

谢谢你,这更清晰。总结一下,根据条件添加额外的列,然后将这些列作为变量传递给ddply函数以供其拆分。 1.如果我想列绑定一个变量而不是行绑定,我该怎么做? 2.我也注意到长度和平均值是错误的;他们似乎正在使用错误的元素数量。当我可以附加文件时,我会的。 –

+0

dput(Trades)= structure(list(Entry.ATRVE = c(0.487,0.61,0.663,0.771,0.816, 1.5,0.5,0.5,1.4,1.5),Trade.Profit.Loss.Perc = c(-4.76 ,-4.2,0.4,10,-3.09,-8.14,4,4,4,4),有利可图= c(0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0 ),ATRVE.cut = structure(c(1L,1L,1L,1L,1L, 2L,1L,1L,2L,2L),.Label = c(“[0,1)”,“[1,2 )“,”[2,3]“,”[3,4]“, ”[4,5]“,”[5,6]“,”[6,7]“,”[7,8] “,”[8,9)“,”[9,32)“),class =”factor“)),.Names = c(”Entry.ATRVE“, ”Trade.Profit.Loss.Perc“ ATRVE.cut“),row.names = c(NA, -10L),class =”data.frame“) –

+0

@查看左边。看到我编辑的答案。它工作正常。对于“有利可图”和“ATRVE.cut”的每个值,它计算交易的数量和平均值。如果你可以发布你正在寻找的输出是什么,我可以与我的进行比较并进一步调试 – Ramnath