2008-11-17 116 views
5

我仍在努力研究F#的事情 - 试图弄清楚如何在F#中“思考”,而不是仅仅从我认识的其他语言翻译出来。F#中的移动平均值计算#

我最近一直在考虑你之前和之后没有1:1地图的情况。 List.map掉落的情况。

其中一个例子是移动平均值,其中通常在n项中取平均值时,您将获得长度为len的列表的len-n + 1个结果。

对于那里的高手,这是一个很好的方法来做到这一点(使用队列捏Jomo Fisher)?

//Immutable queue, with added Length member 
type Fifo<'a> = 
    new()={xs=[];rxs=[]} 
    new(xs,rxs)={xs=xs;rxs=rxs} 

    val xs: 'a list; 
    val rxs: 'a list; 

    static member Empty() = new Fifo<'a>() 
    member q.IsEmpty = (q.xs = []) && (q.rxs = []) 
    member q.Enqueue(x) = Fifo(q.xs,x::q.rxs) 
    member q.Length() = (List.length q.xs) + (List.length q.rxs) 
    member q.Take() = 
     if q.IsEmpty then failwith "fifo.Take: empty queue" 
     else match q.xs with 
       | [] -> (Fifo(List.rev q.rxs,[])).Take() 
       | y::ys -> (Fifo(ys, q.rxs)),y 

//List module, add function to split one list into two parts (not safe if n > lst length) 
module List = 
    let splitat n lst = 
     let rec loop acc n lst = 
      if List.length acc = n then 
       (List.rev acc, lst) 
      else 
       loop (List.hd lst :: acc) n (List.tl lst) 
     loop [] n lst 

//Return list with moving average accross len elements of lst 
let MovingAverage (len:int) (lst:float list) = 
    //ugly mean - including this in Fifo kills genericity 
    let qMean (q:Fifo<float>) = ((List.sum q.xs) + (List.sum q.rxs))/(float (q.Length())) 

    //get first part of list to initialise queue 
    let (init, rest) = List.splitat len lst 

    //initialise queue with first n items 
    let q = new Fifo<float>([], init) 

    //loop through input list, use fifo to push/pull values as they come 
    let rec loop (acc:float list) ls (q:Fifo<float>) = 
     match ls with 
     | [] -> List.rev acc 
     | h::t -> 
      let nq = q.Enqueue(h) //enqueue new value 
      let (nq, _) = nq.Take() //drop old value 
      loop ((qMean nq)::acc) t nq //tail recursion 

    loop [qMean q] rest q 

//Example usage  
MovingAverage 3 [1.;1.;1.;1.;1.;2.;2.;2.;2.;2.] 

(也许一个更好的方式是由先进先出继承来实现MovingAverageQueue?)

回答

7

如果你不在乎太多的表现,这里是一个非常简单的解决办法:

#light 

let MovingAverage n s = 
    Seq.windowed n s 
    |> Seq.map Array.average 

let avgs = MovingAverage 5000 (Seq.map float [|1..999999|]) 

for avg in avgs do 
    printfn "%f" avg 
    System.Console.ReadKey() |> ignore 

这将重新计算从头每个“窗口”的平均值,因此,如果窗户是大是很差。

在任何情况下,请Seq.windowed:

http://research.microsoft.com/projects/cambridge/fsharp/manual/FSharp.Core/Microsoft.FSharp.Collections.Seq.html

,因为它很方便的在你这样的事情后面的口袋。

+0

非常好,这是帮助我“成长”的那种答案 - 即发现已经存在的东西,而不是重新发明轮子! – Benjol 2008-11-18 06:38:13

+1

死链接,我想现在所有的文档都被移到了msdn,所以类似的页面将是http://msdn.microsoft.com/en-us/library/dd233209(VS.100).aspx或http:// msdn .Microsoft.com/en-us/library/ee353635(VS.100).aspx – 2010-01-20 02:35:35

-2

据我所看到的,你的代码是充满了let声明。我不熟悉F#,但做过一些Haskell。功能范式意味着不考虑“如何”,而是考虑“什么”:你认为Fifo,但实际上应该指定移动平均的语义。

-- the limited average of a list 
limitedaverage 0 _ = 0 
limited verage n (x:xs) = (x/n) + (limited average (n-1) xs) 

-- a list, transformed into a sequence of moving averages of 
movingaverages n [] = [] 
movingaverages n (x:xs) = (movingaverage n (x:xs) : movingaverages n xs) 
+0

嗯,测试? 我不是一个Haskellite,但有限平均每次除以最初的n,否则结果是错误的。 移动平均值应读取(限制平均数n(x:xs):限制平均数n xs)? – Benjol 2008-11-17 13:36:45

1

这里的哈斯克尔解决方案的(纠正,我希望)F#版本提出here

编辑:现在尾递归,没有任何快,但不与爆炸N = 50000(请参阅非尾递归版本的编辑历史)

let LimitedAverage n ls = 
    let rec loop acc i n ls = 
     match i with 
     | 0 -> acc //i counts down from n to 0, when we hit 0 we stop 
     | _ -> match ls with 
       | [] -> acc //if we hit empty list before end of n, we stop too 
       | x::xs -> (loop (acc + (x/float n)) (i - 1) n xs) //divide this value by n, perform average on 'rest' of list 
    loop 0. n n ls 

LimitedAverage 50000 (List.map float [1..9999999]) 
//>val it : float = 25000.5 

let rec MovingAverage3 n ls = 
    let rec acc loop i n ls = 
     match i with 
     | 0 -> List.rev acc //i counts down from n to 0, when we hit 0 we stop 
     | _ -> match ls with 
       | [] -> List.rev acc //if we hit empty list before end of n, we stop too 
       | x::xs -> loop (LimitedAverage2 n ls :: acc) (i - 1) n xs // limited average on whole list, then moving average on tail 
    loop [] (n + 1) n ls 

MovingAverage3 50000 (List.map float [1..9999999]) 
//>val it : float list = [25000.5; 25001.5; 25002.5; ...] 
+1

@Jon Harrop,我看到你:)。我的downvote评论在哪里? – Benjol 2010-08-11 13:22:05

2

如果做一下性能护理,那么你就可以计算出一个平均有效利用这样的移动(假设我们正在计算在3天的窗口移动平均)

 
Numbers[n] Running Total[n] 
---------  --------------- 
n[0] = 7  7 = Numbers[0] 
n[1] = 1  8 = RunningTotal[1-1] + Numbers[1] 
n[2] = 2  10 = RunningTotal[2-1] + Numbers[2] 
n[3] = 8  11 = RunningTotal[3-1] + Numbers[3] - Numbers[3-3] 
n[4] = 4  14 = RunningTotal[4-1] + Numbers[4] - Numbers[4-3] 
n[5] = 1  13 = RunningTotal[5-1] + Numbers[5] - Numbers[5-3] 
n[6] = 9  14 = RunningTotal[6-1] + Numbers[6] - Numbers[6-3] 
... 
N    RunningTotal[N] = RunningTotal[N-1] + Numbers[N] - Numbers[N-3] 

硬部分关于这是持有您以前的运行总数和号码 N窗口。我想出了下面的代码:

let movingAverage days l = 
    seq { 
     let queue = new Queue<_>(days : int) 
     let divisor = decimal days 

     let total = ref 0m 
     for cur in l do 
      queue.Enqueue(cur) 
      total := !total + cur 
      if queue.Count < days then 
       yield (cur, 0m) 
      else 
       yield (cur, !total/divisor) 
       total := !total - (queue.Dequeue()) 
    } 

这个版本是不是好看的Haskell代码,但应避免在每次运行时重新计算你的“窗口”,相关的性能问题。它保持运行总数并将先前使用的数字保存在队列中,所以它应该非常快。

只是为了好玩,我写了一个简单的基准:

#light 
open System 
open System.Collections.Generic 
open System.Diagnostics; 

let windowAverage days (l : #seq<decimal>) = Seq.windowed days l |> Seq.map (Seq.average) 

let princessAverage days l = 
    seq { 
     let queue = new Queue<_>(days : int) 
     let divisor = decimal days 

     let total = ref 0m 
     for cur in l do 
      queue.Enqueue(cur) 
      total := !total + cur 
      if queue.Count < days then 
       yield (cur, 0m) 
      else 
       yield (cur, !total/divisor) 
       total := !total - (queue.Dequeue()) 
    } 

let testData = 
    let rnd = new System.Random() 
    seq { for a in 1 .. 1000000 -> decimal (rnd.Next(1000)) } 

let benchmark msg f iterations = 
    let rec loop = function 
     | 0 ->() 
     | n -> f 3 testData |> ignore; loop (n - 1) 

    let stopWatch = Stopwatch.StartNew() 
    loop iterations 
    stopWatch.Stop() 
    printfn "%s: %f" msg stopWatch.Elapsed.TotalMilliseconds 

let _ = 
    let iterations = 10000000 
    benchmark "princessAverage" princessAverage iterations 
    benchmark "windowAverage" windowAverage iterations 
    printfn "Done" 

结果:

princessAverage: 1670.791800 
windowAverage: 2986.146900

我的版本是〜1.79x较快。

1

如果你关心性能,像优雅的代码,然后尝试

module MovingAverage = 
    let selfZip n l = 
     Seq.skip n l |> Seq.zip l 

    let runTotal i z = 
     Seq.scan (fun sum (s, e) -> sum - s + e) i z 

    let average n l:seq<'a> = 
     Seq.skip n l 
     |> selfZip n 
     |> runTotal (l |> Seq.take n |> Seq.sum) 
     |> Seq.map (fun sum -> decimal sum/decimal n) 

let ma = MovingAverage.average 2 myseq 

使用FSUnit我们可以测试它

let myseq = seq { for i in 0..10 do yield i } 

Seq.nth 0 ma |> should equal 0.5 
    Seq.nth 1 ma |> should equal 1.5 
    Seq.nth 2 ma |> should equal 2.5 
    Seq.nth 3 ma |> should equal 3.5 

算法的诀窍是第一和第n个数和 然后通过添加窗口的头部 并减去窗口的尾部来保持运行总计。滑动窗口为 ,通过对该序列执行自拉伸实现,但第二个参数为按窗口大小进行拉伸的第一个 参数。

在管道的末端,我们只是将窗口的运行总量除以窗口 的大小。

注意扫描就像折叠,但产生的每个版本的状态为 一个序列。

一个更优雅的解决方案,虽然可能与性能命中是 ,使观察,如果我们零填充序列,我们不需要 来计算初始和。

namespace Utils 

module MovingAverage = 
    let selfZip n l = 
     Seq.skip n l |> Seq.zip l 

    let rec zeros = 
     seq { yield 0.0; yield! zeros} 

    // Create a running total given 
    let runTotal z = 
     Seq.scan (fun sum (s,e) -> sum - s + e) 0.0 z 

    let average n l = 
     Seq.concat [(Seq.take n zeros); l] 
     |> selfZip n 
     |> runTotal 
     |> Seq.map (fun sum -> sum/float n) 
     |> Seq.skip n 

有可能是由于命中相关 包装两个序列的第二间接性能,但也许它的窗口​​

0

的大小取决于 这是我的版本是不显著。

let sma list n = 
    let len = List.length list 
    let rec loop acc sum cnt = 
     if cnt >= len then List.rev acc 
     else if cnt < n-1 then loop (0.0::acc) (sum + List.nth list cnt) (cnt+1) 
     else loop (((sum + List.nth list cnt)/(float n))::acc) (sum + (List.nth list cnt) - (List.nth list (cnt-n+1))) (cnt+1) 
    loop [] 0.0 0 

实施例:

sma (List.map float [5..50]) 5 
[0, 0, 0, 0, 7, 8, 9, ...]