2013-06-04 28 views
3

我正在自学F#(纯属兴趣,与工作或学业无关),想编写一个简单的解析器,用来统计某个Windows Phone应用程序在多个市场中的评论数。毫无疑问,我目前的代码很难看,但我正努力改进它,并遵循函数式编程范式。由于我来自C、C++、C#的世界,这对我来说并不容易。标题:使用F#下载Windows Phone应用程序评论

  • 来自C世界,我喜欢空值。我知道函数式编程/ F#不鼓励使用null,但是我找不到一种不使用它的方法。例如,在函数解析中有一个空检查。我怎么不这样做?

  • 现在,我的代码只统计第一页上的评论数量,但一个应用程序的评论可能超过10条,因此可能存在多个页面。我如何递归遍历所有页面(函数downloadReviews或parse)?

  • 我们如何将这段代码扩展为完全异步?

以下是我到目前为止的代码。除了上面的问题之外,我真的很希望有人能够帮助我,并告诉我如何改进我的代码的整体结构。

open System 
open System.IO 
open System.Xml 
open System.Xml.Linq 
open Printf 

/// Outcome of fetching a single page.
type DownloadPageResult =
    {
        /// Address that was requested.
        Uri: System.Uri
        /// True when the download raised an exception.
        ErrorOccured: bool
        /// Body of the downloaded page as text.
        Source: string
    }

/// Result of parsing one page of reviews.
type ReviewData =
    {
        /// Address of the page that was parsed.
        CurrentPageUri: System.Uri
        /// Address of the following page; null when there is none.
        NextPageUri: System.Uri
        /// Number of review entries counted on the page.
        NumberOfReviews: int
    }

/// Helpers that build marketplace review-feed addresses.
module ReviewUrl =
    /// Appends `path` to the marketplace CDN root and wraps the result in a Uri.
    let getBaseUri path =
        let address =
            sprintf "http://cdn.marketplaceedgeservice.windowsphone.com/%s" path
        Uri address

    /// Builds the review-feed address for `appId` in the given country and locale.
    /// The remaining query parameters are fixed; `chunksize=10` is part of the query.
    let getUri country locale appId =
        let relativePath =
            sprintf "/v8/ratings/product/%s/reviews?os=8.0.0.0&cc=%s&oc=&lang=%s&hw=520170499&dm=Test&chunksize=10" appId country locale
        getBaseUri relativePath

/// Downloads `uri` synchronously and returns the page body.
/// Any exception raised along the way is swallowed and reported through
/// `ErrorOccured = true` with an empty `Source`.
let downloadPage (uri: System.Uri) =
    // Browser-like request headers, added in this order.
    let requestHeaders =
        [ "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
          "Accept-Encoding", "zip,deflate,sdch"
          "Accept-Language", "en-US,en;q=0.8,fr;q=0.6"
          "User-Agent", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1482.0 Safari/537.36" ]
    try
        use webClient = new System.Net.WebClient()
        // Echo the address being fetched.
        printfn "%s" (uri.ToString())
        for (name, value) in requestHeaders do
            webClient.Headers.Add(name, value)
        let body = webClient.DownloadString(uri)
        { Uri = uri; Source = body; ErrorOccured = false }
    with _ ->
        { Uri = uri; Source = String.Empty; ErrorOccured = true }

/// Builds the review-feed address for the given country, locale and application id,
/// then downloads that page.
let downloadReview country locale appId =
    ReviewUrl.getUri country locale appId
    |> downloadPage

/// Parses a downloaded review page.
/// Returns the page address, the address of the next page (null when the page has
/// no link with rel="next" or when the download failed) and the number of Atom
/// `entry` elements found in the document.
/// XDocument.Load is not guarded, so malformed XML still raises.
let parse(pageResult: DownloadPageResult) =
    if pageResult.ErrorOccured then
        { CurrentPageUri = pageResult.Uri; NextPageUri = null; NumberOfReviews = 0 }
    else
        // Dispose the reader once the document has been loaded.
        use reader = new StringReader(pageResult.Source)
        let doc = XDocument.Load(reader)
        let ns = XNamespace.Get("http://www.w3.org/2005/Atom")

        // XElement.Attribute returns null when the attribute is absent, so a link
        // without `rel` must be skipped instead of dereferenced.
        let isNextLink (link: XElement) =
            match link.Attribute(XName.Get("rel")) with
            | null -> false
            | rel -> rel.Value = "next"

        // NOTE(review): this reads the element text, as the original code did.
        // Atom links usually carry their target in the `href` attribute - confirm
        // against the actual feed.
        let nextUrl =
            doc.Descendants(ns + "link")
            |> Seq.tryFind isNextLink
            |> Option.map (fun link -> link.Value)

        let nextPageUri =
            match nextUrl with
            | Some url -> ReviewUrl.getBaseUri(url)
            | None -> null

        { CurrentPageUri = pageResult.Uri
          NextPageUri = nextPageUri
          NumberOfReviews = doc.Descendants(ns + "entry") |> Seq.length }

/// Downloads and parses the first review page of the hard-coded application for `locale`.
/// The country code is the part of `locale` from index 3 onwards.
let downloadReviews(locale: string) =
    let country = locale.Substring(3)
    "4e08377c-1240-4f80-9c35-0bacde2c66b6"
    |> downloadReview country locale
    |> parse


/// Entry point: fetches the review data for each locale and prints the results.
[<EntryPoint>]
let main argv =
    [| "en-US"; "en-GB" |]
    |> Array.map downloadReviews
    |> printfn "%A"
    0
+0

关于不使用空值的问题——请查看F#中的Option类型:http://msdn.microsoft.com/en-us/library/dd233245.aspx。一旦不必再编写检查空值的代码,你就再也不想回头了。 –

回答

10

我对这个问题又多研究了一下,并尝试使用F# Data中的XML类型提供程序(type provider)和其他功能。这段代码并不完整,但应该足以让你了解思路(也说明类型提供程序真的很好用 :-)):

首先,我需要一些参考:

#r "System.Xml.Linq.dll" 
#r "FSharp.Data.dll" 
open FSharp.Data 
open FSharp.Net 

接下来,我写了下面的代码来下载一个示例页面。

// Downloads one sample review page for the en-US locale.
// The argument list is indented past the method name so that it is parsed as
// the call's arguments rather than as a separate expression.
let data =
    Http.Request
        ("http://cdn.marketplaceedgeservice.windowsphone.com//v8/ratings/product/4e08377c-1240-4f80-9c35-0bacde2c66b6/reviews",
         query=["os", "8.0.0.0"; "cc", "US"; "lang", "en-US"; "hw", "520170499"; "dm", "Test"; "chunksize", "10" ],
         headers=["User-Agent", "F#"])

我把示例保存为D:\temp\appstore.xml,然后使用XML类型提供程序得到一个用于解析页面的类型:

/// Page type generated by the XML type provider from the sample file D:\temp\appstore.xml.
type PageDocument = XmlProvider< @"D:\temp\appstore.xml" >

然后你就可以像这样下载并解析页面(这里演示了如何获取评论数量以及下一页链接的信息):

/// Asynchronously downloads and parses one review page for `appId` in `locale`.
/// Returns a pair: the number of review entries on the page and, when present,
/// the `Href` of the link whose `Rel` is "next".
let parseAsync (locale:string) appId = async {
    // The country code is the part of the locale from index 3 onwards.
    let country = locale.Substring(3)
    let url =
        "http://cdn.marketplaceedgeservice.windowsphone.com//v8/ratings/product/"
        + appId + "/reviews"

    // Make the request (asynchronously) using the parameters specified.
    // The call is indented past `let!` so that it forms the binding's body.
    let! data =
        Http.AsyncRequest
            (url,
             query=[ "os", "8.0.0.0"; "cc", country; "lang", locale;
                     "hw", "520170499"; "dm", "Test"; "chunksize", "10" ],
             headers=["User-Agent", "F#"])

    // Parse the result using the type-provider generated type
    let page = PageDocument.Parse(data)

    // Now you can type 'page' followed by '.' and explore the results!
    // page.GetLinks() returns all links and page.GetEntries() returns
    // review entries. Each link also has 'Rel' and 'Href' properties:
    let nextLink =
        page.GetLinks()
        |> Seq.tryFind (fun link -> link.Rel = "next")
        |> Option.map (fun link -> link.Href)
    let reviewsCount = page.GetEntries().Length
    return (reviewsCount, nextLink) }
+0

哇!谢谢Tomas,这非常简洁,也很有趣! :)但是,当我尝试运行parseAsync时,出现以下错误:类型约束不匹配。类型 (string -> Async<int * string option>)[] 与类型 seq<Async<'a>> 不兼容。类型 'Async<'a>' 与类型 'string -> Async<int * string option>' 不匹配。我仍在努力理解这些错误信息。知道这是什么意思吗? – Martin

+0

此片段只下载一个区域设置,因此您可以使用 parseAsync "en-US" "{... guid ...}" |> Async.RunSynchronously 来运行它。 –

+0

有关异步工作流的更多信息,请查看在此MSDN页面上:http://msdn.microsoft.com/en-us/library/dd233250.aspx –

2

让代码变成异步的一般模式是:先找到(调用树中某处)开销较大的I/O原始操作,然后从那里逐层“向上”,让使用它的代码也变成异步,直到到达需要阻塞等待的地方。

在您的例子中,原始操作是下载,所以您应该先把downloadPage改成异步:

/// Asynchronous variant of downloadPage: fetches `uri` without blocking.
/// Any exception raised along the way is swallowed and reported through
/// `ErrorOccured = true` with an empty `Source`.
let downloadPage (uri: System.Uri) = async {
    try
        use client = new System.Net.WebClient()
        // Echo the address being fetched.
        let address = uri.ToString()
        printfn "%s" address
        // (Headers omitted)
        let! body = client.AsyncDownloadString(uri)
        return { Uri = uri; Source = body; ErrorOccured = false }
    with _ ->
        return { Uri = uri; Source = String.Empty; ErrorOccured = true } }

您需要把代码包装在async { ... }中,使用let!调用DownloadString的异步版本,并使用return返回结果(在两个分支中都要这样做)。

然后您需要对downloadReview和downloadReviews等函数做同样的修改(同样包装在async块中,使用let!调用其他异步操作,或者使用return!调用downloadPage等)。最后,如果您正在编写控制台应用程序,则需要阻塞等待,但可以并行运行针对不同区域设置的下载。假设downloadReviews是异步的:

// Locales whose reviews are downloaded.
let locales = [| "en-US"; "en-GB" |]

// One asynchronous download per locale, composed into a single parallel
// computation that is then run to completion (blocking).
let results =
    let downloads = Array.map downloadReviews locales
    let combined = Async.Parallel downloads
    Async.RunSynchronously combined

关于其他问题,我认为在上面的例子中使用null可能没问题(您调用的LINQ会返回null,所以没有简单的方法可以避免)。实际上也可以使用option类型,但稍微有点棘手——如果感兴趣,请参阅this snippet。

此外,您可以使用F# Data Library中的Http.AsyncRequest方法,它提供了一种更简单的方式来构建复杂的HTTP请求(不过我是该库的贡献者之一,所以我有偏见!)

2

正如Tomas所说,创建一个基于异步的DownloadString版本(或者直接使用他的FSharp.Data库来处理)会更“函数式”。

您还可以将FSharp.Data与ExtCore结合使用,以充分利用ExtCore中的asyncMaybe和asyncChoice工作流。这些工作流在普通异步工作流之上提供了非常易用的错误处理。

无论如何,我花了几分钟清理你的代码。这不是很多,但它确实简化了几处代码:

open System 
open System.IO 
open System.Xml 
open System.Xml.Linq 
open Printf 

/// Outcome of fetching a single page.
type DownloadPageResult =
    {
        /// Address that was requested.
        Uri : System.Uri
        /// True when the download raised an exception.
        ErrorOccured : bool
        /// Body of the downloaded page as text.
        Source : string
    }

/// Result of parsing one page of reviews.
type ReviewData =
    {
        /// Address of the page that was parsed.
        CurrentPageUri : System.Uri
        /// Address of the following page, or None when there is none.
        NextPageUri : System.Uri option
        /// Number of review entries counted on the page.
        NumberOfReviews : uint32
    }

/// Helpers that build marketplace review-feed addresses.
module ReviewUrl =
    /// Absolute root address of the marketplace CDN.
    let baseUri = Uri ("http://cdn.marketplaceedgeservice.windowsphone.com/", UriKind.Absolute)

    /// Builds the review-feed address for `appId` in the given country and locale.
    /// The GUID is formatted with the "D" format specifier.
    let getUri country locale (appId : System.Guid) =
        let relativePath =
            sprintf "/v8/ratings/product/%s/reviews?os=8.0.0.0&cc=%s&oc=&lang=%s&hw=520170499&dm=Test&chunksize=10" (appId.ToString "D") country locale
        Uri (baseUri, relativePath)

/// Downloads `uri` synchronously and returns the page body.
/// Any exception raised along the way is swallowed and reported through
/// `ErrorOccured = true` with an empty `Source`.
let downloadPage (uri : System.Uri) =
    // Browser-like request headers, added in this order.
    let requestHeaders =
        [ "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
          "Accept-Encoding", "zip,deflate,sdch"
          "Accept-Language", "en-US,en;q=0.8,fr;q=0.6"
          "User-Agent", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1482.0 Safari/537.36" ]
    try
        use webClient = new System.Net.WebClient()
        // Echo the address being fetched.
        printfn "%s" (uri.ToString())
        for (name, value) in requestHeaders do
            webClient.Headers.Add(name, value)
        let body = webClient.DownloadString uri
        { Uri = uri; Source = body; ErrorOccured = false }
    with _ ->
        { Uri = uri; Source = String.Empty; ErrorOccured = true }


/// Parses a downloaded review page.
/// Returns the page address, the address of the next page (None when the page has
/// no non-empty link with rel="next" or when the download failed) and the number
/// of Atom `entry` elements found in the document.
/// XDocument.Load is not guarded, so malformed XML still raises.
let parse (pageResult : DownloadPageResult) =
    if pageResult.ErrorOccured then
        { CurrentPageUri = pageResult.Uri; NextPageUri = None; NumberOfReviews = 0u }
    else
        use reader = new StringReader (pageResult.Source)
        let doc = XDocument.Load reader
        let ns = XNamespace.Get "http://www.w3.org/2005/Atom"

        // XElement.Attribute returns null when the attribute is absent, so a link
        // without `rel` must be skipped instead of dereferenced.
        let isNextLink (link : XElement) =
            match link.Attribute (XName.Get "rel") with
            | null -> false
            | rel -> rel.Value = "next"

        // NOTE(review): this reads the element text, as the original code did.
        // Atom links usually carry their target in the `href` attribute - confirm
        // against the actual feed.
        let nextPageUri =
            doc.Descendants (ns + "link")
            |> Seq.tryFind isNextLink
            |> Option.map (fun link -> link.Value)
            |> Option.bind (fun url ->
                if System.String.IsNullOrEmpty url then None
                else Some (Uri (ReviewUrl.baseUri, url)))

        { CurrentPageUri = pageResult.Uri
          NextPageUri = nextPageUri
          NumberOfReviews = doc.Descendants (ns + "entry") |> Seq.length |> uint32 }

/// Downloads and parses the first review page of the hard-coded application for `locale`.
/// The country code is the part of `locale` from index 3 onwards.
let downloadReviews (locale : string) =
    let appId = System.Guid "4e08377c-1240-4f80-9c35-0bacde2c66b6"
    let country = locale.Substring 3
    let pageResult = ReviewUrl.getUri country locale appId |> downloadPage
    parse pageResult


/// Entry point: fetches the review data for each locale and prints the results.
[<EntryPoint>]
let main argv =
    [| "en-US"; "en-GB" |]
    |> Array.map downloadReviews
    |> printfn "%A"
    0
相关问题