2013-03-01 30 views
3

首先 - 道歉。这是我编译的第一个Haskell代码。我正在编译Real World Haskell第24章中的一些代码。代码使用在另一个源文件中实现的MapReduce引擎来计算一行中的单词数。这里是代码:如何在真实世界的Haskell中执行代码?

module Main where 

import Control.Monad (forM_) 
import Data.Int (Int64) 
import qualified Data.ByteString.Lazy.Char8 as LB 
import System.Environment (getArgs) 

import LineChunks (chunkedReadWith) 
import MapReduce (mapReduce, rnf) 

-- | Count the newline characters across a list of lazy ByteString chunks.
--
-- Each chunk is mapped to its own newline count and the per-chunk counts
-- are then summed; both phases are handed the 'rdeepseq' strategy.
lineCount :: [LB.ByteString] -> Int64
lineCount chunks = mapReduce rdeepseq newlinesIn rdeepseq sum chunks
  where
    -- Number of '\n' characters in a single chunk.
    newlinesIn = LB.count '\n'

-- | For every path given on the command line, print @path: N@ where @N@
-- is the result of running 'lineCount' over the file via 'chunkedReadWith'.
main :: IO ()
main = do
    args <- getArgs
    -- The lambda's do-body must be indented deeper than the enclosing
    -- block; otherwise it only parses under NondecreasingIndentation.
    forM_ args $ \path -> do
        numLines <- chunkedReadWith lineCount path
        putStrLn $ path ++ ": " ++ show numLines

这段代码编译得很好,我得到一个LineCount.exe。

现在,我应该如何使用它来计算文件中的行数?我有一个名为 “test” 的文件,其中包含一些测试文本。但是,当我在命令行中执行:

LineCount test 

在命令行中,我得到:

Exception: test: hGetBufSome: illegal operation (handle is closed) 

出了什么问题?

这里是另一个文件的详细代码:

module LineChunks 
    (
     chunkedReadWith 
    ) where 

import Control.DeepSeq (rnf)
import Control.Exception (bracket, finally)
import Control.Monad (forM, liftM)
import Control.Parallel.Strategies (NFData, rdeepseq)
import Data.Int (Int64)
import qualified Data.ByteString.Lazy.Char8 as LB
import GHC.Conc (numCapabilities)
import System.IO

-- | Describes one chunk of a file as a byte range.
data ChunkSpec = CS
    { chunkOffset :: !Int64  -- ^ Absolute position the chunk is read from.
    , chunkLength :: !Int64  -- ^ Number of bytes taken for the chunk.
    }
    deriving (Eq, Show)

-- | Split a file into chunks, run a pure function over the lazily read
-- chunk contents, and close every chunk handle afterwards.
--
-- * @chunkFunc@ decides how the file at @path@ is divided into chunks.
-- * @process@ consumes the chunk contents and produces the result.
--
-- The result is forced to normal form with 'rnf' /before/ the handles are
-- closed. This matters because the chunks are lazy ByteStrings backed by
-- those handles: if @r@ were returned unevaluated, the file would only be
-- read after 'hClose' and fail with "illegal operation (handle is closed)".
--
-- Note that @rdeepseq r \`seq\` return r@ is not a substitute: @rdeepseq r@
-- is an 'Eval' action, and forcing that action to WHNF does not run the
-- strategy, so @r@ would stay unevaluated.
withChunks :: (NFData a) =>
              (FilePath -> IO [ChunkSpec])
           -> ([LB.ByteString] -> a)
           -> FilePath
           -> IO a
withChunks chunkFunc process path = do
    (chunks, handles) <- chunkedRead chunkFunc path
    let r = process chunks
    -- 'finally' guarantees the handles are closed even if forcing r throws.
    (rnf r `seq` return r) `finally` mapM_ hClose handles

-- | Run a function over the contents of a file that has been split into
-- line-aligned chunks, using four chunks per available capability.
chunkedReadWith :: (NFData a) =>
                   ([LB.ByteString] -> a) -> FilePath -> IO a
chunkedReadWith func path = withChunks splitIntoChunks func path
  where
    -- Chunking function: 'lineChunks' with numCapabilities * 4 chunks.
    splitIntoChunks = lineChunks (numCapabilities * 4)
{-- /snippet withChunks --} 

{-- snippet chunkedRead --} 
-- | Open one handle per chunk and return the chunk contents together with
-- the handles that back them.
--
-- For each 'ChunkSpec' produced by @chunkFunc@, the file is opened again,
-- the handle is positioned at 'chunkOffset', and at most 'chunkLength'
-- bytes of its lazily read contents are taken.
--
-- The returned ByteStrings are lazy, so the caller must finish consuming
-- them before closing the returned handles.
chunkedRead :: (FilePath -> IO [ChunkSpec])
            -> FilePath
            -> IO ([LB.ByteString], [Handle])
chunkedRead chunkFunc path = do
    chunks <- chunkFunc path
    -- The lambda's do-body is nested one level deeper than this block so
    -- that it parses without relying on NondecreasingIndentation.
    liftM unzip . forM chunks $ \spec -> do
        h <- openFile path ReadMode
        hSeek h AbsoluteSeek (fromIntegral (chunkOffset spec))
        chunk <- LB.take (chunkLength spec) `liftM` LB.hGetContents h
        return (chunk, h)
{-- /snippet chunkedRead --} 

{-- snippet lineChunks --} 
-- | Divide a file into roughly @numChunks@ chunks whose boundaries fall
-- just after a newline character.
--
-- The nominal chunk size is the file size divided by @numChunks@. Starting
-- from each chunk's offset, the handle is moved forward by that size and
-- the file is scanned in 4096-byte reads for the next @'\n'@; the following
-- chunk starts right after it. When end of file is reached, the current
-- chunk extends to the end of the file.
--
-- The file is opened with 'bracket', so the handle used for scanning is
-- closed even if an exception is raised.
lineChunks :: Int -> FilePath -> IO [ChunkSpec]
lineChunks numChunks path =
    bracket (openFile path ReadMode) hClose $ \h -> do
        totalSize <- fromIntegral `liftM` hFileSize h
        let chunkSize = totalSize `div` fromIntegral numChunks
            -- Build the chunk list for the part of the file from offset on.
            findChunks offset = do
                let newOffset = offset + chunkSize
                hSeek h AbsoluteSeek (fromIntegral newOffset)
                -- Scan forward from off until a newline or EOF is found.
                let findNewline off = do
                        eof <- hIsEOF h
                        if eof
                            then return [CS offset (totalSize - offset)]
                            else do
                                bytes <- LB.hGet h 4096
                                case LB.elemIndex '\n' bytes of
                                    Just n -> do
                                        -- findChunks never returns an empty
                                        -- list: both of its branches yield
                                        -- at least one ChunkSpec.
                                        chunks@(c:_) <- findChunks (off + n + 1)
                                        let coff = chunkOffset c
                                        return (CS offset (coff - offset):chunks)
                                    Nothing -> findNewline (off + LB.length bytes)
                findNewline newOffset
        findChunks 0
{-- /snippet lineChunks --} 

-- | Ensure that a series of ChunkSpecs is contiguous and
-- non-overlapping: each chunk must end exactly where the next one starts.
-- Lists with fewer than two elements trivially satisfy the property.
prop_contig :: [ChunkSpec] -> Bool
prop_contig (CS o l:cs@(CS o' _:_)) | o + l == o' = prop_contig cs
                                    | otherwise = False
prop_contig _ = True
+0

听起来像是惰性 IO(lazy IO)又在作怪了。 – 2013-03-01 02:12:38

+0

如果不提供 'chunkedReadWith' 的代码(也许还需要更多代码),你很难得到有用的建议。 – 2013-03-01 02:33:10

+0

编辑问题提供它... – 2013-03-01 04:19:08

回答

2

进入《Real World Haskell》配套代码的 “ch24” 目录,做如下修改,然后运行:

ghc -O2 --make -threaded LineCount && ./LineCount LineCount.hs

然后它应该给输出

LineCount.hs: 22 

这里有必要的修改:

diff --git a/ch24/LineChunks.hs b/ch24/LineChunks.hs 
index 0e82805..bda104d 100644 
--- a/ch24/LineChunks.hs 
+++ b/ch24/LineChunks.hs 
@@ -6,7 +6,7 @@ module LineChunks 

import Control.Exception (bracket, finally) 
import Control.Monad (forM, liftM) 
-import Control.Parallel.Strategies (NFData, rnf) 
+import Control.DeepSeq(NFData,rnf) 
import Data.Int (Int64) 
import qualified Data.ByteString.Lazy.Char8 as LB 
import GHC.Conc (numCapabilities) 
diff --git a/ch24/LineCount.hs b/ch24/LineCount.hs 
index c6dd40b..46218e3 100644 
--- a/ch24/LineCount.hs 
+++ b/ch24/LineCount.hs 
@@ -7,11 +7,11 @@ import qualified Data.ByteString.Lazy.Char8 as LB 
import System.Environment (getArgs) 

import LineChunks (chunkedReadWith) 
-import MapReduce (mapReduce, rnf) 
+import MapReduce (mapReduce, rdeepseq) 

lineCount :: [LB.ByteString] -> Int64 
-lineCount = mapReduce rnf (LB.count '\n') 
-      rnf sum 
+lineCount = mapReduce rdeepseq (LB.count '\n') 
+      rdeepseq sum 

main :: IO() 
main = do 
diff --git a/ch24/MapReduce.hs b/ch24/MapReduce.hs 
index d0ff90b..87c79aa 100644 
--- a/ch24/MapReduce.hs 
+++ b/ch24/MapReduce.hs 
@@ -3,7 +3,7 @@ module MapReduce 
     mapReduce 
    , simpleMapReduce 
    -- exported for convenience 
- , rnf 
+ , rdeepseq 
    , rwhnf 
    ) where 

至于您遇到该错误的原因,请参阅此答案之前的版本。

+0

好的,现在就做出这些改变!谢谢! – 2013-03-02 01:33:19

+0

它的工作!非常感谢! – 2013-03-02 15:53:44

1

这为我工作:

module Main where 

import Control.Monad (forM_) 
import Data.Int (Int64) 
import qualified Data.ByteString.Lazy.Char8 as LB 
import System.Environment (getArgs) 

import LineChunks (chunkedReadWith) 
import Control.Parallel.Strategies(rdeepseq) 
import MapReduce (mapReduce) 

-- | Total number of newline characters in a list of lazy ByteString chunks.
--
-- Newlines are counted per chunk and the counts are summed afterwards;
-- 'rdeepseq' is the strategy passed for both the map and the reduce phase.
lineCount :: [LB.ByteString] -> Int64
lineCount chunks = mapReduce rdeepseq countNewlines rdeepseq sum chunks
  where
    -- Newline count of one chunk.
    countNewlines = LB.count '\n'

-- | Apply 'lineCount' to the chunked contents of the file at the given path.
lineCountFile :: FilePath -> IO Int64
lineCountFile = chunkedReadWith lineCount

我把 rnf 改成了 rdeepseq,因为 rnf 似乎已经不在 “parallel” 包里了。

这是本书的配套代码: http://examples.oreilly.com/9780596514983/rwh-examples2.zip

+0

哈哈对不起,我刚刚注意到你在上一个问题中解决了你的问题..请忘记这一点。 – mnish 2013-03-01 02:45:58

+0

其实我没有。很抱歉造成混淆 - 我实际上在这里贴错了代码。我的意思是 - 我仍然有这个问题(在将 rnf 更改为 rdeepseq 之后出现的“句柄已关闭”问题)。也就是说,之前那个问题出现在这个问题之前,而不是之后。我已编辑代码以反映这一点。 – 2013-03-01 04:17:59

+0

不,你不应该在函数 'withChunks' 中把 'rnf' 改成 'rdeepseq'。这会改变 IO 的执行顺序。之所以引起混淆,是因为 'rnf' 和 'rdeepseq' 具有不同的类型和不同的用途。试着在 Hoogle 上查找 'rnf' 的定义。 – mnish 2013-03-01 05:01:50

3

代替

LineCount < test 

使用

LineCount test 

说明:main 中的 getArgs 调用从命令行获取参数。使用 “<” 则意味着从标准输入读取。

+0

对不起,我从一开始用的就是 'LineCount test'。我已编辑帖子以反映这一点。 – 2013-03-02 01:31:42