So, now finally my solution. I’m sorry, that I didn’t have more time to add the correct output.
This single threaded version now takes about 100s and 15GB of RAM on my computer, which is about twice as long as the equivalent Go solution.
{-# LANGUAGE StrictData #-}
{-# LANGUAGE ViewPatterns #-}
module Main (main) where
import Data.Array.IO qualified as A
import Data.ByteString.Char8 qualified as BS
import Data.Char (ord)
import Data.Foldable (traverse_)
import Data.HashMap.Strict qualified as HM
import Data.Word (Word64)
data TemperatureData = TemperatureData (A.IOUArray Word64 Word64) (A.IOUArray Word64 Int) (A.IOUArray Word64 Int) (A.IOUArray Word64 Int)
parse :: Word64 -> BS.ByteString -> (HM.HashMap BS.ByteString Word64, TemperatureData) -> IO (HM.HashMap BS.ByteString Word64, TemperatureData)
parse _ (BS.null -> True) acc = pure acc
parse idx content (accMap, TemperatureData countsT sumT minT maxT) = do
let (stationName, rest') = BS.break (== ';') content
let rest2 = BS.drop 1 rest'
let (temp, rest) = case BS.uncons rest2 of
Nothing -> (0, rest2)
Just ('-', rest2') -> case BS.uncons rest2' of
Nothing -> (0, rest2')
Just (ch1, rest3) -> case BS.uncons rest3 of
Nothing -> (0, rest3)
Just ('.', rest4) -> case BS.uncons rest4 of
Nothing -> (0, rest4)
Just (ch2, rest5) -> ((-1) * (10 * ord ch1 + ord ch2 - 11 * ord '0'), rest5)
Just (ch2, rest4) -> case BS.uncons rest4 of
Nothing -> (0, rest4)
-- Must be '.'
Just (_, rest5) -> case BS.uncons rest5 of
Nothing -> (0, rest5)
Just (ch3, rest6) -> ((-1) * (100 * ord ch1 + 10 * ord ch2 + ord ch3 - 111 * ord '0'), rest6)
Just (ch1, rest3) -> case BS.uncons rest3 of
Nothing -> (0, rest3)
Just ('.', rest4) -> case BS.uncons rest4 of
Nothing -> (0, rest4)
Just (ch2, rest5) -> (10 * ord ch1 + ord ch2 - 11 * ord '0', rest5)
Just (ch2, rest4) -> case BS.uncons rest4 of
Nothing -> (0, rest4)
-- Must be '.'
Just (_, rest5) -> case BS.uncons rest5 of
Nothing -> (0, rest5)
Just (ch3, rest6) -> (100 * ord ch1 + 10 * ord ch2 + ord ch3 - 111 * ord '0', rest6)
(newAcc, newIdx) <- case HM.lookup stationName accMap of
Nothing -> do
A.writeArray countsT idx 1
A.writeArray sumT idx temp
A.writeArray minT idx temp
A.writeArray maxT idx temp
pure ((HM.insert stationName idx accMap, TemperatureData countsT sumT minT maxT), idx + 1)
Just idxM -> do
A.modifyArray' countsT idxM (+ 1)
A.modifyArray' sumT idxM (+ temp)
A.modifyArray' minT idxM (min temp)
A.modifyArray' maxT idxM (max temp)
pure ((accMap, TemperatureData countsT sumT minT maxT), idx)
parse newIdx (BS.drop 1 rest) newAcc
main :: IO ()
main = do
content <- BS.readFile "../measurements_big.txt"
c <- A.newArray_ (0 :: Word64, 10000 :: Word64)
s <- A.newArray_ (0 :: Word64, 10000 :: Word64)
m <- A.newArray_ (0 :: Word64, 10000 :: Word64)
n <- A.newArray_ (0 :: Word64, 10000 :: Word64)
(ma, TemperatureData rC rS rM rN) <- parse 0 content (HM.empty, TemperatureData c s m n)
let keys = HM.keys ma
traverse_
( \k ->
do
let i = HM.findWithDefault 0 k ma
count <- A.readArray rC i
sum <- A.readArray rS i
tMin <- A.readArray rM i
tMax <- A.readArray rN i
print (k, i, count, sum, tMin, tMax)
)
keys