@@ -25,6 +25,8 @@ module Cardano.Unlog.LogObject
2525 , logObjectStreamInterpreterKeys
2626 , LOBody (..)
2727 , LOAnyType (..)
28+ , readLogObjectStream
29+ , textRefEquals
2830 )
2931where
3032
@@ -36,6 +38,7 @@ import qualified Data.Aeson.Key as Aeson
3638import qualified Data.Aeson.KeyMap as KeyMap
3739import Data.Aeson.Types (Parser)
3840import qualified Data.ByteString.Lazy as LBS
41+ import Data.Hashable (hash)
3942import qualified Data.Map.Strict as Map
4043import qualified Data.Text as LText
4144import Data.Text.Short (ShortText, fromText, toText)
@@ -54,6 +57,54 @@ import Cardano.Util
5457
5558type Text = ShortText
5659
60+ -- | Us of the a TextRef replaces commonly expected string parses with references
61+ -- into a Map, reducing memory footprint - given that large runs can contain
62+ -- >25mio log objects.
63+ data TextRef
64+ = TextRef {-# UNPACK #-} !Int
65+ | TextLit {-# UNPACK #-} !Text
66+ deriving Generic
67+ deriving anyclass NFData
68+
69+ {-# NOINLINE lookupTextRef #-}
70+ lookupTextRef :: Int -> Text
71+ lookupTextRef ref = Map.findWithDefault Text.empty ref dict
72+ where
73+ dict = Map.fromList [(hash t, t) | t <- concat [allKeys, kinds, legacy]]
74+ kinds = map ("Cardano.Node." <>) allKeys
75+ legacy = map ("cardano.node." <>)
76+ [ "BlockFetchClient"
77+ , "BlockFetchServer"
78+ , "ChainDB"
79+ , "ChainSyncClient"
80+ , "ChainSyncHeaderServer"
81+ , "DnsSubscription"
82+ , "Forge"
83+ , "IpSubscription"
84+ , "LeadershipCheck"
85+ , "Mempool"
86+ , "resources"
87+ , "TxInbound"
88+ ]
89+ allKeys =
90+ concatMap Map.keys [fst3 interpreters, snd3 interpreters, thd3 interpreters]
91+ & filter (not . Text.null)
92+
93+ toTextRef :: Text -> TextRef
94+ toTextRef t = let h = hash t in if Text.null (lookupTextRef h) then TextLit t else TextRef h
95+
96+ textRefEquals :: TextRef -> Text -> Bool
97+ textRefEquals (TextRef i) = (== lookupTextRef i)
98+ textRefEquals (TextLit t) = (== t)
99+
100+ instance Show TextRef where
101+ show (TextRef i) = show $ lookupTextRef i
102+ show (TextLit t) = show t
103+
104+ instance ToJSON TextRef where
105+ toJSON (TextRef i) = toJSON $ lookupTextRef i
106+ toJSON (TextLit t) = toJSON t
107+
57108-- | Input data.
58109data HostLogs a
59110 = HostLogs
@@ -65,6 +116,8 @@ data HostLogs a
65116 , hlLogs :: (JsonLogfile, a)
66117 , hlFilteredSha256 :: Hash
67118 , hlProfile :: [ProfileEntry I]
119+ , hlRawFirstAt :: Maybe UTCTime
120+ , hlRawLastAt :: Maybe UTCTime
68121 }
69122 deriving (Generic)
70123
@@ -128,7 +181,7 @@ readLogObjectStream f okDErr loAnyLimit =
128181 fmap (\bs ->
129182 AE.eitherDecode bs &
130183 either
131- (LogObject zeroUTCTime "Cardano.Analysis.DecodeError" "DecodeError" "" (TId "0")
184+ (LogObject zeroUTCTime (TextLit "Cardano.Analysis.DecodeError") (TextLit "DecodeError") "" (TId "0")
132185 . LODecodeError (Text.fromByteString (LBS.toStrict bs)
133186 & fromMaybe "#<ERROR decoding input fromByteString>")
134187 . Text.fromText
@@ -143,8 +196,8 @@ readLogObjectStream f okDErr loAnyLimit =
143196data LogObject
144197 = LogObject
145198 { loAt :: !UTCTime
146- , loNS :: !Text
147- , loKind :: !Text
199+ , loNS :: !TextRef
200+ , loKind :: !TextRef
148201 , loHost :: !Host
149202 , loTid :: !TId
150203 , loBody :: !LOBody
@@ -348,6 +401,8 @@ interpreters = map3ple Map.fromList . unzip3 . fmap ent $
348401 map3ple :: (a -> b) -> (a,a,a) -> (b,b,b)
349402 map3ple f (x,y,z) = (f x, f y, f z)
350403
404+
405+
351406logObjectStreamInterpreterKeysLegacy, logObjectStreamInterpreterKeys :: [Text]
352407logObjectStreamInterpreterKeysLegacy =
353408 logObjectStreamInterpreterKeysLegacy1 <> logObjectStreamInterpreterKeysLegacy2
@@ -457,8 +512,8 @@ instance FromJSON LogObject where
457512 "The 'ns' field must be either a string, or a singleton-String vector, was: " <> show x
458513 LogObject
459514 <$> v .: "at"
460- <*> pure ns
461- <*> pure kind
515+ <*> pure (toTextRef ns)
516+ <*> pure (toTextRef kind)
462517 <*> v .: "host"
463518 <*> v .: "thread"
464519 <*> case Map.lookup ns (thd3 interpreters)
0 commit comments