Hello! tl;dr: text package's pack function is creating huge chunks of code everywhere.
Michael Snoyman and I have been trying to nail the performance problems of persistent's Template Haskell code -- GHC was taking a lot of memory and CPU time to compile these. What we found out is that the code size was getting increased 20-fold by the simplifier on phase 0 on GHC 7.0 (c.f. http://groups.google.com/group/yesodweb/msg/9f625fcf85575263). So, what was increasing in size? Consider this extremely simple module (attached as Bug.hs): module Bug (text) where import qualified Data.Text as T text :: T.Text text = T.pack "text" Until simplifier phase 0, the code size floats but tops at 12. Here's the core: Bug.text :: Data.Text.Internal.Text [LclIdX, Unf=Unf{Src=<vanilla>, TopLvl=True, Arity=0, Value=False, ConLike=False, Cheap=False, Expandable=False, Guidance=IF_ARGS [] 11 0}] Bug.text = Data.Text.Fusion.unstream (Data.Text.Fusion.Common.streamList @ GHC.Types.Char (GHC.Base.map @ GHC.Types.Char @ GHC.Types.Char Data.Text.Internal.safe (GHC.Base.unpackCString# "text"))) Which is straightforward. However, on simplifier phase 0 the code size jumps to 391 (!!), a 32-fold increase. I've attached the core (after.hs) since it's too large to copy here on the body. So it seems that the (unstream . streamList) pair above is getting inlined to a HUGE chunk of code (at least Data.Text.Array.new is getting inlined). Worse yet, this happens for every single pack that you use, even those packs hidden by OverloadedStrings! Does anyone have any ideas why GHC is inlining so much code everywhere? While everything I said here was tested on GHC 7.0, we have evidence that GHC 7.4 suffers from the same problem. We don't know about GHC 6.12, though. This seems to be a problem for everyone who uses text, which we hope is everyone using Haskell ;-). Cheers, -- Felipe.
==================== Simplifier SimplMode {Phase = 0 [main], inline, rules, eta-expand, case-of-case} max-iterations=4 iteration=2 ==================== Bug.text :: Data.Text.Internal.Text [LclIdX, Unf=Unf{Src=<vanilla>, TopLvl=True, Arity=0, Value=False, ConLike=False, Cheap=False, Expandable=False, Guidance=NEVER}] Bug.text = case GHC.Base.map @ GHC.Types.Char @ GHC.Types.Char Data.Text.Internal.safe (GHC.Base.unpackCString# "text") of tpl_arP { __DEFAULT -> letrec { a_stU :: forall s1_ao6. Data.Text.Array.MArray s1_ao6 -> GHC.Types.Int -> [GHC.Types.Char] -> GHC.Types.Int -> GHC.Prim.State# s1_ao6 -> (# GHC.Prim.State# s1_ao6, (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #) [LclId, Arity=5, Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=5, Value=True, ConLike=True, Cheap=True, Expandable=True, Guidance=NEVER}] a_stU = \ (@ s1_ao6) (arr_ao7 :: Data.Text.Array.MArray s1_ao6) (top_ao8 :: GHC.Types.Int) (eta_B3 :: [GHC.Types.Char]) (eta_B2 :: GHC.Types.Int) (eta_B1 :: GHC.Prim.State# s1_ao6) -> letrec { a_stS :: [GHC.Types.Char] -> GHC.Types.Int -> GHC.Prim.State# s1_ao6 -> (# GHC.Prim.State# s1_ao6, (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #) [LclId, Arity=3, Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=3, Value=True, ConLike=True, Cheap=True, Expandable=True, Guidance=NEVER}] a_stS = \ (s_aoa :: [GHC.Types.Char]) (i_aob :: GHC.Types.Int) (eta_Xf :: GHC.Prim.State# s1_ao6) -> case s_aoa of s1_aoI { __DEFAULT -> case i_aob of i1_aoJ { GHC.Types.I# ipv_aoL -> case s1_aoI of _ { [] -> (# eta_Xf, (arr_ao7, i1_aoJ) #); : x_arU xs_arV -> case x_arU of tpl1_arX { GHC.Types.C# ipv_stw -> case xs_arV of tpl2_arY { __DEFAULT -> let { a_ape :: GHC.Prim.State# GHC.Prim.RealWorld -> GHC.Prim.State# s1_ao6 -> (# GHC.Prim.State# s1_ao6, (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #) [LclId, Arity=2, Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=2, Value=True, ConLike=True, Cheap=True, Expandable=True, Guidance=IF_ARGS [0 0] 38 12}] a_ape = \ _ (s2_apg :: GHC.Prim.State# s1_ao6) -> let { x1_aph :: GHC.Prim.Int# [LclId, Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=0, Value=False, ConLike=False, Cheap=True, Expandable=True, Guidance=IF_ARGS [] 1 0}] x1_aph = GHC.Prim.ord# ipv_stw } in case GHC.Prim.<# x1_aph 65536 of _ { GHC.Bool.False -> case arr_ao7 of _ { Data.Text.Array.MArray ds2_apu -> let { x#_apt :: GHC.Prim.Int# [LclId, Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=0, Value=False, ConLike=False, Cheap=True, Expandable=True, Guidance=IF_ARGS [] 1 0}] x#_apt = GHC.Prim.-# x1_aph 65536 } in case GHC.Prim.writeWord16Array# @ s1_ao6 ds2_apu ipv_aoL (GHC.Prim.narrow16Word# (GHC.Prim.int2Word# (GHC.Prim.+# (GHC.Prim.uncheckedIShiftRA# x#_apt 10) 55296))) s2_apg of s2#_apw { __DEFAULT -> case GHC.Prim.writeWord16Array# @ s1_ao6 ds2_apu (GHC.Prim.+# ipv_aoL 1) (GHC.Prim.narrow16Word# (GHC.Prim.int2Word# (GHC.Prim.+# (GHC.Prim.word2Int# (GHC.Prim.and# (GHC.Prim.int2Word# x#_apt) __word 1023)) 56320))) s2#_apw of s2#1_apx { __DEFAULT -> ((loop_ao9 tpl2_arY (GHC.Types.I# (GHC.Prim.+# ipv_aoL 2))) `cast` (GHC.ST.NTCo:ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) :: GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) ~ GHC.ST.STRep s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int))) s2#1_apx } } }; GHC.Bool.True -> case arr_ao7 of _ { Data.Text.Array.MArray ds2_apC -> case GHC.Prim.writeWord16Array# @ s1_ao6 ds2_apC ipv_aoL (GHC.Prim.narrow16Word# (GHC.Prim.int2Word# x1_aph)) s2_apg of s2#_apE { __DEFAULT -> ((loop_ao9 tpl2_arY (GHC.Types.I# (GHC.Prim.+# ipv_aoL 1))) `cast` (GHC.ST.NTCo:ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) :: GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) ~ GHC.ST.STRep s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int))) s2#_apE } } } } in let { a1_apF :: GHC.Prim.State# GHC.Prim.RealWorld -> GHC.Prim.State# s1_ao6 -> (# GHC.Prim.State# s1_ao6, (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #) [LclId, Arity=2, Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=2, Value=True, ConLike=True, Cheap=True, Expandable=True, Guidance=NEVER}] a1_apF = \ _ (s2_apH :: GHC.Prim.State# s1_ao6) -> case top_ao8 of _ { GHC.Types.I# x1_apL -> let { x#_apK :: GHC.Prim.Int# [LclId, Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=0, Value=False, ConLike=False, Cheap=True, Expandable=True, Guidance=IF_ARGS [] 2 0}] x#_apK = GHC.Prim.uncheckedIShiftL# (GHC.Prim.+# x1_apL 1) 1 } in let { a2_apN :: GHC.Prim.State# GHC.Prim.RealWorld -> GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6) [LclId, Arity=1, Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=1, Value=True, ConLike=True, Cheap=True, Expandable=True, Guidance=IF_ARGS [0] 14 0}] a2_apN = \ _ -> GHC.Err.error @ (GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6)) (GHC.Base.unpackCString# "Data.Text.Array.new: size overflow") } in case GHC.Prim.<# x#_apK 0 of _ { GHC.Bool.False -> case GHC.Prim.word2Int# (GHC.Prim.and# (GHC.Prim.int2Word# x#_apK) __word 4611686018427387904) of _ { __DEFAULT -> (a2_apN GHC.Prim.realWorld#) `cast` (CoUnsafe (GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6)) (# GHC.Prim.State# s1_ao6, (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #) :: GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6) ~ (# GHC.Prim.State# s1_ao6, (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #)); 0 -> case GHC.Prim.newByteArray# @ s1_ao6 (GHC.Prim.uncheckedIShiftL# x#_apK 1) s2_apH of _ { (# s2#_apV, marr#_apW #) -> case GHC.Prim.<=# x1_apL 0 of _ { GHC.Bool.False -> case arr_ao7 of _ { Data.Text.Array.MArray ds1_aq5 -> case {__pkg_ccall text-0.11.1.13 _hs_text_memcpy forall s. GHC.Prim.MutableByteArray# s -> GHC.Prim.Word# -> GHC.Prim.MutableByteArray# s -> GHC.Prim.Word# -> GHC.Prim.Word# -> GHC.Prim.State# GHC.Prim.RealWorld -> (# GHC.Prim.State# GHC.Prim.RealWorld #)}_aq4 @ s1_ao6 marr#_apW __word 0 ds1_aq5 __word 0 (GHC.Prim.int2Word# x1_apL) (s2#_apV `cast` (GHC.Prim.State# (CoUnsafe s1_ao6 GHC.Prim.RealWorld) :: GHC.Prim.State# s1_ao6 ~ GHC.Prim.State# GHC.Prim.RealWorld)) of _ { (# ds12_aq9 #) -> ((outer_ajS @ s1_ao6 (Data.Text.Array.MArray @ s1_ao6 marr#_apW) (GHC.Types.I# x#_apK) s1_aoI i1_aoJ) `cast` (GHC.ST.NTCo:ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) :: GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) ~ GHC.ST.STRep s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int))) (ds12_aq9 `cast` (GHC.Prim.State# (CoUnsafe GHC.Prim.RealWorld s1_ao6) :: GHC.Prim.State# GHC.Prim.RealWorld ~ GHC.Prim.State# s1_ao6)) } }; GHC.Bool.True -> ((outer_ajS @ s1_ao6 (Data.Text.Array.MArray @ s1_ao6 marr#_apW) (GHC.Types.I# x#_apK) s1_aoI i1_aoJ) `cast` (GHC.ST.NTCo:ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) :: GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) ~ GHC.ST.STRep s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int))) s2#_apV } } }; GHC.Bool.True -> (a2_apN GHC.Prim.realWorld#) `cast` (CoUnsafe (GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6)) (# GHC.Prim.State# s1_ao6, (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #) :: GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6) ~ (# GHC.Prim.State# s1_ao6, (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #)) } } } in case GHC.Prim.<# (GHC.Prim.ord# ipv_stw) 65536 of _ { GHC.Bool.False -> case top_ao8 of _ { GHC.Types.I# y_aqk -> case GHC.Prim.>=# (GHC.Prim.+# ipv_aoL 1) y_aqk of _ { GHC.Bool.False -> a_ape GHC.Prim.realWorld# eta_Xf; GHC.Bool.True -> a1_apF GHC.Prim.realWorld# eta_Xf } }; GHC.Bool.True -> case top_ao8 of _ { GHC.Types.I# y_aqv -> case GHC.Prim.>=# ipv_aoL y_aqv of _ { GHC.Bool.False -> a_ape GHC.Prim.realWorld# eta_Xf; GHC.Bool.True -> a1_apF GHC.Prim.realWorld# eta_Xf } } } } } } } }; loop_ao9 [Occ=LoopBreaker] :: [GHC.Types.Char] -> GHC.Types.Int -> GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) [LclId, Arity=3, Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=0, Value=True, ConLike=True, Cheap=True, Expandable=True, Guidance=ALWAYS_IF(unsat_ok=True,boring_ok=True)}] loop_ao9 = a_stS `cast` ([GHC.Types.Char] -> GHC.Types.Int -> sym (GHC.ST.NTCo:ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)) :: ([GHC.Types.Char] -> GHC.Types.Int -> GHC.ST.STRep s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)) ~ ([GHC.Types.Char] -> GHC.Types.Int -> GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int))); } in ((loop_ao9 eta_B3 eta_B2) `cast` (GHC.ST.NTCo:ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) :: GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) ~ GHC.ST.STRep s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int))) eta_B1; outer_ajS [Occ=LoopBreaker] :: forall s1_ajT. Data.Text.Array.MArray s1_ajT -> GHC.Types.Int -> [GHC.Types.Char] -> GHC.Types.Int -> GHC.ST.ST s1_ajT (Data.Text.Array.MArray s1_ajT, GHC.Types.Int) [LclId, Arity=5, Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=0, Value=True, ConLike=True, Cheap=True, Expandable=True, Guidance=ALWAYS_IF(unsat_ok=True,boring_ok=True)}] outer_ajS = a_stU `cast` (forall s1_ao6. Data.Text.Array.MArray s1_ao6 -> GHC.Types.Int -> [GHC.Types.Char] -> GHC.Types.Int -> sym (GHC.ST.NTCo:ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)) :: (forall s1_ao6. Data.Text.Array.MArray s1_ao6 -> GHC.Types.Int -> [GHC.Types.Char] -> GHC.Types.Int -> GHC.ST.STRep s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)) ~ (forall s1_ao6. Data.Text.Array.MArray s1_ao6 -> GHC.Types.Int -> [GHC.Types.Char] -> GHC.Types.Int -> GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int))); } in case GHC.ST.runSTRep @ (Data.Text.Array.Array, GHC.Types.Int) (\ (@ s1_aqE) (s_aqF [Lbv=OneShot] :: GHC.Prim.State# s1_aqE) -> let { a_aqJ :: GHC.Prim.State# GHC.Prim.RealWorld -> GHC.ST.ST s1_aqE (Data.Text.Array.MArray s1_aqE) [LclId, Arity=1, Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=1, Value=True, ConLike=True, Cheap=True, Expandable=True, Guidance=IF_ARGS [0] 14 0}] a_aqJ = \ _ -> GHC.Err.error @ (GHC.ST.ST s1_aqE (Data.Text.Array.MArray s1_aqE)) (GHC.Base.unpackCString# "Data.Text.Array.new: size overflow") } in case GHC.Prim.newByteArray# @ s1_aqE 8 s_aqF of _ { (# s2#_aqR, marr#_aqS #) -> case ((outer_ajS @ s1_aqE (Data.Text.Array.MArray @ s1_aqE marr#_aqS) (GHC.Types.I# 4) tpl_arP (GHC.Types.I# 0)) `cast` (GHC.ST.NTCo:ST s1_aqE (Data.Text.Array.MArray s1_aqE, GHC.Types.Int) :: GHC.ST.ST s1_aqE (Data.Text.Array.MArray s1_aqE, GHC.Types.Int) ~ GHC.ST.STRep s1_aqE (Data.Text.Array.MArray s1_aqE, GHC.Types.Int))) s2#_aqR of _ { (# new_s_ar4, r_ar5 #) -> case r_ar5 of _ { (marr_ar9, b_ara) -> case marr_ar9 of _ { Data.Text.Array.MArray ds1_are -> (# new_s_ar4, (Data.Text.Array.Array (ds1_are `cast` (CoUnsafe (GHC.Prim.MutableByteArray# s1_aqE) GHC.Prim.ByteArray# :: GHC.Prim.MutableByteArray# s1_aqE ~ GHC.Prim.ByteArray#)), b_ara) #) } } } }) of _ { (ds1_arB, y_arC) -> case y_arC of _ { GHC.Types.I# x_arG -> case x_arG of wild3_arI { __DEFAULT -> case ds1_arB of _ { Data.Text.Array.Array tpl1_arL -> Data.Text.Internal.Text tpl1_arL 0 wild3_arI }; 0 -> Data.Text.Internal.empty } } } }
module Bug (text) where import qualified Data.Text as T text :: T.Text text = T.pack "text"
_______________________________________________ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe