Hello!

tl;dr: text package's pack function is creating huge chunks of code everywhere.


Michael Snoyman and I have been trying to nail the performance
problems of persistent's Template Haskell code -- GHC was taking a lot
of memory and CPU time to compile these.  What we found out is that
the code size was getting increased 20-fold by the simplifier on phase
0 on GHC 7.0 (c.f.
http://groups.google.com/group/yesodweb/msg/9f625fcf85575263).  So,
what was increasing in size?

Consider this extremely simple module (attached as Bug.hs):

  module Bug (text) where
  import qualified Data.Text as T

  text :: T.Text
  text = T.pack "text"

Until simplifier phase 0, the code size floats but tops at 12.  Here's the core:

Bug.text :: Data.Text.Internal.Text
[LclIdX,
 Unf=Unf{Src=<vanilla>, TopLvl=True, Arity=0, Value=False,
         ConLike=False, Cheap=False, Expandable=False,
         Guidance=IF_ARGS [] 11 0}]
Bug.text =
  Data.Text.Fusion.unstream
    (Data.Text.Fusion.Common.streamList
       @ GHC.Types.Char
       (GHC.Base.map
          @ GHC.Types.Char
          @ GHC.Types.Char
          Data.Text.Internal.safe
          (GHC.Base.unpackCString# "text")))

Which is straightforward.  However, on simplifier phase 0 the code
size jumps to 391 (!!), a 32-fold increase.  I've attached the core
(after.hs) since it's too large to copy here on the body.  So it seems
that the (unstream . streamList) pair above is getting inlined to a
HUGE chunk of code (at least Data.Text.Array.new is getting inlined).
Worse yet, this happens for every single pack that you use, even those
packs hidden by OverloadedStrings!

Does anyone have any ideas why GHC is inlining so much code
everywhere?  While everything I said here was tested on GHC 7.0, we
have evidence that GHC 7.4 suffers from the same problem.  We don't
know about GHC 6.12, though.  This seems to be a problem for everyone
who uses text, which we hope is everyone using Haskell ;-).

Cheers,

-- 
Felipe.
==================== Simplifier SimplMode {Phase = 0 [main],
                      inline,
                      rules,
                      eta-expand,
                      case-of-case} max-iterations=4 iteration=2 ====================
Bug.text :: Data.Text.Internal.Text
[LclIdX,
 Unf=Unf{Src=<vanilla>, TopLvl=True, Arity=0, Value=False,
         ConLike=False, Cheap=False, Expandable=False, Guidance=NEVER}]
Bug.text =
  case GHC.Base.map
         @ GHC.Types.Char
         @ GHC.Types.Char
         Data.Text.Internal.safe
         (GHC.Base.unpackCString# "text")
  of tpl_arP { __DEFAULT ->
  letrec {
    a_stU
      :: forall s1_ao6.
         Data.Text.Array.MArray s1_ao6
         -> GHC.Types.Int
         -> [GHC.Types.Char]
         -> GHC.Types.Int
         -> GHC.Prim.State# s1_ao6
         -> (# GHC.Prim.State# s1_ao6,
               (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #)
    [LclId,
     Arity=5,
     Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=5, Value=True,
             ConLike=True, Cheap=True, Expandable=True, Guidance=NEVER}]
    a_stU =
      \ (@ s1_ao6)
        (arr_ao7 :: Data.Text.Array.MArray s1_ao6)
        (top_ao8 :: GHC.Types.Int)
        (eta_B3 :: [GHC.Types.Char])
        (eta_B2 :: GHC.Types.Int)
        (eta_B1 :: GHC.Prim.State# s1_ao6) ->
        letrec {
          a_stS
            :: [GHC.Types.Char]
               -> GHC.Types.Int
               -> GHC.Prim.State# s1_ao6
               -> (# GHC.Prim.State# s1_ao6,
                     (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #)
          [LclId,
           Arity=3,
           Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=3, Value=True,
                   ConLike=True, Cheap=True, Expandable=True, Guidance=NEVER}]
          a_stS =
            \ (s_aoa :: [GHC.Types.Char])
              (i_aob :: GHC.Types.Int)
              (eta_Xf :: GHC.Prim.State# s1_ao6) ->
              case s_aoa of s1_aoI { __DEFAULT ->
              case i_aob of i1_aoJ { GHC.Types.I# ipv_aoL ->
              case s1_aoI of _ {
                [] -> (# eta_Xf, (arr_ao7, i1_aoJ) #);
                : x_arU xs_arV ->
                  case x_arU of tpl1_arX { GHC.Types.C# ipv_stw ->
                  case xs_arV of tpl2_arY { __DEFAULT ->
                  let {
                    a_ape
                      :: GHC.Prim.State# GHC.Prim.RealWorld
                         -> GHC.Prim.State# s1_ao6
                         -> (# GHC.Prim.State# s1_ao6,
                               (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #)
                    [LclId,
                     Arity=2,
                     Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=2, Value=True,
                             ConLike=True, Cheap=True, Expandable=True,
                             Guidance=IF_ARGS [0 0] 38 12}]
                    a_ape =
                      \ _ (s2_apg :: GHC.Prim.State# s1_ao6) ->
                        let {
                          x1_aph :: GHC.Prim.Int#
                          [LclId,
                           Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=0, Value=False,
                                   ConLike=False, Cheap=True, Expandable=True,
                                   Guidance=IF_ARGS [] 1 0}]
                          x1_aph = GHC.Prim.ord# ipv_stw } in
                        case GHC.Prim.<# x1_aph 65536 of _ {
                          GHC.Bool.False ->
                            case arr_ao7 of _ { Data.Text.Array.MArray ds2_apu ->
                            let {
                              x#_apt :: GHC.Prim.Int#
                              [LclId,
                               Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=0, Value=False,
                                       ConLike=False, Cheap=True, Expandable=True,
                                       Guidance=IF_ARGS [] 1 0}]
                              x#_apt = GHC.Prim.-# x1_aph 65536 } in
                            case GHC.Prim.writeWord16Array#
                                   @ s1_ao6
                                   ds2_apu
                                   ipv_aoL
                                   (GHC.Prim.narrow16Word#
                                      (GHC.Prim.int2Word#
                                         (GHC.Prim.+#
                                            (GHC.Prim.uncheckedIShiftRA# x#_apt 10) 55296)))
                                   s2_apg
                            of s2#_apw { __DEFAULT ->
                            case GHC.Prim.writeWord16Array#
                                   @ s1_ao6
                                   ds2_apu
                                   (GHC.Prim.+# ipv_aoL 1)
                                   (GHC.Prim.narrow16Word#
                                      (GHC.Prim.int2Word#
                                         (GHC.Prim.+#
                                            (GHC.Prim.word2Int#
                                               (GHC.Prim.and#
                                                  (GHC.Prim.int2Word# x#_apt) __word 1023))
                                            56320)))
                                   s2#_apw
                            of s2#1_apx { __DEFAULT ->
                            ((loop_ao9 tpl2_arY (GHC.Types.I# (GHC.Prim.+# ipv_aoL 2)))
                             `cast` (GHC.ST.NTCo:ST
                                       s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)
                                     :: GHC.ST.ST
                                          s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)
                                          ~
                                        GHC.ST.STRep
                                          s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)))
                              s2#1_apx
                            }
                            }
                            };
                          GHC.Bool.True ->
                            case arr_ao7 of _ { Data.Text.Array.MArray ds2_apC ->
                            case GHC.Prim.writeWord16Array#
                                   @ s1_ao6
                                   ds2_apC
                                   ipv_aoL
                                   (GHC.Prim.narrow16Word# (GHC.Prim.int2Word# x1_aph))
                                   s2_apg
                            of s2#_apE { __DEFAULT ->
                            ((loop_ao9 tpl2_arY (GHC.Types.I# (GHC.Prim.+# ipv_aoL 1)))
                             `cast` (GHC.ST.NTCo:ST
                                       s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)
                                     :: GHC.ST.ST
                                          s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)
                                          ~
                                        GHC.ST.STRep
                                          s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)))
                              s2#_apE
                            }
                            }
                        } } in
                  let {
                    a1_apF
                      :: GHC.Prim.State# GHC.Prim.RealWorld
                         -> GHC.Prim.State# s1_ao6
                         -> (# GHC.Prim.State# s1_ao6,
                               (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #)
                    [LclId,
                     Arity=2,
                     Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=2, Value=True,
                             ConLike=True, Cheap=True, Expandable=True, Guidance=NEVER}]
                    a1_apF =
                      \ _ (s2_apH :: GHC.Prim.State# s1_ao6) ->
                        case top_ao8 of _ { GHC.Types.I# x1_apL ->
                        let {
                          x#_apK :: GHC.Prim.Int#
                          [LclId,
                           Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=0, Value=False,
                                   ConLike=False, Cheap=True, Expandable=True,
                                   Guidance=IF_ARGS [] 2 0}]
                          x#_apK = GHC.Prim.uncheckedIShiftL# (GHC.Prim.+# x1_apL 1) 1 } in
                        let {
                          a2_apN
                            :: GHC.Prim.State# GHC.Prim.RealWorld
                               -> GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6)
                          [LclId,
                           Arity=1,
                           Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=1, Value=True,
                                   ConLike=True, Cheap=True, Expandable=True,
                                   Guidance=IF_ARGS [0] 14 0}]
                          a2_apN =
                            \ _ ->
                              GHC.Err.error
                                @ (GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6))
                                (GHC.Base.unpackCString# "Data.Text.Array.new: size overflow") } in
                        case GHC.Prim.<# x#_apK 0 of _ {
                          GHC.Bool.False ->
                            case GHC.Prim.word2Int#
                                   (GHC.Prim.and#
                                      (GHC.Prim.int2Word# x#_apK) __word 4611686018427387904)
                            of _ {
                              __DEFAULT ->
                                (a2_apN GHC.Prim.realWorld#)
                                `cast` (CoUnsafe
                                          (GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6))
                                          (# GHC.Prim.State# s1_ao6,
                                             (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #)
                                        :: GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6)
                                             ~
                                           (# GHC.Prim.State# s1_ao6,
                                              (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #));
                              0 ->
                                case GHC.Prim.newByteArray#
                                       @ s1_ao6 (GHC.Prim.uncheckedIShiftL# x#_apK 1) s2_apH
                                of _ { (# s2#_apV, marr#_apW #) ->
                                case GHC.Prim.<=# x1_apL 0 of _ {
                                  GHC.Bool.False ->
                                    case arr_ao7 of _ { Data.Text.Array.MArray ds1_aq5 ->
                                    case {__pkg_ccall text-0.11.1.13 _hs_text_memcpy forall s.
                                            GHC.Prim.MutableByteArray# s
                                            -> GHC.Prim.Word#
                                            -> GHC.Prim.MutableByteArray# s
                                            -> GHC.Prim.Word#
                                            -> GHC.Prim.Word#
                                            -> GHC.Prim.State# GHC.Prim.RealWorld
                                            -> (# GHC.Prim.State# GHC.Prim.RealWorld #)}_aq4
                                           @ s1_ao6
                                           marr#_apW
                                           __word 0
                                           ds1_aq5
                                           __word 0
                                           (GHC.Prim.int2Word# x1_apL)
                                           (s2#_apV
                                            `cast` (GHC.Prim.State#
                                                      (CoUnsafe s1_ao6 GHC.Prim.RealWorld)
                                                    :: GHC.Prim.State# s1_ao6
                                                         ~
                                                       GHC.Prim.State# GHC.Prim.RealWorld))
                                    of _ { (# ds12_aq9 #) ->
                                    ((outer_ajS
                                        @ s1_ao6
                                        (Data.Text.Array.MArray @ s1_ao6 marr#_apW)
                                        (GHC.Types.I# x#_apK)
                                        s1_aoI
                                        i1_aoJ)
                                     `cast` (GHC.ST.NTCo:ST
                                               s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)
                                             :: GHC.ST.ST
                                                  s1_ao6
                                                  (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)
                                                  ~
                                                GHC.ST.STRep
                                                  s1_ao6
                                                  (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)))
                                      (ds12_aq9
                                       `cast` (GHC.Prim.State# (CoUnsafe GHC.Prim.RealWorld s1_ao6)
                                               :: GHC.Prim.State# GHC.Prim.RealWorld
                                                    ~
                                                  GHC.Prim.State# s1_ao6))
                                    }
                                    };
                                  GHC.Bool.True ->
                                    ((outer_ajS
                                        @ s1_ao6
                                        (Data.Text.Array.MArray @ s1_ao6 marr#_apW)
                                        (GHC.Types.I# x#_apK)
                                        s1_aoI
                                        i1_aoJ)
                                     `cast` (GHC.ST.NTCo:ST
                                               s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)
                                             :: GHC.ST.ST
                                                  s1_ao6
                                                  (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)
                                                  ~
                                                GHC.ST.STRep
                                                  s1_ao6
                                                  (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)))
                                      s2#_apV
                                }
                                }
                            };
                          GHC.Bool.True ->
                            (a2_apN GHC.Prim.realWorld#)
                            `cast` (CoUnsafe
                                      (GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6))
                                      (# GHC.Prim.State# s1_ao6,
                                         (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #)
                                    :: GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6)
                                         ~
                                       (# GHC.Prim.State# s1_ao6,
                                          (Data.Text.Array.MArray s1_ao6, GHC.Types.Int) #))
                        }
                        } } in
                  case GHC.Prim.<# (GHC.Prim.ord# ipv_stw) 65536 of _ {
                    GHC.Bool.False ->
                      case top_ao8 of _ { GHC.Types.I# y_aqk ->
                      case GHC.Prim.>=# (GHC.Prim.+# ipv_aoL 1) y_aqk of _ {
                        GHC.Bool.False -> a_ape GHC.Prim.realWorld# eta_Xf;
                        GHC.Bool.True -> a1_apF GHC.Prim.realWorld# eta_Xf
                      }
                      };
                    GHC.Bool.True ->
                      case top_ao8 of _ { GHC.Types.I# y_aqv ->
                      case GHC.Prim.>=# ipv_aoL y_aqv of _ {
                        GHC.Bool.False -> a_ape GHC.Prim.realWorld# eta_Xf;
                        GHC.Bool.True -> a1_apF GHC.Prim.realWorld# eta_Xf
                      }
                      }
                  }
                  }
                  }
              }
              }
              };
          loop_ao9 [Occ=LoopBreaker]
            :: [GHC.Types.Char]
               -> GHC.Types.Int
               -> GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)
          [LclId,
           Arity=3,
           Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=0, Value=True,
                   ConLike=True, Cheap=True, Expandable=True,
                   Guidance=ALWAYS_IF(unsat_ok=True,boring_ok=True)}]
          loop_ao9 =
            a_stS
            `cast` ([GHC.Types.Char]
                    -> GHC.Types.Int
                    -> sym
                         (GHC.ST.NTCo:ST
                            s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int))
                    :: ([GHC.Types.Char]
                        -> GHC.Types.Int
                        -> GHC.ST.STRep
                             s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int))
                         ~
                       ([GHC.Types.Char]
                        -> GHC.Types.Int
                        -> GHC.ST.ST
                             s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int))); } in
        ((loop_ao9 eta_B3 eta_B2)
         `cast` (GHC.ST.NTCo:ST
                   s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)
                 :: GHC.ST.ST s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)
                      ~
                    GHC.ST.STRep
                      s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int)))
          eta_B1;
    outer_ajS [Occ=LoopBreaker]
      :: forall s1_ajT.
         Data.Text.Array.MArray s1_ajT
         -> GHC.Types.Int
         -> [GHC.Types.Char]
         -> GHC.Types.Int
         -> GHC.ST.ST s1_ajT (Data.Text.Array.MArray s1_ajT, GHC.Types.Int)
    [LclId,
     Arity=5,
     Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=0, Value=True,
             ConLike=True, Cheap=True, Expandable=True,
             Guidance=ALWAYS_IF(unsat_ok=True,boring_ok=True)}]
    outer_ajS =
      a_stU
      `cast` (forall s1_ao6.
              Data.Text.Array.MArray s1_ao6
              -> GHC.Types.Int
              -> [GHC.Types.Char]
              -> GHC.Types.Int
              -> sym
                   (GHC.ST.NTCo:ST
                      s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int))
              :: (forall s1_ao6.
                  Data.Text.Array.MArray s1_ao6
                  -> GHC.Types.Int
                  -> [GHC.Types.Char]
                  -> GHC.Types.Int
                  -> GHC.ST.STRep
                       s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int))
                   ~
                 (forall s1_ao6.
                  Data.Text.Array.MArray s1_ao6
                  -> GHC.Types.Int
                  -> [GHC.Types.Char]
                  -> GHC.Types.Int
                  -> GHC.ST.ST
                       s1_ao6 (Data.Text.Array.MArray s1_ao6, GHC.Types.Int))); } in
  case GHC.ST.runSTRep
         @ (Data.Text.Array.Array, GHC.Types.Int)
         (\ (@ s1_aqE) (s_aqF [Lbv=OneShot] :: GHC.Prim.State# s1_aqE) ->
            let {
              a_aqJ
                :: GHC.Prim.State# GHC.Prim.RealWorld
                   -> GHC.ST.ST s1_aqE (Data.Text.Array.MArray s1_aqE)
              [LclId,
               Arity=1,
               Unf=Unf{Src=<vanilla>, TopLvl=False, Arity=1, Value=True,
                       ConLike=True, Cheap=True, Expandable=True,
                       Guidance=IF_ARGS [0] 14 0}]
              a_aqJ =
                \ _ ->
                  GHC.Err.error
                    @ (GHC.ST.ST s1_aqE (Data.Text.Array.MArray s1_aqE))
                    (GHC.Base.unpackCString# "Data.Text.Array.new: size overflow") } in
            case GHC.Prim.newByteArray# @ s1_aqE 8 s_aqF
            of _ { (# s2#_aqR, marr#_aqS #) ->
            case ((outer_ajS
                     @ s1_aqE
                     (Data.Text.Array.MArray @ s1_aqE marr#_aqS)
                     (GHC.Types.I# 4)
                     tpl_arP
                     (GHC.Types.I# 0))
                  `cast` (GHC.ST.NTCo:ST
                            s1_aqE (Data.Text.Array.MArray s1_aqE, GHC.Types.Int)
                          :: GHC.ST.ST s1_aqE (Data.Text.Array.MArray s1_aqE, GHC.Types.Int)
                               ~
                             GHC.ST.STRep
                               s1_aqE (Data.Text.Array.MArray s1_aqE, GHC.Types.Int)))
                   s2#_aqR
            of _ { (# new_s_ar4, r_ar5 #) ->
            case r_ar5 of _ { (marr_ar9, b_ara) ->
            case marr_ar9 of _ { Data.Text.Array.MArray ds1_are ->
            (# new_s_ar4,
               (Data.Text.Array.Array
                  (ds1_are
                   `cast` (CoUnsafe
                             (GHC.Prim.MutableByteArray# s1_aqE) GHC.Prim.ByteArray#
                           :: GHC.Prim.MutableByteArray# s1_aqE ~ GHC.Prim.ByteArray#)),
                b_ara) #)
            }
            }
            }
            })
  of _ { (ds1_arB, y_arC) ->
  case y_arC of _ { GHC.Types.I# x_arG ->
  case x_arG of wild3_arI {
    __DEFAULT ->
      case ds1_arB of _ { Data.Text.Array.Array tpl1_arL ->
      Data.Text.Internal.Text tpl1_arL 0 wild3_arI
      };
    0 -> Data.Text.Internal.empty
  }
  }
  }
  }
module Bug (text) where

import qualified Data.Text as T

text :: T.Text
text = T.pack "text"
_______________________________________________
Haskell-Cafe mailing list
Haskell-Cafe@haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe

Reply via email to