Neil Mitchell wrote:
Hi,

I am working on a Core -> Core optimising pass in Yhc Core, which then
spits out GHC Core at the back end. In response to Simon M's comment
that the lack of using GHC's IO Monad meant the results were
unreliable, I have now moved to integrating with GHC's notion of IO.
While the STG gets simpler, and should be quicker, the CMM does not
match this. The reason is that a heap check is in an inconvenient and
unnecessary place.

I have labelled these two version Old and New, Old runs faster by
about 10%. Both are at the bottom of this message.

The basic pattern is that the old version had a redundant case in the loop exit:

func x y = case ... of
                -1 -> case y of _ -> I# x
                _ -> f ( x +# 1# ) y

The new version lacks both the y, and the case on y. The only
allocation in the function is the I# in the loop exit. The new version
has floated this heap check to the top level which means there is one
heap check per iteration. The heap check could be pushed downwards,
which would boost performance.

Yes, in this case the heap check could be pushed in. I'll make a ticket for it. This optimisation can't be applied in general, however: a heap check can only occur at a safe point, which is the top of the basic block. So we can push the heapcheck in as long as we don't push it past any dangerous operations, like a side-effecting foreign call, for example (the call would be repeated after the GC had run).

The next issue that both exhibit is that there is a stack check on
every iteration. I suspect that if the stack check was omitted then
this code may end up producing the same assembly code as the C
version. The stack check may require a more deep analysis to
eliminate.

Yes...

Cheers,
        Simon


Thanks

Neil

----------------------------------------

Old Core:

Main.$sprelude_942_ll85 =
   \r [v162_s2MH sc_s2MI]
    case Main.$wccall GHC.Prim.realWorld# of wild_s2OD {
      (#,#) ds_s2OE ds1_s2MF ->
          case ds1_s2MF of wild1_s2OF {
        __DEFAULT ->
            case +# [sc_s2MI 1] of sat_s2MK {
              __DEFAULT -> Main.$sprelude_942_ll85 v162_s2MH sat_s2MK;
            };
        (-1) ->
            case v162_s2MH of tpl_s2OG { GHC.Base.I# a_s2OH -> GHC.Base.I#
[sc_s2MI]; };
          };
    };


Old CMM:

section "data" {
   Main_zdspreludezu942zull85_closure:
    const Main_zdspreludezu942zull85_info;
}

s2Pt_ret() {
    s2Pt_info {
        const 33;
        const 34;
    }
   c2PI:
    Hp = Hp + 8;
    if (Hp > HpLim) goto c2PM;
    I32[Hp + (-4)] = base_GHCziBase_Izh_con_info;
    I32[Hp + 0] = I32[Sp + 4];
    R1 = Hp + (-4);
    Sp = Sp + 8;
    jump (I32[Sp + 0]);
   c2PM:
    HpAlloc = 8;
    jump stg_gc_enter_1;
}

s2Pq_ret() {
    s2Pq_info {
        const 66;
        const 34;
    }
   c2PO:
    _s2Ps = R1;
    if (_s2Ps != (-1)) goto c2PQ;
    R1 = I32[Sp + 4];
    I32[Sp + 4] = s2Pt_info;
    Sp = Sp + 4;
    jump I32[R1];
   c2PQ:
    _s2MK = I32[Sp + 8] + 1;
    I32[Sp + 8] = _s2MK;
    Sp = Sp + 4;
    jump Main_zdspreludezu942zull85_info;
}

Main_zdspreludezu942zull85_entry() {
    Main_zdspreludezu942zull85_info {
        const 131083;
        const 0;
        const 15;
    }
   c2PS:
    if (Sp - 4 < SpLim) goto c2PU;
    I32[Sp + (-4)] = s2Pq_info;
    Sp = Sp + (-4);
    jump Main_zdwccall_info;
   c2PU:
    R1 = Main_zdspreludezu942zull85_closure;
    jump stg_gc_fun;
}


New Core:

Main.$sprelude_942_ll112 =
   \r [sc_s2cM]
    case Main.$wccall GHC.Prim.realWorld# of wild_s2e1 {
      (#,#) ds_s2e2 ds1_s2cK ->
          case ds1_s2cK of wild1_s2e3 {
        __DEFAULT ->
            case +# [sc_s2cM 1] of sat_s2cO {
              __DEFAULT -> Main.$sprelude_942_ll112 sat_s2cO;
            };
        (-1) -> GHC.Base.I# [sc_s2cM];
          };
    };


New CMM:

s2fF_ret() {
    s2fF_info {
        const 33;
        const 34;
    }
   c2fP:
    Hp = Hp + 8;
    if (Hp > HpLim) goto c2fS;
    _s2fH = R1;
    if (_s2fH != (-1)) goto c2fV;
    I32[Hp + (-4)] = base_GHCziBase_Izh_con_info;
    I32[Hp + 0] = I32[Sp + 4];
    R1 = Hp + (-4);
    Sp = Sp + 8;
    jump (I32[Sp + 0]);
   c2fS:
    HpAlloc = 8;
    R9 = 255;
    jump stg_gc_ut;
   c2fV:
    _s2cO = I32[Sp + 4] + 1;
    I32[Sp + 4] = _s2cO;
    Sp = Sp + 4;
    Hp = Hp + (-8);
    jump Main_zdspreludezu942zull112_info;
}

Main_zdspreludezu942zull112_entry() {
    Main_zdspreludezu942zull112_info {
        const 65540;
        const 0;
        const 15;
    }
   c2fX:
    if (Sp - 4 < SpLim) goto c2fZ;
    I32[Sp + (-4)] = s2fF_info;
    Sp = Sp + (-4);
    jump Main_zdwccall_info;
   c2fZ:
    R1 = Main_zdspreludezu942zull112_closure;
    jump stg_gc_fun;
}

_______________________________________________
Cvs-ghc mailing list
[email protected]
http://www.haskell.org/mailman/listinfo/cvs-ghc


_______________________________________________
Cvs-ghc mailing list
[email protected]
http://www.haskell.org/mailman/listinfo/cvs-ghc

Reply via email to