Issue 176349
Summary [RISC-V] Code size regression for Zilsd
Labels new issue
Assignees
Reporter christian-herber-nxp
    I am seeing a code size regression in a specific part of our SDK.
It is non-trivial to extract a minimal reproducer, but I want to make a start here. Hopefully we can work out the root cause from the information below.

The offender is Clock_Ip_ClockPowerModeChangeNotification() which can be found here:
https://github.com/zephyrproject-rtos/hal_nxp/blob/9424596b87f119f5e4049855e9a73eb16efd2c80/s32/drivers/s32k1/Mcu/src/Clock_Ip_Specific.c#L1019

This is the diff of the disassembly I am seeing:

 Clock_Ip.c.o:     file format elf32-littleriscv
 ```diff
        00c51063                bne a0,a2,<Clock_Ip_ClockPowerModeChangeNotification+0x10>
        4509 li      a0,2
        00a59063                bne a1,a0,<Clock_Ip_ClockPowerModeChangeNotification+0x16>
-       00000437 lui     s0,0x0
-       00040413                mv      s0,s0
- 584c                    lw      a1,52(s0)
-       5c08 lw      a0,56(s0)
-       41cc                    lw      a1,4(a1)
- 00040023                sb      zero,0(s0) # <Clock_Ip_ClockPowerModeChangeNotification>
-       9582 jalr    a1
-       544c                    lw      a1,44(s0)
-       5808 lw      a0,48(s0)
-       498c                    lw a1,16(a1)
-       9582                    jalr    a1
-       544c lw      a1,44(s0)
-       5808                    lw a0,48(s0)
-       458c                    lw      a1,8(a1)
-       9582 jalr    a1
-       504c                    lw a1,36(s0)
-       5408                    lw      a0,40(s0)
-       45cc lw      a1,12(a1)
-       9582                    jalr a1
-       5048                    lw      a0,36(s0)
-       540c lw      a1,40(s0)
+       00000297                auipc   t0,0x0
+ 000282e7                jalr    t0,t0 # <Clock_Ip_ClockPowerModeChangeNotification+0x1a>
+       9602 jalr    a2
+       02c43503                ld      a0,44(s0)
+ 4910                    lw      a2,16(a0)
+       852e                    mv a0,a1
+       9602                    jalr    a2
+       02c43503 ld      a0,44(s0)
+       4510                    lw a2,8(a0)
+       852e                    mv      a0,a1
+       9602 jalr    a2
+       02443503                ld      a0,36(s0)
+ 4550                    lw      a2,12(a0)
+       852e mv      a0,a1
+       9602                    jalr    a2
+       02443503 ld      a0,36(s0)
        4510                    lw a2,8(a0)
        4188                    lw      a0,0(a1)
        9602 jalr    a2
-       446c                    lw a1,76(s0)
-       4828                    lw      a0,80(s0)
-       41cc lw      a1,4(a1)
-       9582                    jalr a1
-       486c                    lw      a1,84(s0)
-       4c28 lw      a0,88(s0)
-       418c                    lw a1,0(a1)
-       9582                    jalr    a1
-       4c6c lw      a1,92(s0)
-       5028                    lw a0,96(s0)
-       418c                    lw      a1,0(a1)
-       9582 jalr    a1
-       506c                    lw a1,100(s0)
-       5428                    lw      a0,104(s0)
-       a001 j       <Clock_Ip_ClockPowerModeChangeNotification+0x6c>
- e181                    bnez a1,<Clock_Ip_ClockPowerModeChangeNotification+0x6e>
+       04c43503 ld      a0,76(s0)
+       4150                    lw      a2,4(a0)
+ 852e                    mv      a0,a1
+       9602 jalr    a2
+       05443503                ld      a0,84(s0)
+       4110 lw      a2,0(a0)
+       852e                    mv a0,a1
+       9602                    jalr    a2
+       05c43503 ld      a0,92(s0)
+       4110                    lw      a2,0(a0)
+ 852e                    mv      a0,a1
+       9602 jalr    a2
+       06443503                ld      a0,100(s0)
+       411c lw      a5,0(a0)
+       852e                    mv a0,a1
+       a001                    j <Clock_Ip_ClockPowerModeChangeNotification+0x72>
+       e181 bnez    a1,<Clock_Ip_ClockPowerModeChangeNotification+0x74>
 00000437                lui     s0,0x0
        00040413                mv s0,s0
-       404c                    lw      a1,4(s0)
-       4408 lw      a0,8(s0)
-       41cc                    lw a1,4(a1)
-       4605                    li      a2,1
-       8810 sb      a2,0(s0)
-       9582                    jalr    a1
- 444c                    lw      a1,12(s0)
-       4808 lw      a0,16(s0)
-       418c                    lw      a1,0(a1)
- 9582                    jalr    a1
-       484c                    lw a1,20(s0)
-       4c08                    lw      a0,24(s0)
-       418c lw      a1,0(a1)
-       9582                    jalr a1
-       4c4c                    lw      a1,28(s0)
-       5008 lw      a0,32(s0)
-       418c                    lw a1,0(a1)
-       9582                    jalr    a1
-       5048 lw      a0,36(s0)
-       540c                    lw a1,40(s0)
+       00443503                ld      a0,4(s0) # <Clock_Ip_ClockPowerModeChangeNotification+0x4>
+       4150 lw      a2,4(a0)
+       4505                    li      a0,1
+ 8808                    sb      a0,0(s0)
+       852e                    mv a0,a1
+       9602                    jalr    a2
+       00c43503 ld      a0,12(s0)
+       4110                    lw a2,0(a0)
+       852e                    mv      a0,a1
+       9602 jalr    a2
+       01443503                ld      a0,20(s0)
+ 4110                    lw      a2,0(a0)
+       852e mv      a0,a1
+       9602                    jalr    a2
+       01c43503 ld      a0,28(s0)
+       4110                    lw a2,0(a0)
+       852e                    mv      a0,a1
+       9602 jalr    a2
+       02443503                ld      a0,36(s0)
 4910                    lw      a2,16(a0)
        4188 lw      a0,0(a1)
        9602                    jalr    a2
-       5448 lw      a0,44(s0)
-       580c                    lw a1,48(s0)
+       02c43503                ld      a0,44(s0)
        4550 lw      a2,12(a0)
        4188                    lw a0,0(a1)
        9602                    jalr    a2
-       5848 lw      a0,52(s0)
-       5c0c                    lw a1,56(s0)
+       03443503                ld      a0,52(s0)
        451c lw      a5,8(a0)
        4188                    lw a0,0(a1)
-       a001                    j <Clock_Ip_ClockPowerModeChangeNotification+0xb8>
+       a001 j       <Clock_Ip_ClockPowerModeChangeNotification+0xc6>
        4509 li      a0,2
-       00a59063                bne a1,a0,<Clock_Ip_ClockPowerModeChangeNotification+0xbc>
-       00000437 lui     s0,0x0
-       00040413                mv      s0,s0
- 584c                    lw      a1,52(s0)
-       5c08 lw      a0,56(s0)
-       41cc                    lw      a1,4(a1)
- 00040023                sb      zero,0(s0) # <Clock_Ip_ClockPowerModeChangeNotification>
-       9582 jalr    a1
-       544c                    lw      a1,44(s0)
-       5808 lw      a0,48(s0)
-       498c                    lw a1,16(a1)
-       9582                    jalr    a1
-       544c lw      a1,44(s0)
-       5808                    lw a0,48(s0)
-       458c                    lw      a1,8(a1)
-       9582 jalr    a1
-       504c                    lw a1,36(s0)
-       5408                    lw      a0,40(s0)
-       45cc lw      a1,12(a1)
-       9582                    jalr a1
-       5048                    lw      a0,36(s0)
-       540c lw      a1,40(s0)
+       00a59063                bne a1,a0,<Clock_Ip_ClockPowerModeChangeNotification+0xca>
+       00000297 auipc   t0,0x0
+       000282e7                jalr    t0,t0 # <Clock_Ip_ClockPowerModeChangeNotification+0xce>
+       9602 jalr    a2
+       02c43503                ld      a0,44(s0)
+ 4910                    lw      a2,16(a0)
+       852e                    mv a0,a1
+       9602                    jalr    a2
+       02c43503 ld      a0,44(s0)
+       4510                    lw a2,8(a0)
+       852e                    mv      a0,a1
+       9602 jalr    a2
+       02443503                ld      a0,36(s0)
+ 4550                    lw      a2,12(a0)
+       852e mv      a0,a1
+       9602                    jalr    a2
+       02443503 ld      a0,36(s0)
        4510                    lw a2,8(a0)
        4188                    lw      a0,0(a1)
        9602 jalr    a2
```

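It seems that, for good reasons, a `lw`, `lw`, `lw` sequence is replaced by `ld`, `lw`. However, an additional `mv` is introduced.
This is because the value at the higher address needs to go into a0 for the next call.
Using the `ld` instruction reverses the mapping of values to registers, and thus creates the need to move a1 to a0 later on. For example, `lw a1,44(s0); lw a0,48(s0)` becomes `ld a0,44(s0)`, which places the word at offset 44 in a0 and the word at offset 48 in a1, so a `mv a0,a1` is required before the call.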
From the code, I am not sure whether there is a reason for the order in memory of the values we are loading with `ld`.

Hopefully, this information is enough to solve the root cause.

_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to