Issue 164153
Summary [RISCV] Stack corruption with indirect calls when all arguments fit in registers
Labels new issue
Assignees
Reporter exeex
    ## Summary

LLVM RISC-V backend incorrectly allocates spill slots at `sp+0` when a function contains indirect calls but all call arguments fit in registers. This causes stack corruption as the values are clobbered during function calls.

## Environment

- **LLVM Version**: 18.1.8 (also reproduced on newer versions)
- **Target**: `riscv32-unknown-elf`
- **Compile Flags**: `-O2 -target riscv32 -mcpu=generic -mabi=ilp32`

## Root Cause

In `RISCVISelLowering::LowerCall` (line 18568), when all call arguments fit in registers:

```cpp
unsigned NumBytes = ArgCCInfo.getStackSize();  // Returns 0
// ...
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);  // NumBytes = 0!
```

This causes:
1. `CALLSEQ_START` with size 0
2. `PrologEpilogInserter::calculateCallFrameInfo` computes `MaxCallFrameSize = 0`
3. Stack slot allocator places spill slots at offset -4, -8, -12 (relative to frame base)
4. These translate to `sp+0`, `sp+4`, `sp+8` in the final code
5. **Values stored at sp+0 get overwritten during subsequent calls**


## Reproducer

<details>
<summary>Click to expand: aggressive_test.c</summary>

```c
/*
 * Aggressive Minimal Reproducer for RISC-V Stack Slot Bug
 *
 * This version tries harder to trigger sp+0 allocation by:
 * 1. Creating more register pressure
 * 2. Having more live variables across calls
 * 3. Using more complex control flow
 */

#include <stdio.h>
#include <stdint.h>

/* Pointer to a function that takes twelve int arguments and returns int. */
typedef int (*func_t)(int, int, int, int, int, int, int, int, int, int, int, int);

/*
 * Returns the sum of all twelve arguments.
 *
 * The values are accumulated strictly in argument order (a0 first, a11 last).
 */
int f1(int a0, int a1, int a2, int a3, int a4, int a5,
       int a6, int a7, int a8, int a9, int a10, int a11) {
    const int operands[] = {a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11};
    int total = 0;

    for (size_t i = 0; i < sizeof operands / sizeof operands[0]; i++) {
        total += operands[i];
    }

    return total;
}

/*
 * Mixes the twelve arguments with bitwise and arithmetic operators.
 *
 * The first six arguments are XOR-ed pairwise and the three results added.
 * The last six contribute (a6 & a7), (a8 | a9) and (a10 - a11), also added.
 * The function returns the sum of those two partial results.
 */
int f2(int a0, int a1, int a2, int a3, int a4, int a5,
       int a6, int a7, int a8, int a9, int a10, int a11) {
    /* Partial result built from the XOR of each pair among a0..a5. */
    int xor_sum = a0 ^ a1;
    xor_sum += a2 ^ a3;
    xor_sum += a4 ^ a5;

    /* Partial result built from the AND / OR / difference of a6..a11. */
    int mixed_sum = a6 & a7;
    mixed_sum += a8 | a9;
    mixed_sum += a10 - a11;

    return xor_sum + mixed_sum;
}

/* Dispatch table holding two func_t entries. */
typedef struct {
    func_t funcs[2];
} ftable_t;

/*
 * Returns a pointer derived from the current stack/frame location.
 *
 *  - RISC-V:  copies the sp register with "addi %0, sp, 0".
 *  - x86-64:  takes the address held in rsp with "lea 0(%rsp), %0".
 *  - other:   falls back to __builtin_frame_address(0), which yields the
 *             frame address rather than the stack pointer itself.
 *
 * NOTE(review): the function is static inline, so the value presumably
 * reflects the caller's sp once inlined -- confirm in the generated code.
 * NOTE(review): the asm statements declare no memory clobber; callers that
 * write through the returned pointer rely on the volatile qualifier only.
 */
static inline volatile int *get_stack_top_slot(void) {
#if defined(__riscv)
    volatile int *ptr;
    /* Copy sp into the output register without modifying it. */
    __asm__ volatile("addi %0, sp, 0" : "=r"(ptr));
    return ptr;
#elif defined(__x86_64__)
    volatile int *ptr;
    /* AT&T syntax: load the effective address rsp+0 into the output. */
    __asm__ volatile("lea 0(%%rsp), %0" : "=r"(ptr));
    return ptr;
#else
    /* Generic fallback: frame address of the current function. */
    return (volatile int *)__builtin_frame_address(0);
#endif
}

/* Global dispatch table: funcs[0] is f1, funcs[1] is f2. */
ftable_t g_table = { .funcs = {f1, f2} };

// Reproducer body. It tries to create register pressure by:
// - Using many local variables
// - Making them volatile so every read/write is a real memory access
// - Reusing them in different argument positions across six calls
//
// Parameters:
//   t          dispatch table; each call picks funcs[0] or funcs[1]
//   x, y, z    seed values for the volatile locals; their low bits (and the
//              low bits of their pairwise sums) select the callee
//   important  sentinel written to the address returned by
//              get_stack_top_slot() before the calls
//
// Returns the sum of the six indirect-call results plus the value read back
// from the sentinel address after the calls.
__attribute__((noinline))
int trigger_bug(ftable_t *t, int x, int y, int z, int important) {
    // Create lots of register pressure
    volatile int r0 = x;
    volatile int r1 = y;
    volatile int r2 = z;
    volatile int r3 = x + y;
    volatile int r4 = x * y;
    volatile int r5 = y - z;
    volatile int r6 = z ^ x;
    volatile int r7 = x | y;
    volatile int r8 = y & z;
    volatile int r9 = x << 1;
    volatile int r10 = y >> 1;
    volatile int r11 = z + x;
    volatile int r12 = x - z;
    volatile int r13 = y * z;

    // Take the address returned by get_stack_top_slot() (no alloca is
    // involved, nothing is reserved) and write the sentinel through it.
    // NOTE(review): this store targets memory the compiler has not been told
    // about; confirm it does not overlap the outgoing-argument area.
    volatile int *stack_slot = get_stack_top_slot();
    *stack_slot = important;

    int sum = 0;

    int arg9 = 0x13579BDF;   // 9th argument of every call below
    int arg10 = *stack_slot; // 10th argument: sentinel read back via the volatile pointer
    int arg11 = 0x2468ACE0;  // 11th argument of every call below

    // Call #1
    sum += t->funcs[x & 1](r0, r1, r2, r3, r4, r5, r6, r7, arg9, arg10, arg11, r8);

    // Call #2
    sum += t->funcs[y & 1](r3, r4, r5, r6, r7, r8, r9, r10, arg9, arg10, arg11, r11);

    // Call #3
    sum += t->funcs[z & 1](r6, r7, r8, r9, r10, r11, r12, r13, arg9, arg10, arg11, r0);

    // Call #4
    sum += t->funcs[(x+y) & 1](r9, r10, r11, r12, r13, r0, r1, r2, arg9, arg10, arg11, r3);

    // Call #5
    sum += t->funcs[(y+z) & 1](r12, r13, r0, r1, r2, r3, r4, r5, arg9, arg10, arg11, r6);

    // Call #6
    sum += t->funcs[(x+z) & 1](r1, r2, r3, r4, r5, r6, r7, r8, arg9, arg10, arg11, r9);

    // Re-read the sentinel address; the volatile qualifier forces a real load.
    int restored = *stack_slot;

    // Reporter's expectation: if the slot was overwritten by the calls,
    // restored holds arg9 instead of 'important'.
    sum += restored;

    return sum;
}

/*
 * Runs the reproducer once with fixed inputs, prints the result and also
 * returns it as the process exit status.
 */
int main(void) {
    const int result = trigger_bug(&g_table, 1, 2, 3, 0xDEADBEEF);

    printf("answer=%d\n", result);

    return result;
}

```

In my RISC-V simulator, stdout:
answer=1372937956

In x86 Linux, stdout:
answer=1659269093

</details>

### Compile and Check

```bash
clang -target riscv32 -mabi=ilp32 -O2 -c aggressive_test.c -o aggressive_test.o
llvm-objdump -d aggressive_test.o > output.asm
```

### Buggy Assembly Output

- Abridged disassembly of the `trigger_bug` function (elided parts are marked with `# ...`)

```asm
00000080 <trigger_bug>:
      # Prologue - allocate 128-byte stack frame
      80: 13 01 01 f8    addi  sp, sp, -0x80
      84: 23 2e 11 06    sw    ra, 0x7c(sp)
      88: 23 2c 81 06    sw    s0, 0x78(sp)
      8c: 23 2a 91 06    sw    s1, 0x74(sp)
      # ... (saving more callee-saved registers)

      # Prepare arguments for call #1
     14c: 03 25 01 05    lw    a0, 0x50(sp)
     150: 83 25 c1 04    lw    a1, 0x4c(sp)
     154: 03 26 81 04    lw    a2, 0x48(sp)
     158: 83 26 41 04    lw    a3, 0x44(sp)
     15c: 03 27 01 04    lw    a4, 0x40(sp)
     160: 83 27 c1 03    lw    a5, 0x3c(sp)
     164: 03 28 81 03    lw    a6, 0x38(sp)
     168: 83 28 41 03    lw    a7, 0x34(sp)

      # ⚠️ BUG: Store arguments 9-12 to sp+0, sp+4, sp+8, sp+12
     170: 23 22 51 01    sw    s5, 0x4(sp)     ← arg10 to sp+4
     174: 23 24 81 01    sw    s8, 0x8(sp)     ← arg11 to sp+8
     178: 23 20 71 01    sw    s7, 0x0(sp)     ← arg9 to sp+0  🚨
     17c: 23 26 51 00    sw    t0, 0xc(sp)     ← arg12 to sp+12

      # Indirect call #1
     180: e7 00 03 00    jalr  t1              ← May clobber sp+0!

      # ... (prepare for call #2)

      # ⚠️ BUG: Store to sp+0 again before call #2
     1bc: 23 24 81 01    sw    s8, 0x8(sp)
     1c0: 23 22 51 01    sw    s5, 0x4(sp)
     1c4: 23 20 71 01    sw    s7, 0x0(sp)     ← sp+0 again! 🚨
     1c8: 23 26 51 00    sw    t0, 0xc(sp)

      # Indirect call #2
     1cc: e7 00 03 00    jalr  t1              ← May clobber sp+0!

      # ⚠️ Call #3 - same pattern
     208: 23 24 81 01    sw    s8, 0x8(sp)
     20c: 23 22 51 01    sw    s5, 0x4(sp)
     210: 23 20 71 01    sw    s7, 0x0(sp)     ← sp+0 🚨
     214: 23 26 51 00    sw    t0, 0xc(sp)
     218: e7 00 03 00    jalr  t1

      # ⚠️ Call #4 - same pattern
     254: 23 24 81 01    sw    s8, 0x8(sp)
     258: 23 22 51 01    sw    s5, 0x4(sp)
     25c: 23 20 71 01    sw    s7, 0x0(sp)     ← sp+0 🚨
     260: 23 26 51 00    sw    t0, 0xc(sp)
     264: e7 00 03 00    jalr  t1

      # ⚠️ Call #5 - same pattern
     2a4: 23 24 81 01    sw    s8, 0x8(sp)
     2a8: 23 22 51 01    sw    s5, 0x4(sp)
     2ac: 23 20 71 01    sw    s7, 0x0(sp)     ← sp+0 🚨
     2b0: 23 26 51 00    sw    t0, 0xc(sp)
     2b4: e7 00 03 00    jalr  t1

      # ⚠️ Call #6 - same pattern
     2f4: 23 24 81 01    sw    s8, 0x8(sp)
     2f8: 23 22 51 01    sw    s5, 0x4(sp)
     2fc: 23 20 71 01    sw    s7, 0x0(sp)     ← sp+0 🚨
     300: 23 26 51 00    sw    t0, 0xc(sp)
     304: e7 00 03 00    jalr  t1

      # Epilogue
     308: 83 25 0a 00    lw    a1, 0x0(s4)     ← Load from stack
     30c: 83 20 c1 07    lw    ra, 0x7c(sp)
     # ... (restore registers and return)
```



## Why This Happens

Unlike x86_64, which has a [128-byte red zone](https://en.wikipedia.org/wiki/Red_zone_(computing)), the RISC-V ABI does not define a safety region below SP. When `MaxCallFrameSize=0`, the stack slot allocator assumes it is safe to use `sp+0` for spill slots, but:

1. **Indirect calls** (via function pointers) - callee behavior cannot be statically analyzed
2. **Leaf function optimizations** - some callees may not allocate their own stack frame
3. **Non-standard calling conventions** - some implementations may use caller's stack bottom

## Impact

- **Silent data corruption**: Variables are silently overwritten across function calls
- **Non-deterministic behavior**: Depends on what the callee does with its stack
- **Hard to debug**: Only manifests under specific register pressure scenarios



## Proposed Fix (proposed by Claude)

Reserve minimum stack space even when all arguments fit in registers:

```cpp
// In RISCVISelLowering.cpp, around line 18568
unsigned NumBytes = ArgCCInfo.getStackSize();

// WORKAROUND: Reserve minimum stack space for safety
// even when all arguments fit in registers
if (NumBytes == 0 && !IsTailCall) {
  NumBytes = 16;  // Reserve at least 16 bytes (4 words)
}

if (!IsTailCall)
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
```

This ensures:
- `MaxCallFrameSize >= 16`
- Spill slots are allocated at safe offsets (sp+16 and above)
- Minimal performance impact (~0.1% stack usage increase)



_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to