https://gcc.gnu.org/g:ad860cc27b3312f9119c7fecb8638a7c1f6d77c9
commit r14-9438-gad860cc27b3312f9119c7fecb8638a7c1f6d77c9 Author: Jakub Jelinek <ja...@redhat.com> Date: Tue Mar 12 11:34:50 2024 +0100 asan: Instrument <retval> stores in callees rather than callers [PR112709] asan currently instruments since PR69276 r6-6758 fix calls which store the return value into memory on the caller side, before the call it verifies the memory is writable. Now PR112709 where we ICE on trying to instrument such calls made me think about whether that is what we want to do. There are 3 different cases. One is when a function returns an aggregate which is passed e.g. in registers, say like struct S { int a[4]; }; returning on x86_64. That would be ideally instrumented in between the actual call and storing of the aggregate into memory, but asan currently mostly works as a GIMPLE pass and arranging for the instrumentation to happen at that spot would be really hard. We could diagnose after the call but generally asan attempts to diagnose stuff before something is overwritten rather than after, or keep the current behavior (that is what this patch does, which has the disadvantage that it can complain about UB even for functions which never return and so never actually store, and doesn't check whether the memory wasn't e.g. poisoned during the call) or could e.g. instrument both before and after the call (that would have the disadvantage the current state has but at least would check post-factum the store again afterwards). Another case is when a function returns an aggregate through a hidden reference, struct T { int a[128]; }; on x86_64 or even the above struct S on ia32 as example. In the actual program such stores happen when storing something to <retval> or its parts in the callee, because <retval> there expands to *hidden_retval. So, IMHO we should instrument those in the callee rather than caller, that is where the writes are and we can do that easily. This is what the patch below does. And the last case is for builtins/internal functions. Usually those don't return aggregates, but in case they'd do and can be expanded inline, it is better to instrument them in the caller (as before) rather than not instrumenting the return stores at all. I had to tweak the expected output on the PR69276 testcase, because with the patch it keeps previous behavior on x86_64 (structure returned in registers, stored in the caller, so reported as UB in A::A()), while on i686 it changed the behavior and is reported as UB in the vnull::operator vec which stores the structure, A::A() is then a frame above it in the backtrace. 2024-03-12 Jakub Jelinek <ja...@redhat.com> PR sanitizer/112709 * asan.cc (has_stmt_been_instrumented_p): Don't instrument call stores on the caller side unless it is a call to a builtin or internal function or function doesn't return by hidden reference. (maybe_instrument_call): Likewise. (instrument_derefs): Instrument stores to RESULT_DECL if returning by hidden reference. * gcc.dg/asan/pr112709-1.c: New test. * g++.dg/asan/pr69276.C: Adjust expected output for some targets. Diff: --- gcc/asan.cc | 17 +++++++++-- gcc/testsuite/g++.dg/asan/pr69276.C | 3 +- gcc/testsuite/gcc.dg/asan/pr112709-1.c | 52 ++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 4 deletions(-) diff --git a/gcc/asan.cc b/gcc/asan.cc index d621ec9c323..c533b09b1a1 100644 --- a/gcc/asan.cc +++ b/gcc/asan.cc @@ -1372,7 +1372,12 @@ has_stmt_been_instrumented_p (gimple *stmt) return true; } } - else if (is_gimple_call (stmt) && gimple_store_p (stmt)) + else if (is_gimple_call (stmt) + && gimple_store_p (stmt) + && (gimple_call_builtin_p (stmt) + || gimple_call_internal_p (stmt) + || !aggregate_value_p (TREE_TYPE (gimple_call_lhs (stmt)), + gimple_call_fntype (stmt)))) { asan_mem_ref r; asan_mem_ref_init (&r, NULL, 1); @@ -2751,7 +2756,9 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t, return; poly_int64 decl_size; - if ((VAR_P (inner) || TREE_CODE (inner) == RESULT_DECL) + if ((VAR_P (inner) + || (TREE_CODE (inner) == RESULT_DECL + && !aggregate_value_p (inner, current_function_decl))) && offset == NULL_TREE && DECL_SIZE (inner) && poly_int_tree_p (DECL_SIZE (inner), &decl_size) @@ -3023,7 +3030,11 @@ maybe_instrument_call (gimple_stmt_iterator *iter) } bool instrumented = false; - if (gimple_store_p (stmt)) + if (gimple_store_p (stmt) + && (gimple_call_builtin_p (stmt) + || gimple_call_internal_p (stmt) + || !aggregate_value_p (TREE_TYPE (gimple_call_lhs (stmt)), + gimple_call_fntype (stmt)))) { tree ref_expr = gimple_call_lhs (stmt); instrument_derefs (iter, ref_expr, diff --git a/gcc/testsuite/g++.dg/asan/pr69276.C b/gcc/testsuite/g++.dg/asan/pr69276.C index 1347fae78a6..aea710407e9 100644 --- a/gcc/testsuite/g++.dg/asan/pr69276.C +++ b/gcc/testsuite/g++.dg/asan/pr69276.C @@ -35,4 +35,5 @@ int main() } /* { dg-output "ERROR: AddressSanitizer: heap-buffer-overflow.*(\n|\r\n|\r)" } */ -/* { dg-output " #0 0x\[0-9a-f\]+ +in A::A()" } */ +/* { dg-output " #0 0x\[0-9a-f\]+ +in (A::A\\\(\\\)|vnull::operator vec\\\(\\\).*(\n|\r\n|\r)" } */ +/* { dg-output " #1 0x\[0-9a-f\]+ +in A::A\\\(\\\))" } */ diff --git a/gcc/testsuite/gcc.dg/asan/pr112709-1.c b/gcc/testsuite/gcc.dg/asan/pr112709-1.c new file mode 100644 index 00000000000..970b6c7b695 --- /dev/null +++ b/gcc/testsuite/gcc.dg/asan/pr112709-1.c @@ -0,0 +1,52 @@ +/* PR sanitizer/112709 */ +/* { dg-do compile } */ +/* { dg-options "-fsanitize=address -O2" } */ + +struct S { char c[1024]; }; +int foo (int); + +__attribute__((returns_twice, noipa)) struct S +bar (int x) +{ + (void) x; + struct S s = {}; + s.c[42] = 42; + return s; +} + +void +baz (struct S *p) +{ + foo (1); + *p = bar (0); +} + +void +qux (int x, struct S *p) +{ + if (x == 25) + x = foo (2); + else if (x == 42) + x = foo (foo (3)); + *p = bar (x); +} + +void +corge (int x, struct S *p) +{ + void *q[] = { &&l1, &&l2, &&l3, &&l3 }; + if (x == 25) + { + l1: + x = foo (2); + } + else if (x == 42) + { + l2: + x = foo (foo (3)); + } +l3: + *p = bar (x); + if (x < 4) + goto *q[x & 3]; +}