manojgupta added subscribers: nickdesaulniers, llozano, srhines. manojgupta added a comment.
I was able to reduce it to the following: typedef unsigned int u32; typedef unsigned long long u64; typedef unsigned long size_t; void fortify_panic(const char *name) __attribute__((noreturn)) ; void __read_overflow(void) ; void __read_overflow2(void) ; void __write_overflow(void) ; extern void *memcpy(void *to, const void *from, size_t len); extern void *__memcpy(void *to, const void *from, size_t len); extern inline __attribute__((unused)) __attribute__((no_instrument_function)) __attribute__((always_inline)) __attribute__((gnu_inline)) void *memcpy(void *p, const void *q, size_t size) { size_t p_size = __builtin_object_size(p, 0); size_t q_size = __builtin_object_size(q, 0); if (__builtin_constant_p(size)) { if (p_size < size) __write_overflow(); if (q_size < size) __read_overflow2(); } if (p_size < size || q_size < size) fortify_panic(__func__); return __builtin_memcpy(p, q, size); } static inline __attribute__((unused)) __attribute__((no_instrument_function)) void memcpy_fromio(void *dst, const volatile void *src, size_t count) { memcpy(dst, (const void *)src, count); } u64 sst_shim32_read64(void *addr, u32 offset) { u64 val; memcpy_fromio(&val, addr + offset, sizeof(val)); return val; } Compiling with clang -Qunused-arguments -D_FORTIFY_SOURCE=2 -fno-omit-frame-pointer -fno-stack-protector -nostdinc -fno-strict-aliasing -fno-common -std=gnu89 -fno-PIE -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -mno-80387 -mstack-alignment=8 -mtune=generic -mno-red-zone -mcmodel=kernel -funit-at-a-time -pipe -mretpoline-external-thunk -fno-delete-null-pointer-checks -Os -fstack-protector-strong -mno-global-merge -no-integrated-as -fno-omit-frame-pointer -fno-optimize-sibling-calls -ftrivial-auto-var-init=pattern -pg -mfentry -fno-strict-overflow -fno-merge-all-constants -fno-stack-check -c -o test.o test.c -target x86_64-cros-linux-gnu and then running objdump -drW test.o gives the following. Before: No call to memcpy is emitted, as clang is able to optimize it away. 
0000000000000000 <sst_shim32_read64>: 0: e8 00 00 00 00 callq 5 <sst_shim32_read64+0x5> 1: R_X86_64_PLT32 __fentry__-0x4 5: 55 push %rbp 6: 48 89 e5 mov %rsp,%rbp 9: 89 f0 mov %esi,%eax b: 48 8b 04 07 mov (%rdi,%rax,1),%rax f: 5d pop %rbp 10: c3 retq After: A call to memcpy is emitted. 0000000000000000 <sst_shim32_read64>: 0: e8 00 00 00 00 callq 5 <sst_shim32_read64+0x5> 1: R_X86_64_PLT32 __fentry__-0x4 5: 55 push %rbp 6: 48 89 e5 mov %rsp,%rbp 9: 53 push %rbx a: 48 83 ec 10 sub $0x10,%rsp e: 65 48 8b 04 25 28 00 mov %gs:0x28,%rax 15: 00 00 17: 48 89 45 f0 mov %rax,-0x10(%rbp) 1b: 48 b8 aa aa aa aa aa movabs $0xaaaaaaaaaaaaaaaa,%rax 22: aa aa aa 25: 48 8d 5d e8 lea -0x18(%rbp),%rbx 29: 48 89 03 mov %rax,(%rbx) 2c: 89 f6 mov %esi,%esi 2e: 48 01 fe add %rdi,%rsi 31: ba 08 00 00 00 mov $0x8,%edx 36: 48 89 df mov %rbx,%rdi 39: e8 00 00 00 00 callq 3e <sst_shim32_read64+0x3e> 3a: R_X86_64_PLT32 memcpy-0x4 3e: 48 8b 03 mov (%rbx),%rax 41: 65 48 8b 0c 25 28 00 mov %gs:0x28,%rcx 48: 00 00 4a: 48 3b 4d f0 cmp -0x10(%rbp),%rcx 4e: 75 07 jne 57 <sst_shim32_read64+0x57> 50: 48 83 c4 10 add $0x10,%rsp 54: 5b pop %rbx 55: 5d pop %rbp 56: c3 retq 57: e8 00 00 00 00 callq 5c <sst_shim32_read64+0x5c> 58: R_X86_64_PLT32 __stack_chk_fail-0x4 At this point, it is not clear to me whether clang is doing anything wrong here, or whether this is a bug in kernel 4.4, which uses a regular memcpy for I/O memory even though I/O memory cannot be accessed with a regular memcpy. So I suspect we need to backport https://github.com/torvalds/linux/commit/c2327da06b33d8e1093ce2c28f395bc500d1b0d3 to older kernel versions. @nickdesaulniers wdyt? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D71082/new/ https://reviews.llvm.org/D71082 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits