manojgupta added subscribers: nickdesaulniers, llozano, srhines.
manojgupta added a comment.

I was able to reduce it to the following:

  typedef unsigned int u32;
  typedef unsigned long long u64;
  typedef unsigned long size_t;
  
  void fortify_panic(const char *name) __attribute__((noreturn)) ;
  void __read_overflow(void) ;
  void __read_overflow2(void) ;
  void __write_overflow(void) ;
  
  extern void *memcpy(void *to, const void *from, size_t len);
  extern void *__memcpy(void *to, const void *from, size_t len);
  
  /*
   * Reduced fortified memcpy wrapper: compares the __builtin_object_size() of
   * both pointers against the requested size, then forwards the copy to
   * __builtin_memcpy().
   *
   * p:    destination pointer
   * q:    source pointer
   * size: number of bytes to copy
   *
   * Returns the result of __builtin_memcpy(p, q, size).
   */
  extern inline __attribute__((unused)) __attribute__((no_instrument_function)) 
__attribute__((always_inline)) __attribute__((gnu_inline)) void *memcpy(void 
*p, const void *q, size_t size)
  {
   /* Object sizes (type 0) as seen by the compiler for destination and source. */
   size_t p_size = __builtin_object_size(p, 0);
   size_t q_size = __builtin_object_size(q, 0);
   /*
    * Only when size is a compile-time constant: call __write_overflow() /
    * __read_overflow2(), which are declared but not defined in this snippet.
    */
   if (__builtin_constant_p(size)) {
    if (p_size < size)
     __write_overflow();
    if (q_size < size)
     __read_overflow2();
   }
   /* Checked regardless of constness; fortify_panic() is declared noreturn. */
   if (p_size < size || q_size < size)
    fortify_panic(__func__);
   return __builtin_memcpy(p, q, size);
  }
  
  /*
   * Copies count bytes from src to dst by calling memcpy(). The volatile
   * qualifier on src is cast away for the call, and memcpy()'s return value
   * is discarded.
   */
  static inline __attribute__((unused)) __attribute__((no_instrument_function)) 
void
  memcpy_fromio(void *dst, const volatile void *src, size_t count)
  {
   memcpy(dst, (const void *)src, count);
  }
  
  /*
   * Reads a u64 located at addr + offset: copies sizeof(val) bytes into a
   * local through memcpy_fromio() and returns that local. This is the function
   * shown in the objdump listings that follow.
   */
  u64 sst_shim32_read64(void *addr, u32 offset)
  {
   u64 val;
   /* addr + offset is arithmetic on a void pointer (GNU C extension, byte-sized steps). */
   memcpy_fromio(&val, addr + offset, sizeof(val));
   return val;
  }

Compiling with

  clang -Qunused-arguments -D_FORTIFY_SOURCE=2 -fno-omit-frame-pointer 
-fno-stack-protector  -nostdinc  -fno-strict-aliasing -fno-common  -std=gnu89 
-fno-PIE -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -mno-80387 
-mstack-alignment=8 -mtune=generic -mno-red-zone -mcmodel=kernel 
-funit-at-a-time  -pipe  -mretpoline-external-thunk 
-fno-delete-null-pointer-checks  -Os -fstack-protector-strong -mno-global-merge 
-no-integrated-as  -fno-omit-frame-pointer -fno-optimize-sibling-calls 
-ftrivial-auto-var-init=pattern  -pg -mfentry  -fno-strict-overflow 
-fno-merge-all-constants -fno-stack-check  -c -o test.o test.c -target 
x86_64-cros-linux-gnu

and objdump -drW test.o
Before:
No call to memcpy is emitted, as clang is able to optimize it away.

  0000000000000000 <sst_shim32_read64>:
     0: e8 00 00 00 00          callq  5 <sst_shim32_read64+0x5>
                        1: R_X86_64_PLT32       __fentry__-0x4
     5: 55                      push   %rbp
     6: 48 89 e5                mov    %rsp,%rbp
     9: 89 f0                   mov    %esi,%eax
     b: 48 8b 04 07             mov    (%rdi,%rax,1),%rax
     f: 5d                      pop    %rbp
    10: c3                      retq 

After:

A call to memcpy is emitted.

  0000000000000000 <sst_shim32_read64>:
     0: e8 00 00 00 00          callq  5 <sst_shim32_read64+0x5>
                        1: R_X86_64_PLT32       __fentry__-0x4
     5: 55                      push   %rbp
     6: 48 89 e5                mov    %rsp,%rbp
     9: 53                      push   %rbx
     a: 48 83 ec 10             sub    $0x10,%rsp
     e: 65 48 8b 04 25 28 00    mov    %gs:0x28,%rax
    15: 00 00 
    17: 48 89 45 f0             mov    %rax,-0x10(%rbp)
    1b: 48 b8 aa aa aa aa aa    movabs $0xaaaaaaaaaaaaaaaa,%rax
    22: aa aa aa 
    25: 48 8d 5d e8             lea    -0x18(%rbp),%rbx
    29: 48 89 03                mov    %rax,(%rbx)
    2c: 89 f6                   mov    %esi,%esi
    2e: 48 01 fe                add    %rdi,%rsi
    31: ba 08 00 00 00          mov    $0x8,%edx
    36: 48 89 df                mov    %rbx,%rdi
    39: e8 00 00 00 00          callq  3e <sst_shim32_read64+0x3e>
                        3a: R_X86_64_PLT32      memcpy-0x4
    3e: 48 8b 03                mov    (%rbx),%rax
    41: 65 48 8b 0c 25 28 00    mov    %gs:0x28,%rcx
    48: 00 00 
    4a: 48 3b 4d f0             cmp    -0x10(%rbp),%rcx
    4e: 75 07                   jne    57 <sst_shim32_read64+0x57>
    50: 48 83 c4 10             add    $0x10,%rsp
    54: 5b                      pop    %rbx
    55: 5d                      pop    %rbp
    56: c3                      retq   
    57: e8 00 00 00 00          callq  5c <sst_shim32_read64+0x5c>
                        58: R_X86_64_PLT32      __stack_chk_fail-0x4

At this point, it is not clear to me whether clang is doing anything wrong here,
or whether this is a bug in kernel 4.4, which uses a regular memcpy for IO that
cannot use a regular memcpy.
And so I suspect we need to backport 
https://github.com/torvalds/linux/commit/c2327da06b33d8e1093ce2c28f395bc500d1b0d3
 to older kernel versions.

@nickdesaulniers wdyt?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71082/new/

https://reviews.llvm.org/D71082



_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to