On Fri, Mar 27, 2015 at 11:16:41AM -0700, Dave Hansen wrote:
> That would have saved creating 'u32 __user *bd_entry_32' so that we
> could implicitly do sizeof(*bd_entry_32).  But, what else does it buy us?

Well, you could misappropriate futex_atomic_cmpxchg_inatomic() which
takes u32s already - you probably might want to rename it to something
more generic first, though.

Diff ontop:

---
Index: b/arch/x86/mm/mpx.c
===================================================================
--- a/arch/x86/mm/mpx.c 2015-03-28 09:21:40.199966745 +0100
+++ b/arch/x86/mm/mpx.c 2015-03-28 09:19:40.491968402 +0100
@@ -18,6 +18,7 @@
 #include <asm/processor.h>
 #include <asm/trace/mpx.h>
 #include <asm/fpu-internal.h>
+#include <asm/futex.h>
 
 #define CREATE_TRACE_POINTS
 #include <asm/trace/mpx.h>
@@ -425,7 +426,6 @@ static int mpx_cmpxchg_bd_entry(struct m
                unsigned long *actual_old_val_ptr, long __user *bd_entry_addr,
                unsigned long expected_old_val, unsigned long new_bd_entry)
 {
-       int ret;
        /*
         * user_atomic_cmpxchg_inatomic() actually uses sizeof()
         * the pointer thatt we pass to it to figure out how much
@@ -433,21 +433,16 @@ static int mpx_cmpxchg_bd_entry(struct m
         * pass a pointer to a 64-bit data type when we only want
         * a 32-bit copy.
         */
-       if (is_64bit_mm(mm)) {
-               ret = user_atomic_cmpxchg_inatomic(actual_old_val_ptr,
-                               bd_entry_addr, expected_old_val, new_bd_entry);
-       } else {
-               u32 uninitialized_var(actual_old_val_32);
-               u32 expected_old_val_32 = expected_old_val;
-               u32 new_bd_entry_32 = new_bd_entry;
-               u32 __user *bd_entry_32 = (u32 __user *)bd_entry_addr;
-               ret = user_atomic_cmpxchg_inatomic(&actual_old_val_32,
-                               bd_entry_32, expected_old_val_32,
-                               new_bd_entry_32);
-               if (!ret)
-                       *actual_old_val_ptr = actual_old_val_32;
-       }
-       return ret;
+       if (is_64bit_mm(mm))
+               return user_atomic_cmpxchg_inatomic(actual_old_val_ptr,
+                                                  bd_entry_addr,
+                                                  expected_old_val,
+                                                  new_bd_entry);
+       else
+               return futex_atomic_cmpxchg_inatomic((u32 *)actual_old_val_ptr,
+                                                   (u32 __user *)bd_entry_addr,
+                                                   expected_old_val,
+                                                   new_bd_entry);
 }
 
 /*
---

The asm looks the same except the retval. Yours does

        mov %rax, (%rsi)

for actual_old_val_ptr which, AFAICT, is not needed in the 32-bit
case because there we're returning a 32-bit value anyway:

        *actual_old_val_ptr = actual_old_val_32;

but gcc writes out the whole 64-bit register %rax to the pointer in %rsi
because it is an unsigned long it gets passed in.

Not that it matters, it is being sign-extended before that with

        movl    %eax, %eax      # actual_old_val_32, tmp137


yours:
------
        .loc 1 445 0
        cmpq    %rax, %rdx      # D.38827, bd_entry_addr
        ja      .L151   #,
.LBB993:
        .loc 1 445 0 is_stmt 0 discriminator 1
        movl    %ecx, %eax      # expected_old_val, actual_old_val_32
.LVL179:
        xorl    %edi, %edi      # ret
.LVL180:
#APP
# 445 "arch/x86/mm/mpx.c" 1

1:      .pushsection .smp_locks,"a"
.balign 4
.long 671f - .
.popsection
671:
        lock; cmpxchgl %r8d, (%rdx)     # new_bd_entry, MEM[(u32 
*)bd_entry_addr_12(D)]
2:
        .section .fixup, "ax"
3:      mov     $-14, %edi      #, ret
        jmp     2b
        .previous
 .pushsection "__ex_table","a"
 .balign 8
 .long (1b) - .
 .long (3b) - .
 .popsection

# 0 "" 2
#NO_APP
.LBE993:
        .loc 1 448 0 is_stmt 1 discriminator 1
        testl   %edi, %edi      # ret
        jne     .L151   #,
        .loc 1 449 0
        movl    %eax, %eax      # actual_old_val_32, tmp137
.LVL181:
        movq    %rax, (%rsi)    # tmp137, *actual_old_val_ptr_17(D)
---



futex_atomic_cmpxchg_inatomic:
------------------------------
        .file 9 "./arch/x86/include/asm/futex.h"
        .loc 9 113 0
        cmpq    %rax, %rdx      # D.38827, bd_entry_addr
        ja      .L153   #,
.LBB1003:
        movl    %ecx, %eax      # expected_old_val, __old
.LVL185:
        xorl    %edi, %edi      # ret
.LVL186:
#APP
# 113 "./arch/x86/include/asm/futex.h" 1

1:      .pushsection .smp_locks,"a"
.balign 4
.long 671f - .
.popsection
671:
        lock; cmpxchgl %r8d, (%rdx)     # new_bd_entry, MEM[(u32 
*)bd_entry_addr_12(D)]
2:
        .section .fixup, "ax"
3:      mov     $-14, %edi      #, ret
        jmp     2b
        .previous
 .pushsection "__ex_table","a"
 .balign 8
 .long (1b) - .
 .long (3b) - .
 .popsection

# 0 "" 2
#NO_APP
        movl    %eax, (%rsi)    # __old, MEM[(u32 *)actual_old_val_ptr_17(D)]
.LBE1003:
.LBE995:
.LBE994:
.LBE989:
        .loc 1 458 0
        movl    %edi, %eax      # ret,
---

Here the objdump output which shows the difference better:

yours:
------
     b02:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
     b08:       48 83 e8 04             sub    $0x4,%rax
     b0c:       bf f2 ff ff ff          mov    $0xfffffff2,%edi
     b11:       48 39 c2                cmp    %rax,%rdx
     b14:       77 e8                   ja     afe <mpx_cmpxchg_bd_entry+0x3e>
     b16:       89 c8                   mov    %ecx,%eax
     b18:       31 ff                   xor    %edi,%edi
     b1a:       f0 44 0f b1 02          lock cmpxchg %r8d,(%rdx)
     b1f:       85 ff                   test   %edi,%edi
     b21:       75 db                   jne    afe <mpx_cmpxchg_bd_entry+0x3e>
     b23:       89 c0                   mov    %eax,%eax
     b25:       48 89 06                mov    %rax,(%rsi)
     b28:       89 f8                   mov    %edi,%eax
     b2a:       5d                      pop    %rbp
     b2b:       c3                      retq
     b2c:       0f 1f 40 00             nopl   0x0(%rax)



futex_atomic_cmpxchg_inatomic:
------------------------------
     b72:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
     b78:       48 83 ef 04             sub    $0x4,%rdi
     b7c:       b8 f2 ff ff ff          mov    $0xfffffff2,%eax
     b81:       48 39 fa                cmp    %rdi,%rdx
     b84:       77 ea                   ja     b70 <mpx_cmpxchg_bd_entry+0x40>
     b86:       89 c8                   mov    %ecx,%eax
     b88:       31 ff                   xor    %edi,%edi
     b8a:       f0 44 0f b1 02          lock cmpxchg %r8d,(%rdx)
     b8f:       89 06                   mov    %eax,(%rsi)
     b91:       89 f8                   mov    %edi,%eax
     b93:       5d                      pop    %rbp
     b94:       c3                      retq
     b95:       66 66 2e 0f 1f 84 00    data16 nopw %cs:0x0(%rax,%rax,1)
     b9c:       00 00 00 00

AFAICT, in this case, we return only a 32-bit value and don't touch
the upper 32 bits of actual_old_val which might be a problem if the
assumptions of the callers is that the whole unsigned long is being
changed.

If that's not the case, then you get much nicer code :-)

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to