[PATCH] D72184: [BPF] support atomic instructions

2020-12-03 Thread Yonghong Song via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG286daafd6512: [BPF] support atomic instructions (authored by 
yonghong-song).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

Files:
  llvm/lib/Target/BPF/BPFInstrFormats.td
  llvm/lib/Target/BPF/BPFInstrInfo.td
  llvm/lib/Target/BPF/BPFMIChecking.cpp
  llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
  llvm/test/CodeGen/BPF/atomics.ll
  llvm/test/CodeGen/BPF/atomics_2.ll
  llvm/test/CodeGen/BPF/xadd.ll

Index: llvm/test/CodeGen/BPF/xadd.ll
===
--- llvm/test/CodeGen/BPF/xadd.ll
+++ llvm/test/CodeGen/BPF/xadd.ll
@@ -1,7 +1,5 @@
 ; RUN: not --crash llc -march=bpfel < %s 2>&1 | FileCheck %s
 ; RUN: not --crash llc -march=bpfeb < %s 2>&1 | FileCheck %s
-; RUN: not --crash llc -march=bpfel -mattr=+alu32 < %s 2>&1 | FileCheck %s
-; RUN: not --crash llc -march=bpfeb -mattr=+alu32 < %s 2>&1 | FileCheck %s
 
 ; This file is generated with the source command and source
 ; $ clang -target bpf -O2 -g -S -emit-llvm t.c
Index: llvm/test/CodeGen/BPF/atomics_2.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/atomics_2.ll
@@ -0,0 +1,254 @@
+; RUN: llc < %s -march=bpfel -mcpu=v3 -verify-machineinstrs -show-mc-encoding | FileCheck %s
+;
+; Source:
+;   int test_load_add_32(int *p, int v) {
+; return __sync_fetch_and_add(p, v);
+;   }
+;   int test_load_add_64(long *p, long v) {
+; return __sync_fetch_and_add(p, v);
+;   }
+;   int test_load_sub_32(int *p, int v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_64(long *p, long v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   // from https://gcc.gnu.org/onlinedocs/gcc-4.1.1/gcc/Atomic-Builtins.html
+;   // __sync_lock_test_and_set() actually does atomic xchg and returns
+;   // old contents.
+;   int test_xchg_32(int *p, int v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_64(long *p, long v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_cas_32(int *p, int old, int new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   long test_cas_64(long *p, long old, long new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   int test_load_and_32(int *p, int v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_and_64(long *p, long v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_or_32(int *p, int v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_or_64(long *p, long v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_xor_32(int *p, int v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+;   int test_load_xor_64(long *p, long v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+;   int test_atomic_xor_32(int *p, int v) {
+; __sync_fetch_and_xor(p, v);
+; return 0;
+;   }
+;   int test_atomic_xor_64(long *p, long v) {
+; __sync_fetch_and_xor(p, v);
+; return 0;
+;   }
+;   int test_atomic_and_64(long *p, long v) {
+; __sync_fetch_and_and(p, v);
+; return 0;
+;   }
+;   int test_atomic_or_64(long *p, long v) {
+; __sync_fetch_and_or(p, v);
+; return 0;
+;   }
+
+; CHECK-LABEL: test_load_add_32
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_add_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw add i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_add_64
+; CHECK: r0 = r2
+; CHECK: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_add_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw add i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_load_sub_32
+; CHECK: w0 = w2
+; CHECK: w0 = -w0
+; CHECK: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: r0 = r2
+; CHECK: r0 = -r0
+; CHECK: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_32
+; CHECK: w0 = w2
+; CHECK: w0 = xchg32_32(r1 

[PATCH] D72184: [BPF] support atomic instructions

2020-12-02 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song updated this revision to Diff 309167.
yonghong-song added a comment.

fix clang-format warnings.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

Files:
  llvm/lib/Target/BPF/BPFInstrFormats.td
  llvm/lib/Target/BPF/BPFInstrInfo.td
  llvm/lib/Target/BPF/BPFMIChecking.cpp
  llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
  llvm/test/CodeGen/BPF/atomics.ll
  llvm/test/CodeGen/BPF/atomics_2.ll
  llvm/test/CodeGen/BPF/xadd.ll

Index: llvm/test/CodeGen/BPF/xadd.ll
===
--- llvm/test/CodeGen/BPF/xadd.ll
+++ llvm/test/CodeGen/BPF/xadd.ll
@@ -1,7 +1,5 @@
 ; RUN: not --crash llc -march=bpfel < %s 2>&1 | FileCheck %s
 ; RUN: not --crash llc -march=bpfeb < %s 2>&1 | FileCheck %s
-; RUN: not --crash llc -march=bpfel -mattr=+alu32 < %s 2>&1 | FileCheck %s
-; RUN: not --crash llc -march=bpfeb -mattr=+alu32 < %s 2>&1 | FileCheck %s
 
 ; This file is generated with the source command and source
 ; $ clang -target bpf -O2 -g -S -emit-llvm t.c
Index: llvm/test/CodeGen/BPF/atomics_2.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/atomics_2.ll
@@ -0,0 +1,254 @@
+; RUN: llc < %s -march=bpfel -mcpu=v3 -verify-machineinstrs -show-mc-encoding | FileCheck %s
+;
+; Source:
+;   int test_load_add_32(int *p, int v) {
+; return __sync_fetch_and_add(p, v);
+;   }
+;   int test_load_add_64(long *p, long v) {
+; return __sync_fetch_and_add(p, v);
+;   }
+;   int test_load_sub_32(int *p, int v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_64(long *p, long v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   // from https://gcc.gnu.org/onlinedocs/gcc-4.1.1/gcc/Atomic-Builtins.html
+;   // __sync_lock_test_and_set() actually does atomic xchg and returns
+;   // old contents.
+;   int test_xchg_32(int *p, int v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_64(long *p, long v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_cas_32(int *p, int old, int new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   long test_cas_64(long *p, long old, long new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   int test_load_and_32(int *p, int v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_and_64(long *p, long v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_or_32(int *p, int v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_or_64(long *p, long v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_xor_32(int *p, int v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+;   int test_load_xor_64(long *p, long v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+;   int test_atomic_xor_32(int *p, int v) {
+; __sync_fetch_and_xor(p, v);
+; return 0;
+;   }
+;   int test_atomic_xor_64(long *p, long v) {
+; __sync_fetch_and_xor(p, v);
+; return 0;
+;   }
+;   int test_atomic_and_64(long *p, long v) {
+; __sync_fetch_and_and(p, v);
+; return 0;
+;   }
+;   int test_atomic_or_64(long *p, long v) {
+; __sync_fetch_and_or(p, v);
+; return 0;
+;   }
+
+; CHECK-LABEL: test_load_add_32
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_add_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw add i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_add_64
+; CHECK: r0 = r2
+; CHECK: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_add_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw add i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_load_sub_32
+; CHECK: w0 = w2
+; CHECK: w0 = -w0
+; CHECK: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: r0 = r2
+; CHECK: r0 = -r0
+; CHECK: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_32
+; CHECK: w0 = w2
+; CHECK: w0 = xchg32_32(r1 + 0, w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_32(i32* 

[PATCH] D72184: [BPF] support atomic instructions

2020-12-02 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song updated this revision to Diff 308891.
yonghong-song added a comment.

use llvm_unreachable() in default case of the switch statement


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

Files:
  llvm/lib/Target/BPF/BPFInstrFormats.td
  llvm/lib/Target/BPF/BPFInstrInfo.td
  llvm/lib/Target/BPF/BPFMIChecking.cpp
  llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
  llvm/test/CodeGen/BPF/atomics.ll
  llvm/test/CodeGen/BPF/atomics_2.ll
  llvm/test/CodeGen/BPF/xadd.ll

Index: llvm/test/CodeGen/BPF/xadd.ll
===
--- llvm/test/CodeGen/BPF/xadd.ll
+++ llvm/test/CodeGen/BPF/xadd.ll
@@ -1,7 +1,5 @@
 ; RUN: not --crash llc -march=bpfel < %s 2>&1 | FileCheck %s
 ; RUN: not --crash llc -march=bpfeb < %s 2>&1 | FileCheck %s
-; RUN: not --crash llc -march=bpfel -mattr=+alu32 < %s 2>&1 | FileCheck %s
-; RUN: not --crash llc -march=bpfeb -mattr=+alu32 < %s 2>&1 | FileCheck %s
 
 ; This file is generated with the source command and source
 ; $ clang -target bpf -O2 -g -S -emit-llvm t.c
Index: llvm/test/CodeGen/BPF/atomics_2.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/atomics_2.ll
@@ -0,0 +1,254 @@
+; RUN: llc < %s -march=bpfel -mcpu=v3 -verify-machineinstrs -show-mc-encoding | FileCheck %s
+;
+; Source:
+;   int test_load_add_32(int *p, int v) {
+; return __sync_fetch_and_add(p, v);
+;   }
+;   int test_load_add_64(long *p, long v) {
+; return __sync_fetch_and_add(p, v);
+;   }
+;   int test_load_sub_32(int *p, int v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_64(long *p, long v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   // from https://gcc.gnu.org/onlinedocs/gcc-4.1.1/gcc/Atomic-Builtins.html
+;   // __sync_lock_test_and_set() actually does atomic xchg and returns
+;   // old contents.
+;   int test_xchg_32(int *p, int v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_64(long *p, long v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_cas_32(int *p, int old, int new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   long test_cas_64(long *p, long old, long new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   int test_load_and_32(int *p, int v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_and_64(long *p, long v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_or_32(int *p, int v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_or_64(long *p, long v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_xor_32(int *p, int v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+;   int test_load_xor_64(long *p, long v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+;   int test_atomic_xor_32(int *p, int v) {
+; __sync_fetch_and_xor(p, v);
+; return 0;
+;   }
+;   int test_atomic_xor_64(long *p, long v) {
+; __sync_fetch_and_xor(p, v);
+; return 0;
+;   }
+;   int test_atomic_and_64(long *p, long v) {
+; __sync_fetch_and_and(p, v);
+; return 0;
+;   }
+;   int test_atomic_or_64(long *p, long v) {
+; __sync_fetch_and_or(p, v);
+; return 0;
+;   }
+
+; CHECK-LABEL: test_load_add_32
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_add_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw add i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_add_64
+; CHECK: r0 = r2
+; CHECK: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_add_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw add i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_load_sub_32
+; CHECK: w0 = w2
+; CHECK: w0 = -w0
+; CHECK: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: r0 = r2
+; CHECK: r0 = -r0
+; CHECK: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_32
+; CHECK: w0 = w2
+; CHECK: w0 = xchg32_32(r1 + 0, w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define 

[PATCH] D72184: [BPF] support atomic instructions

2020-12-01 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song updated this revision to Diff 308884.
yonghong-song edited the summary of this revision.
yonghong-song added a comment.

- remove -mcpu=v4.
- for the new instructions (except xadd), 32-bit operation is only supported in
alu32 mode. I chose this approach since we have had -mcpu=v3, which enables alu32
by default, for a while, and we really want to promote alu32 mode. A new kernel
that supports the atomic ops will certainly support alu32 well (a minimal sketch
follows below).
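
A minimal C sketch of the kind of operation this affects (illustrative only,
not part of the patch):

  // With -mcpu=v3, alu32 is enabled by default, so the 32-bit fetch-style
  // atomic below can be selected; outside alu32 mode the 32-bit forms of the
  // new instructions are not available (xadd being the exception).
  int fetch_or_32(int *p, int v)
  {
          return __sync_fetch_and_or(p, v);   /* 32-bit atomic_fetch_or */
  }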


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

Files:
  llvm/lib/Target/BPF/BPFInstrFormats.td
  llvm/lib/Target/BPF/BPFInstrInfo.td
  llvm/lib/Target/BPF/BPFMIChecking.cpp
  llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
  llvm/test/CodeGen/BPF/atomics.ll
  llvm/test/CodeGen/BPF/atomics_2.ll
  llvm/test/CodeGen/BPF/xadd.ll

Index: llvm/test/CodeGen/BPF/xadd.ll
===
--- llvm/test/CodeGen/BPF/xadd.ll
+++ llvm/test/CodeGen/BPF/xadd.ll
@@ -1,7 +1,5 @@
 ; RUN: not --crash llc -march=bpfel < %s 2>&1 | FileCheck %s
 ; RUN: not --crash llc -march=bpfeb < %s 2>&1 | FileCheck %s
-; RUN: not --crash llc -march=bpfel -mattr=+alu32 < %s 2>&1 | FileCheck %s
-; RUN: not --crash llc -march=bpfeb -mattr=+alu32 < %s 2>&1 | FileCheck %s
 
 ; This file is generated with the source command and source
 ; $ clang -target bpf -O2 -g -S -emit-llvm t.c
Index: llvm/test/CodeGen/BPF/atomics_2.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/atomics_2.ll
@@ -0,0 +1,254 @@
+; RUN: llc < %s -march=bpfel -mcpu=v3 -verify-machineinstrs -show-mc-encoding | FileCheck %s
+;
+; Source:
+;   int test_load_add_32(int *p, int v) {
+; return __sync_fetch_and_add(p, v);
+;   }
+;   int test_load_add_64(long *p, long v) {
+; return __sync_fetch_and_add(p, v);
+;   }
+;   int test_load_sub_32(int *p, int v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_64(long *p, long v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   // from https://gcc.gnu.org/onlinedocs/gcc-4.1.1/gcc/Atomic-Builtins.html
+;   // __sync_lock_test_and_set() actually does atomic xchg and returns
+;   // old contents.
+;   int test_xchg_32(int *p, int v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_64(long *p, long v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_cas_32(int *p, int old, int new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   long test_cas_64(long *p, long old, long new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   int test_load_and_32(int *p, int v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_and_64(long *p, long v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_or_32(int *p, int v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_or_64(long *p, long v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_xor_32(int *p, int v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+;   int test_load_xor_64(long *p, long v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+;   int test_atomic_xor_32(int *p, int v) {
+; __sync_fetch_and_xor(p, v);
+; return 0;
+;   }
+;   int test_atomic_xor_64(long *p, long v) {
+; __sync_fetch_and_xor(p, v);
+; return 0;
+;   }
+;   int test_atomic_and_64(long *p, long v) {
+; __sync_fetch_and_and(p, v);
+; return 0;
+;   }
+;   int test_atomic_or_64(long *p, long v) {
+; __sync_fetch_and_or(p, v);
+; return 0;
+;   }
+
+; CHECK-LABEL: test_load_add_32
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_add_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw add i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_add_64
+; CHECK: r0 = r2
+; CHECK: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_add_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw add i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_load_sub_32
+; CHECK: w0 = w2
+; CHECK: w0 = -w0
+; CHECK: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: r0 = r2
+; CHECK: r0 = -r0
+; CHECK: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_64(i64* nocapture %p, i64 

[PATCH] D72184: [BPF] support atomic instructions

2020-12-01 Thread Alexei Starovoitov via Phabricator via cfe-commits
ast added inline comments.



Comment at: llvm/lib/Target/BPF/BPFMIChecking.cpp:199
+  unsigned newOpcode;
+  switch(MI.getOpcode()) {
+  case BPF::XFADDW32: newOpcode = BPF::XADDW32; break;

yonghong-song wrote:
> yonghong-song wrote:
> > ast wrote:
> > > With this logic in place Andrii has a point. There is no need for 
> > > -mcpu=v4 flag.
> > > The builtins will produce new insns and it will be up to kernel to accept 
> > > them or not.
> > will make the change (removing -mcpu=v4).
> There will be one user-level change:
>   . if the user has "ret = __sync_fetch_and_add(p, r);", today's llvm will
> generate a fatal error saying the XADD is invalid because its return value is used.
>   . but after removing -mcpu=v4, the compilation will succeed and the error
> will instead happen at load time in kernels that do not support atomic_fetch_add.
> 
> I guess this is an okay change, right?
Right. With the new llvm the compilation will succeed, but it will fail to load on
older kernels and will work as expected on newer kernels. There is a change in
where the error is seen, but that's acceptable.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72184: [BPF] support atomic instructions

2020-12-01 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added inline comments.



Comment at: llvm/lib/Target/BPF/BPFMIChecking.cpp:199
+  unsigned newOpcode;
+  switch(MI.getOpcode()) {
+  case BPF::XFADDW32: newOpcode = BPF::XADDW32; break;

yonghong-song wrote:
> ast wrote:
> > With this logic in place Andrii has a point. There is no need for -mcpu=v4 
> > flag.
> > The builtins will produce new insns and it will be up to kernel to accept 
> > them or not.
> will make the change (removing -mcpu=v4).
There will be one user-level change:
  . if the user has "ret = __sync_fetch_and_add(p, r);", today's llvm will
generate a fatal error saying the XADD is invalid because its return value is used.
  . but after removing -mcpu=v4, the compilation will succeed and the error
will instead happen at load time in kernels that do not support atomic_fetch_add.

I guess this is an okay change, right?
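
A minimal C sketch of the case in question (illustrative only; the exact
diagnostics depend on the llvm and kernel versions involved):

  long fetch_add_used(long *p, long r)
  {
          /* Return value is used: older llvm rejected this at compile time
             with a fatal "invalid XADD" error; with the -mcpu=v4 gating
             removed it compiles to atomic_fetch_add and is instead rejected
             at load time by kernels that do not know the new instruction. */
          long ret = __sync_fetch_and_add(p, r);
          return ret;
  }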


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72184: [BPF] support atomic instructions

2020-12-01 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added inline comments.



Comment at: llvm/lib/Target/BPF/BPFMIChecking.cpp:199
+  unsigned newOpcode;
+  switch(MI.getOpcode()) {
+  case BPF::XFADDW32: newOpcode = BPF::XADDW32; break;

ast wrote:
> With this logic in place Andrii has a point. There is no need for -mcpu=v4 
> flag.
> The builtins will produce new insns and it will be up to kernel to accept 
> them or not.
will make the change (removing -mcpu=v4).



Comment at: llvm/lib/Target/BPF/BPFMIChecking.cpp:207
+  case BPF::XFORW32: newOpcode = BPF::XORW32; break;
+  default: newOpcode = BPF::XORD; break;
+  }

ast wrote:
> The default is error prone. Why not check for XFORD explicitly and default
> to fatal_error?
Yes, we can do this, although theoretically the default fatal_error should never
happen.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72184: [BPF] support atomic instructions

2020-12-01 Thread Alexei Starovoitov via Phabricator via cfe-commits
ast added inline comments.



Comment at: llvm/lib/Target/BPF/BPFMIChecking.cpp:199
+  unsigned newOpcode;
+  switch(MI.getOpcode()) {
+  case BPF::XFADDW32: newOpcode = BPF::XADDW32; break;

With this logic in place Andrii has a point. There is no need for -mcpu=v4 flag.
The builtins will produce new insns and it will be up to kernel to accept them 
or not.



Comment at: llvm/lib/Target/BPF/BPFMIChecking.cpp:207
+  case BPF::XFORW32: newOpcode = BPF::XORW32; break;
+  default: newOpcode = BPF::XORD; break;
+  }

The default is error prone. Why not check for XFORD explicitly and default
to fatal_error?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72184: [BPF] support atomic instructions

2020-11-30 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song updated this revision to Diff 308517.
yonghong-song edited the summary of this revision.
yonghong-song added a comment.

- remove atomic_fetch_sub, which can be implemented with neg + atomic_fetch_add
- add support for xand, xor, xxor (xadd is already supported)
- for any given __sync_fetch_and_{add, and, or, xor}, llvm will generate either
an atomic_fetch_* instruction or an x* (non-fetching) instruction, depending on
whether the return value is used or not (see the sketch below).
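
A short C sketch of that selection rule (illustrative only, not part of the
patch itself):

  long uses_result(long *p, long v)
  {
          return __sync_fetch_and_and(p, v);  /* result used -> atomic_fetch_and */
  }

  void ignores_result(long *p, long v)
  {
          (void)__sync_fetch_and_and(p, v);   /* result unused -> xand */
  }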


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

Files:
  clang/lib/Basic/Targets/BPF.cpp
  clang/test/Misc/target-invalid-cpu-note.c
  llvm/lib/Target/BPF/BPF.td
  llvm/lib/Target/BPF/BPFInstrFormats.td
  llvm/lib/Target/BPF/BPFInstrInfo.td
  llvm/lib/Target/BPF/BPFMIChecking.cpp
  llvm/lib/Target/BPF/BPFSubtarget.cpp
  llvm/lib/Target/BPF/BPFSubtarget.h
  llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
  llvm/test/CodeGen/BPF/atomics.ll
  llvm/test/CodeGen/BPF/atomics_2.ll

Index: llvm/test/CodeGen/BPF/atomics_2.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/atomics_2.ll
@@ -0,0 +1,254 @@
+; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v4 | FileCheck %s
+;
+; Source:
+;   int test_load_add_32(int *p, int v) {
+; return __sync_fetch_and_add(p, v);
+;   }
+;   int test_load_add_64(long *p, long v) {
+; return __sync_fetch_and_add(p, v);
+;   }
+;   int test_load_sub_32(int *p, int v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_64(long *p, long v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   // from https://gcc.gnu.org/onlinedocs/gcc-4.1.1/gcc/Atomic-Builtins.html
+;   // __sync_lock_test_and_set() actually does atomic xchg and returns
+;   // old contents.
+;   int test_xchg_32(int *p, int v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_64(long *p, long v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_cas_32(int *p, int old, int new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   long test_cas_64(long *p, long old, long new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   int test_load_and_32(int *p, int v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_and_64(long *p, long v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_or_32(int *p, int v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_or_64(long *p, long v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_xor_32(int *p, int v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+;   int test_load_xor_64(long *p, long v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+;   int test_atomic_xor_32(int *p, int v) {
+; __sync_fetch_and_xor(p, v);
+; return 0;
+;   }
+;   int test_atomic_xor_64(long *p, long v) {
+; __sync_fetch_and_xor(p, v);
+; return 0;
+;   }
+;   int test_atomic_and_64(long *p, long v) {
+; __sync_fetch_and_and(p, v);
+; return 0;
+;   }
+;   int test_atomic_or_64(long *p, long v) {
+; __sync_fetch_and_or(p, v);
+; return 0;
+;   }
+
+; CHECK-LABEL: test_load_add_32
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_add_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw add i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_add_64
+; CHECK: r0 = r2
+; CHECK: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_add_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw add i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_load_sub_32
+; CHECK: w0 = w2
+; CHECK: w0 = -w0
+; CHECK: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: r0 = r2
+; CHECK: r0 = -r0
+; CHECK: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x01,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_32
+; CHECK: w0 = w2
+; CHECK: w0 = xchg32_32(r1 + 0, w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i32* %p, i32 %v seq_cst
+  ret 

[PATCH] D72184: [BPF] support atomic instructions

2020-11-23 Thread Brendan Jackman via Phabricator via cfe-commits
jackmanb added a comment.

> I did not see that the kernel has atomic_store; do you mean atomic_set?

Sorry yep I meant `atomic_set`

> Do you suggest we also implement atomic_set? There is no need for it on a
> 64-bit architecture like x64, right?

Yeah, actually now that I think about it, `atomic_set` is pretty pointless; I
think it's only there in the kernel to support strong type-checking of
`atomic_t`. It doesn't imply any barriers.

I think we can do without it; it makes more sense to implement it alongside
barrier instructions.
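
Roughly, the kernel primitive being referred to looks like this (a simplified
sketch for illustration, not the actual kernel source):

  /* atomic_set() is essentially a type-checked plain store: no
     read-modify-write and no implied memory barrier. */
  typedef struct { int counter; } atomic_t;

  static inline void atomic_set(atomic_t *v, int i)
  {
          *(volatile int *)&v->counter = i;   /* WRITE_ONCE()-style store */
  }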


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72184: [BPF] support atomic instructions

2020-11-23 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added a comment.

Ya, the above llvm crash is expected, as the bpf backend does not handle AtomicStore.

For kernel code, I can see:

  kvm/x86.c:  vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);

...

  kvm/x86.c:  atomic_set(&kvm_guest_has_master_clock, 1);

So for atomic_set we do not return a value, right?

I did not see that the kernel has atomic_store; do you mean atomic_set?

Do you suggest we also implement atomic_set? There is no need for it on a
64-bit architecture like x64, right?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72184: [BPF] support atomic instructions

2020-11-23 Thread Brendan Jackman via Phabricator via cfe-commits
jackmanb added a comment.

BTW to investigate my previous comment I tried compiling this code:

  // SPDX-License-Identifier: GPL-2.0
  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  __u64 add64_value = 1;
  __u64 add64_result;
  __u32 add32_value = 1;
  __u32 add32_result;
  __u64 add_stack_value_copy;
  __u64 add_stack_result;
  SEC("fentry/bpf_fentry_test1")
  int BPF_PROG(add, int a)
  {
          __u64 add_stack_value = 1;

          add64_result = __sync_fetch_and_add(&add64_value, 2);
          add32_result = __sync_fetch_and_add(&add32_value, 2);
          add_stack_result = __sync_fetch_and_add(&add_stack_value, 2);
          add_stack_value_copy = add_stack_value;

          return 0;
  }

  __u64 cmpxchg64_value = 1;
  __u64 cmpxchg64_result_fail;
  __u64 cmpxchg64_result_succeed;
  __u32 cmpxchg32_value = 1;
  __u32 cmpxchg32_result_fail;
  __u32 cmpxchg32_result_succeed;
  SEC("fentry/bpf_fentry_test1")
  int BPF_PROG(cmpxchg, int a)
  {
          cmpxchg64_result_fail = __sync_val_compare_and_swap(
                  &cmpxchg64_value, 0, 3);
          cmpxchg64_result_succeed = __sync_val_compare_and_swap(
                  &cmpxchg64_value, 1, 2);

          cmpxchg32_result_fail = __sync_val_compare_and_swap(
                  &cmpxchg32_value, 0, 3);
          cmpxchg32_result_succeed = __sync_val_compare_and_swap(
                  &cmpxchg32_value, 1, 2);

          return 0;
  }

  __u64 xchg64_value = 1;
  __u64 xchg64_result;
  __u32 xchg32_value = 1;
  __u32 xchg32_result;
  SEC("fentry/bpf_fentry_test1")
  int BPF_PROG(xchg, int a)
  {
          __u64 val64 = 2;
          __u32 val32 = 2;

          __atomic_exchange(&xchg64_value, &val64, &xchg64_result, __ATOMIC_RELAXED);
          __atomic_exchange(&xchg32_value, &val32, &xchg32_result, __ATOMIC_RELAXED);
          __atomic_store(&xchg64_result, &val64, __ATOMIC_SEQ_CST);

          return 0;
  }

(By modifying this code to add the `__atomic_store`, and then running `make -C
~/src/linux/linux/tools/testing/selftests/bpf -j test_progs` from the base of
the kernel tree.)

And got a crash:

  $ make -C ~/src/linux/linux/tools/testing/selftests/bpf -j test_progs  
  make: Entering directory 
'/usr/local/google/home/jackmanb/src/linux/linux/tools/testing/selftests/bpf'
CLNG-LLC [test_maps] atomics_test.o
  LLVM ERROR: Cannot select: 0x5638e9444528: ch = AtomicStore<(store seq_cst 8 
into @xchg64_result)> 0x5638e94445f8, 0x5638e9444660, Constant:i64<2>, 
progs/atomics_test.c:59:2 @[ progs/atomics_test.c:52:5 ]
0x5638e9444660: i64 = BPFISD::Wrapper TargetGlobalAddress:i64 0, progs/atomics_test.c:57:2 @[ progs/atomics_test.c:52:5 ]
  0x5638e9444b40: i64 = TargetGlobalAddress 0, 
progs/atomics_test.c:57:2 @[ progs/atomics_test.c:52:5 ]
0x5638e94628c0: i64 = Constant<2>
  In function: xchg
  PLEASE submit a bug report to https://bugs.llvm.org/ and include the crash 
backtrace.
  Stack dump:
  0.  Program arguments: llc -mattr=dwarfris -march=bpf -mcpu=v4 
-mattr=+alu32 -filetype=obj -o 
/usr/local/google/home/jackmanb/src/linux/linux/tools/testing/selftests/bpf/atomics_test.o
  1.  Running pass 'Function Pass Manager' on module ''.
  2.  Running pass 'BPF DAG->DAG Pattern Instruction Selection' on function 
'@xchg'
   #0 0x5638e543ae1d llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) 
(/usr/local/bin/llc+0x26f4e1d)
   #1 0x5638e5438c14 llvm::sys::RunSignalHandlers() 
(/usr/local/bin/llc+0x26f2c14)
   #2 0x5638e5438d70 SignalHandler(int) (/usr/local/bin/llc+0x26f2d70)
   #3 0x7ff91703b140 __restore_rt 
(/lib/x86_64-linux-gnu/libpthread.so.0+0x14140)
   #4 0x7ff916b1edb1 raise ./signal/../sysdeps/unix/sysv/linux/raise.c:51:1
   #5 0x7ff916b08537 abort ./stdlib/abort.c:81:7
   #6 0x5638e53b393c llvm::report_fatal_error(llvm::Twine const&, bool) 
(/usr/local/bin/llc+0x266d93c)
   #7 0x5638e53b3a6e (/usr/local/bin/llc+0x266da6e)
   #8 0x5638e527bac0 llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) 
(/usr/local/bin/llc+0x2535ac0)
   #9 0x5638e527c8b1 
llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, 
unsigned int) (/usr/local/bin/llc+0x25368b1)
  #10 0x5638e527a5d7 llvm::SelectionDAGISel::DoInstructionSelection() 
(/usr/local/bin/llc+0x25345d7)
  #11 0x5638e52833f5 llvm::SelectionDAGISel::CodeGenAndEmitDAG() 
(/usr/local/bin/llc+0x253d3f5)
  #12 0x5638e5287471 
llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) 
(/usr/local/bin/llc+0x2541471)
  #13 0x5638e5289dbc 
llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (.part.0) 
(/usr/local/bin/llc+0x2543dbc)
  #14 0x5638e491a5d4 
llvm::MachineFunctionPass::runOnFunction(llvm::Function&) 
(/usr/local/bin/llc+0x1bd45d4)
  #15 0x5638e4d0a050 llvm::FPPassManager::runOnFunction(llvm::Function&) 
(/usr/local/bin/llc+0x1fc4050)
  #16 0x5638e4d0b5dc 

[PATCH] D72184: [BPF] support atomic instructions

2020-11-23 Thread Brendan Jackman via Phabricator via cfe-commits
jackmanb added a comment.

I thought a little more about something I was saying in the office hours.

I'm pretty sure GCC's `__atomic_store(ptr, val, order)` should fail to compile
for anything other than `order=__ATOMIC_RELAXED`, since with the current kernel 
patchset we have `BPF_SET` (which is called `BPF_XCHG` in this LLVM patch) 
implemented with the kernel's `atomic_store`, which _doesn't imply a barrier_.

One alternative solution might be just to scrap `BPF_SET` without `BPF_FETCH` 
(`BPF_SET | BPF_FETCH` is implemented with `atomic_xchg` which _does_ imply a 
barrier IIUC).  As we discussed in the office hours, `BPF_CMPSET` (called 
`BPF_CMPXCHG` in this LLVM patch) won't be supported without `BPF_FETCH`
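
An illustrative C sketch of the constraint being discussed (using the _n form
of the builtin for brevity; this is a sketch of the proposal, not verified
compiler behavior):

  void store_example(unsigned long long *dst, unsigned long long v)
  {
          /* Maps onto BPF_SET without BPF_FETCH, which carries no barrier. */
          __atomic_store_n(dst, v, __ATOMIC_RELAXED);

          /* Would require barrier semantics the instruction does not
             provide, so under this proposal it should be rejected:
          __atomic_store_n(dst, v, __ATOMIC_SEQ_CST);  */
  }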


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72184: [BPF] support atomic instructions

2020-11-19 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song updated this revision to Diff 306598.
yonghong-song added a comment.

add some comments in test w.r.t. __sync_lock_test_and_set()


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

Files:
  clang/lib/Basic/Targets/BPF.cpp
  clang/test/Misc/target-invalid-cpu-note.c
  llvm/lib/Target/BPF/BPF.td
  llvm/lib/Target/BPF/BPFInstrFormats.td
  llvm/lib/Target/BPF/BPFInstrInfo.td
  llvm/lib/Target/BPF/BPFSubtarget.cpp
  llvm/lib/Target/BPF/BPFSubtarget.h
  llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
  llvm/test/CodeGen/BPF/atomics.ll
  llvm/test/CodeGen/BPF/atomics_2.ll

Index: llvm/test/CodeGen/BPF/atomics_2.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/atomics_2.ll
@@ -0,0 +1,169 @@
+; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v4 | FileCheck %s
+;
+; Source:
+;   int test_load_sub_32(int *p, int v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_64(long *p, long v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   // from https://gcc.gnu.org/onlinedocs/gcc-4.1.1/gcc/Atomic-Builtins.html
+;   // __sync_lock_test_and_set() actually does atomic xchg and returns
+;   // old contents.
+;   int test_xchg_32(int *p, int v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_64(long *p, long v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_cas_32(int *p, int old, int new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   long test_cas_64(long *p, long old, long new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   int test_load_and_32(int *p, int v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_and_64(long *p, long v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_or_32(int *p, int v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_or_64(long *p, long v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_xor_32(int *p, int v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+;   int test_load_xor_64(long *p, long v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+
+; CHECK-LABEL: test_load_sub_32
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_sub((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: r0 = r2
+; CHECK: r0 = atomic_fetch_sub((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_32
+; CHECK: w0 = w2
+; CHECK: w0 = xchg32_32(r1 + 0, w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_xchg_64
+; CHECK: r0 = r2
+; CHECK: r0 = xchg_64(r1 + 0, r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_cas_32
+; CHECK: w0 = w2
+; CHECK: w0 = cmpxchg32_32(r1 + 0, w0, w3)
+; CHECK: encoding: [0xc3,0x31,0x00,0x00,0xf1,0x00,0x00,0x00]
+define dso_local i32 @test_cas_32(i32* nocapture %p, i32 %old, i32 %new) local_unnamed_addr {
+entry:
+  %0 = cmpxchg i32* %p, i32 %old, i32 %new seq_cst seq_cst
+  %1 = extractvalue { i32, i1 } %0, 0
+  ret i32 %1
+}
+
+; CHECK-LABEL: test_cas_64
+; CHECK: r0 = r2
+; CHECK: r0 = cmpxchg_64(r1 + 0, r0, r3)
+; CHECK: encoding: [0xdb,0x31,0x00,0x00,0xf1,0x00,0x00,0x00]
+define dso_local i64 @test_cas_64(i64* nocapture %p, i64 %old, i64 %new) local_unnamed_addr {
+entry:
+  %0 = cmpxchg i64* %p, i64 %old, i64 %new seq_cst seq_cst
+  %1 = extractvalue { i64, i1 } %0, 0
+  ret i64 %1
+}
+
+; CHECK-LABEL: test_load_and_32
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_and((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x51,0x00,0x00,0x00]
+define dso_local i32 @test_load_and_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw and i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_and_64
+; CHECK: r0 = r2
+; CHECK: r0 = atomic_fetch_and((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x51,0x00,0x00,0x00]
+define dso_local i32 @test_load_and_64(i64* nocapture %p, i64 %v) 

[PATCH] D72184: [BPF] support atomic instructions

2020-11-19 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added a comment.

Okay, my fault. I must have uploaded an old version of the patch, so the comment
is not in the test. I will upload the new version tonight.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72184: [BPF] support atomic instructions

2020-11-19 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added a comment.

In D72184#2407264 , @ast wrote:

> Looks like the test didn't change. Only commit log... that's fine. I think 
> the diff is ready to land, but let's wait for the kernel patches to be ready 
> as well.

I added some comments in the test... Yes, it makes sense to wait until we have at
least an RFC for the kernel patches, so we know the interface sort of works.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72184: [BPF] support atomic instructions

2020-11-19 Thread Alexei Starovoitov via Phabricator via cfe-commits
ast added a comment.

Looks like the test didn't change. Only commit log... that's fine. I think the 
diff is ready to land, but let's wait for the kernel patches to be ready as 
well.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72184: [BPF] support atomic instructions

2020-11-19 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song updated this revision to Diff 306592.
yonghong-song edited the summary of this revision.
yonghong-song added a comment.

add a proper comment in the test and clarify in the commit message that
__sync_lock_test_and_set actually means an atomic exchange operation.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

Files:
  clang/lib/Basic/Targets/BPF.cpp
  clang/test/Misc/target-invalid-cpu-note.c
  llvm/lib/Target/BPF/BPF.td
  llvm/lib/Target/BPF/BPFInstrFormats.td
  llvm/lib/Target/BPF/BPFInstrInfo.td
  llvm/lib/Target/BPF/BPFSubtarget.cpp
  llvm/lib/Target/BPF/BPFSubtarget.h
  llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
  llvm/test/CodeGen/BPF/atomics.ll
  llvm/test/CodeGen/BPF/atomics_2.ll

Index: llvm/test/CodeGen/BPF/atomics_2.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/atomics_2.ll
@@ -0,0 +1,166 @@
+; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v4 | FileCheck %s
+;
+; Source:
+;   int test_load_sub_32(int *p, int v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_64(long *p, long v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_xchg_32(int *p, int v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_64(long *p, long v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_cas_32(int *p, int old, int new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   long test_cas_64(long *p, long old, long new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   int test_load_and_32(int *p, int v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_and_64(long *p, long v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_or_32(int *p, int v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_or_64(long *p, long v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_xor_32(int *p, int v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+;   int test_load_xor_64(long *p, long v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+
+; CHECK-LABEL: test_load_sub_32
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_sub((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: r0 = r2
+; CHECK: r0 = atomic_fetch_sub((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_32
+; CHECK: w0 = w2
+; CHECK: w0 = xchg32_32(r1 + 0, w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_xchg_64
+; CHECK: r0 = r2
+; CHECK: r0 = xchg_64(r1 + 0, r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_cas_32
+; CHECK: w0 = w2
+; CHECK: w0 = cmpxchg32_32(r1 + 0, w0, w3)
+; CHECK: encoding: [0xc3,0x31,0x00,0x00,0xf1,0x00,0x00,0x00]
+define dso_local i32 @test_cas_32(i32* nocapture %p, i32 %old, i32 %new) local_unnamed_addr {
+entry:
+  %0 = cmpxchg i32* %p, i32 %old, i32 %new seq_cst seq_cst
+  %1 = extractvalue { i32, i1 } %0, 0
+  ret i32 %1
+}
+
+; CHECK-LABEL: test_cas_64
+; CHECK: r0 = r2
+; CHECK: r0 = cmpxchg_64(r1 + 0, r0, r3)
+; CHECK: encoding: [0xdb,0x31,0x00,0x00,0xf1,0x00,0x00,0x00]
+define dso_local i64 @test_cas_64(i64* nocapture %p, i64 %old, i64 %new) local_unnamed_addr {
+entry:
+  %0 = cmpxchg i64* %p, i64 %old, i64 %new seq_cst seq_cst
+  %1 = extractvalue { i64, i1 } %0, 0
+  ret i64 %1
+}
+
+; CHECK-LABEL: test_load_and_32
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_and((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x51,0x00,0x00,0x00]
+define dso_local i32 @test_load_and_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw and i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_and_64
+; CHECK: r0 = r2
+; CHECK: r0 = atomic_fetch_and((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x51,0x00,0x00,0x00]
+define dso_local i32 @test_load_and_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw and i64* 

[PATCH] D72184: [BPF] support atomic instructions

2020-11-19 Thread Alexei Starovoitov via Phabricator via cfe-commits
ast added inline comments.



Comment at: llvm/test/CodeGen/BPF/atomics_2.ll:14
+;   int test_xchg_64(long *p, long v) {
+; return __sync_lock_test_and_set(p, v);
+;   }

yonghong-song wrote:
> ast wrote:
> > test_and_set is not the same as xchg.
> > xchg doesn't do comparison.
> I am looking at here:
>   https://gcc.gnu.org/onlinedocs/gcc-4.1.1/gcc/Atomic-Builtins.html
> which mentions:
>  type __sync_lock_test_and_set (type *ptr, type value, ...)
> This builtin, as described by Intel, is not a traditional test-and-set 
> operation, but rather an atomic exchange operation. It writes value into 
> *ptr, and returns the previous contents of *ptr.
> 
> Many targets have only minimal support for such locks, and do not support 
> a full exchange operation. In this case, a target may support reduced 
> functionality here by which the only valid value to store is the immediate 
> constant 1. The exact value actually stored in *ptr is implementation defined.
> 
> This builtin is not a full barrier, but rather an acquire barrier. This 
> means that references after the builtin cannot move to (or be speculated to) 
> before the builtin, but previous memory stores may not be globally visible 
> yet, and previous memory loads may not yet be satisfied. 
> 
> So it does not do compare.
> 
> Or alternatively for llvm atomic builtin,
>   https://llvm.org/docs/Atomics.html
> 
> We have:
>   iN __atomic_load_N(iN *ptr, iN val, int ordering)
> void __atomic_store_N(iN *ptr, iN val, int ordering)
> iN __atomic_exchange_N(iN *ptr, iN val, int ordering)
> bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired, int 
> success_order, int failure_order)
> 
> But as I mentioned in the bpf office hours meeting, an "ordering" is required
> and I do not know how to deal with it.
Thanks for the info. The gcc builtin has a misleading name.
Please mention this quirk in the test comments and in the commit log.
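
A one-line C illustration of the quirk (hypothetical example, not taken from
the patch):

  int not_really_test_and_set(int *p, int v)
  {
          /* Despite the name, this is an atomic exchange: it stores v into
             *p and returns the previous contents (lowered to the new xchg
             instructions by this patch). */
          return __sync_lock_test_and_set(p, v);
  }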


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72184: [BPF] support atomic instructions

2020-11-19 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added inline comments.



Comment at: llvm/test/CodeGen/BPF/atomics_2.ll:14
+;   int test_xchg_64(long *p, long v) {
+; return __sync_lock_test_and_set(p, v);
+;   }

ast wrote:
> test_and_set is not the same as xchg.
> xchg doesn't do comparison.
I am looking at here:
  https://gcc.gnu.org/onlinedocs/gcc-4.1.1/gcc/Atomic-Builtins.html
which mentions:
 type __sync_lock_test_and_set (type *ptr, type value, ...)
This builtin, as described by Intel, is not a traditional test-and-set 
operation, but rather an atomic exchange operation. It writes value into *ptr, 
and returns the previous contents of *ptr.

Many targets have only minimal support for such locks, and do not support a 
full exchange operation. In this case, a target may support reduced 
functionality here by which the only valid value to store is the immediate 
constant 1. The exact value actually stored in *ptr is implementation defined.

This builtin is not a full barrier, but rather an acquire barrier. This 
means that references after the builtin cannot move to (or be speculated to) 
before the builtin, but previous memory stores may not be globally visible yet, 
and previous memory loads may not yet be satisfied. 

So it does not do compare.

Or alternatively for llvm atomic builtin,
  https://llvm.org/docs/Atomics.html

We have:
  iN __atomic_load_N(iN *ptr, iN val, int ordering)
void __atomic_store_N(iN *ptr, iN val, int ordering)
iN __atomic_exchange_N(iN *ptr, iN val, int ordering)
bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired, int 
success_order, int failure_order)

But as I mentioned in the bpf office hours meeting, an "ordering" is required
and I do not know how to deal with it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72184: [BPF] support atomic instructions

2020-11-19 Thread Alexei Starovoitov via Phabricator via cfe-commits
ast added inline comments.



Comment at: llvm/test/CodeGen/BPF/atomics_2.ll:14
+;   int test_xchg_64(long *p, long v) {
+; return __sync_lock_test_and_set(p, v);
+;   }

test_and_set is not the same as xchg.
xchg doesn't do comparison.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72184: [BPF] support atomic instructions

2020-11-19 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song updated this revision to Diff 306485.
yonghong-song edited the summary of this revision.
yonghong-song added a comment.

- remove all char/short atomic operations
- remove barrier instruction


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

Files:
  clang/lib/Basic/Targets/BPF.cpp
  clang/test/Misc/target-invalid-cpu-note.c
  llvm/lib/Target/BPF/BPF.td
  llvm/lib/Target/BPF/BPFInstrFormats.td
  llvm/lib/Target/BPF/BPFInstrInfo.td
  llvm/lib/Target/BPF/BPFSubtarget.cpp
  llvm/lib/Target/BPF/BPFSubtarget.h
  llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
  llvm/test/CodeGen/BPF/atomics.ll
  llvm/test/CodeGen/BPF/atomics_2.ll

Index: llvm/test/CodeGen/BPF/atomics_2.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/atomics_2.ll
@@ -0,0 +1,166 @@
+; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v4 | FileCheck %s
+;
+; Source:
+;   int test_load_sub_32(int *p, int v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_64(long *p, long v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_xchg_32(int *p, int v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_64(long *p, long v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_cas_32(int *p, int old, int new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   long test_cas_64(long *p, long old, long new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   int test_load_and_32(int *p, int v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_and_64(long *p, long v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_or_32(int *p, int v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_or_64(long *p, long v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_xor_32(int *p, int v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+;   int test_load_xor_64(long *p, long v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+
+; CHECK-LABEL: test_load_sub_32
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_sub((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: r0 = r2
+; CHECK: r0 = atomic_fetch_sub((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_32
+; CHECK: w0 = w2
+; CHECK: w0 = xchg32_32(r1 + 0, w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_xchg_64
+; CHECK: r0 = r2
+; CHECK: r0 = xchg_64(r1 + 0, r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_cas_32
+; CHECK: w0 = w2
+; CHECK: w0 = cmpxchg32_32(r1 + 0, w0, w3)
+; CHECK: encoding: [0xc3,0x31,0x00,0x00,0xf1,0x00,0x00,0x00]
+define dso_local i32 @test_cas_32(i32* nocapture %p, i32 %old, i32 %new) local_unnamed_addr {
+entry:
+  %0 = cmpxchg i32* %p, i32 %old, i32 %new seq_cst seq_cst
+  %1 = extractvalue { i32, i1 } %0, 0
+  ret i32 %1
+}
+
+; CHECK-LABEL: test_cas_64
+; CHECK: r0 = r2
+; CHECK: r0 = cmpxchg_64(r1 + 0, r0, r3)
+; CHECK: encoding: [0xdb,0x31,0x00,0x00,0xf1,0x00,0x00,0x00]
+define dso_local i64 @test_cas_64(i64* nocapture %p, i64 %old, i64 %new) local_unnamed_addr {
+entry:
+  %0 = cmpxchg i64* %p, i64 %old, i64 %new seq_cst seq_cst
+  %1 = extractvalue { i64, i1 } %0, 0
+  ret i64 %1
+}
+
+; CHECK-LABEL: test_load_and_32
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_and((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x51,0x00,0x00,0x00]
+define dso_local i32 @test_load_and_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw and i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_and_64
+; CHECK: r0 = r2
+; CHECK: r0 = atomic_fetch_and((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x51,0x00,0x00,0x00]
+define dso_local i32 @test_load_and_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw and i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv

[PATCH] D72184: [BPF] support atomic instructions

2020-11-19 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added a comment.

Thanks. Somehow my build is successful. I think I know what the possible issue 
is, but I wonder why I did not hit it.

> Can we please keep barriers out of scope? I think there's a lot of design to 
> be done there and I'd rather just get the core atomics working first.

The reason I put it here is to make *cpu=v4* roughly complete: if you add the 
barrier later we may need to add "cpu=v5", and I want to avoid that. But I 
agree this is a bit overwhelming for you, so let us discuss in the BPF office 
hours how to proceed.

Alternatively, for the time being you can remove the following code
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in {
+  def SYNC : TYPE_LD_ST {
+let Inst{7-4} = BPF_BARRIER.Value;
+let BPFClass = BPF_STX;
+  }
+}
+
+def : Pat<(atomic_fence (timm), (timm)), (SYNC)>;
to work around the compilation issue.
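
For reference, the C shape this pattern serves is the test_sync() case in 
atomics_2.ll; a minimal sketch (same code as the test source, nothing new) is:

```c
/* Mirrors test_sync() in atomics_2.ll: __sync_synchronize() is what the
 * atomic_fence pattern above selects into the SYNC instruction, acting as
 * a full memory barrier between the two stores. */
int test_sync(int *p, int *q)
{
  *p = 1;
  __sync_synchronize();
  *q = 2;
  return 0;
}
```

With the pattern removed as a workaround, this shape presumably has no 
lowering for the fence until the barrier design is settled.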


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184



[PATCH] D72184: [BPF] support atomic instructions

2020-11-19 Thread Brendan Jackman via Phabricator via cfe-commits
jackmanb added a comment.

Can we please keep barriers out of scope? I think there's a lot of design to be 
done there and I'd rather just get the core atomics working first.

BTW I got

  [ 31%] Building LanaiGenDAGISel.inc...
   
  Included from 
/usr/local/google/home/jackmanb/src/llvm-project/llvm/lib/Target/BPF/BPF.td:13: 
   
  
/usr/local/google/home/jackmanb/src/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td:677:1:
 error: Pattern doesn't match mayStore = 1
  def : Pat<(atomic_fence (timm), (timm)), (SYNC)>; 
   
  ^ 
   
  error: pattern conflicts  
   

But removing line 677 fixes it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184



[PATCH] D72184: [BPF] support atomic instructions

2020-11-18 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added inline comments.



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:783
+  let Inst{47-32} = addr{15-0}; // offset
+  let Inst{11-8} = new;
+  let Inst{7-4} = BPF_CMPXCHG.Value;

ast wrote:
> yonghong-song wrote:
> > jackmanb wrote:
> > > If we go down the route of matching operands with x86 as you have done 
> > > for `XFALU64` and `XCHG`, I think  we should also do it for cmpxchg.
> > > 
> > > IIUC this is `dst = atomic_cmpxchg(*(src + off), r0, new);` 
> > > 
> > > But to do it in a single x86 instruction we need to have only 2 operands 
> > > + the hard-coded r0. `r0 = atomic_xmpxchg(*(dst + off), r0, src);` would 
> > > seem most natural to me.
> > We can do this:
> >   r0 = atomic_xmpxchg(*(dst + off), r0, src);
> I'm confused. Isn't that what that is already?
> r0 = atomic_cmpxchg(*(dst + off), r0, src);
> 
> In "class CMPXCHG":
> let Inst{51-48} = addr{19-16}; // this is 'dst_reg' in bpf_insn.
> let Inst{55-52} = new; // this is 'src_reg' in bpf_insn.
> 
> Same as old xadd and new fetch_add insns.
> What am I missing?
I just fixed it to return R0/W0 in the early afternoon. So you looked at old 
comments with new code, and that is why it is confusing :-)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184



[PATCH] D72184: [BPF] support atomic instructions

2020-11-18 Thread Alexei Starovoitov via Phabricator via cfe-commits
ast added inline comments.



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:783
+  let Inst{47-32} = addr{15-0}; // offset
+  let Inst{11-8} = new;
+  let Inst{7-4} = BPF_CMPXCHG.Value;

yonghong-song wrote:
> jackmanb wrote:
> > If we go down the route of matching operands with x86 as you have done for 
> > `XFALU64` and `XCHG`, I think  we should also do it for cmpxchg.
> > 
> > IIUC this is `dst = atomic_cmpxchg(*(src + off), r0, new);` 
> > 
> > But to do it in a single x86 instruction we need to have only 2 operands + 
> > the hard-coded r0. `r0 = atomic_xmpxchg(*(dst + off), r0, src);` would seem 
> > most natural to me.
> We can do this:
>   r0 = atomic_xmpxchg(*(dst + off), r0, src);
I'm confused. Isn't that what that is already?
r0 = atomic_cmpxchg(*(dst + off), r0, src);

In "class CMPXCHG":
let Inst{51-48} = addr{19-16}; // this is 'dst_reg' in bpf_insn.
let Inst{55-52} = new; // this is 'src_reg' in bpf_insn.

Same as old xadd and new fetch_add insns.
What am I missing?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184



[PATCH] D72184: [BPF] support atomic instructions

2020-11-18 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song updated this revision to Diff 306172.
yonghong-song edited the summary of this revision.
yonghong-song added a comment.

Add support for a barrier function. The corresponding C code is 
__sync_synchronize().
If we want variants like read/write/rw barriers, we may need to define 
BPF-specific barrier functions, as __sync_synchronize() does not allow taking 
additional parameters. I tentatively use one particular encoding, but we can 
certainly change that later.
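
A hedged illustration of that constraint (the helper name below is purely 
hypothetical, not part of this patch): __sync_synchronize() takes no 
arguments, so read/write/rw flavors would need dedicated BPF builtins or 
helpers.

```c
/* Sketch only: __sync_synchronize() is the full-barrier builtin and cannot
 * take a parameter selecting a barrier type, so finer-grained variants
 * would need something like a hypothetical bpf_barrier(kind) helper. */
void barrier_shape(void)
{
  __sync_synchronize();             /* full barrier: the only builtin form */
  /* bpf_barrier(BPF_BARRIER_RD);      hypothetical read-barrier variant   */
  /* bpf_barrier(BPF_BARRIER_WR);      hypothetical write-barrier variant  */
}
```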


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

Files:
  clang/lib/Basic/Targets/BPF.cpp
  clang/test/Misc/target-invalid-cpu-note.c
  llvm/lib/Target/BPF/BPF.td
  llvm/lib/Target/BPF/BPFInstrFormats.td
  llvm/lib/Target/BPF/BPFInstrInfo.td
  llvm/lib/Target/BPF/BPFSubtarget.cpp
  llvm/lib/Target/BPF/BPFSubtarget.h
  llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
  llvm/test/CodeGen/BPF/atomics.ll
  llvm/test/CodeGen/BPF/atomics_2.ll

Index: llvm/test/CodeGen/BPF/atomics_2.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/atomics_2.ll
@@ -0,0 +1,242 @@
+; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v4 | FileCheck %s
+;
+; Source:
+;   char test_load_sub_8(char *p, char v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   short test_load_sub_16(short *p, short v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_32(int *p, int v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_64(long *p, long v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_xchg_8(char *p, char v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_16(short *p, short v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_32(int *p, int v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_64(long *p, long v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_cas_32(int *p, int old, int new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   long test_cas_64(long *p, long old, long new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   int test_load_and_32(int *p, int v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_and_64(long *p, long v) {
+; return __sync_fetch_and_and(p, v);
+;   }
+;   int test_load_or_32(int *p, int v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_or_64(long *p, long v) {
+; return __sync_fetch_and_or(p, v);
+;   }
+;   int test_load_xor_32(int *p, int v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+;   int test_load_xor_64(long *p, long v) {
+; return __sync_fetch_and_xor(p, v);
+;   }
+;   int test_sync(int *p, int *q)
+;   {
+; *p = 1;
+; __sync_synchronize();
+; *q = 2;
+; return 0;
+;   }
+
+; CHECK-LABEL: test_load_sub_8
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_sub((u8 *)(r1 + 0), w0)
+; CHECK: encoding: [0xd3,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local signext i8 @test_load_sub_8(i8* nocapture %p, i8 signext %v) local_unnamed_addr #0 {
+entry:
+  %0 = atomicrmw sub i8* %p, i8 %v seq_cst
+  ret i8 %0
+}
+
+; CHECK-LABEL: test_load_sub_16
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_sub((u16 *)(r1 + 0), w0)
+; CHECK: encoding: [0xcb,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local signext i16 @test_load_sub_16(i16* nocapture %p, i16 signext %v) local_unnamed_addr #0 {
+entry:
+  %0 = atomicrmw sub i16* %p, i16 %v seq_cst
+  ret i16 %0
+}
+
+; CHECK-LABEL: test_load_sub_32
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_sub((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: r0 = r2
+; CHECK: r0 = atomic_fetch_sub((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_8
+; CHECK: w0 = w2
+; CHECK: w0 = xchg32_8(r1 + 0, w0)
+; CHECK: encoding: [0xd3,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_8(i8* nocapture %p, i8 signext %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i8* %p, i8 %v seq_cst
+  %conv = sext i8 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_16
+; CHECK: w0 = w2
+; CHECK: w0 = xchg32_16(r1 + 0, w0)
+; CHECK: encoding: [0xcb,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_16(i16* nocapture %p, i16 signext %v) local_unnamed_addr {
+entry:
+  %0 = 

[PATCH] D72184: [BPF] support atomic instructions

2020-11-18 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song updated this revision to Diff 306146.
yonghong-song edited the summary of this revision.
yonghong-song added a comment.

Add fetch_and_{and,or,xor} support.
Force the cmpxchg insn to return its result in r0, i.e. r0 = cmpxchg(addr, r0, new).
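
For reference, the C-level forms the new fetch_and_{and,or,xor} support serves 
are the same test_load_{and,or,xor} shapes used in atomics_2.ll (32-bit shown; 
the 64-bit variants are analogous):

```c
/* Same shapes as the atomics_2.ll sources: each builtin returns the old
 * value and maps to a single BPF atomic_fetch_{and,or,xor} instruction. */
int test_load_and_32(int *p, int v) { return __sync_fetch_and_and(p, v); }
int test_load_or_32(int *p, int v)  { return __sync_fetch_and_or(p, v); }
int test_load_xor_32(int *p, int v) { return __sync_fetch_and_xor(p, v); }
```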


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

Files:
  clang/lib/Basic/Targets/BPF.cpp
  clang/test/Misc/target-invalid-cpu-note.c
  llvm/lib/Target/BPF/BPF.td
  llvm/lib/Target/BPF/BPFInstrFormats.td
  llvm/lib/Target/BPF/BPFInstrInfo.td
  llvm/lib/Target/BPF/BPFSubtarget.cpp
  llvm/lib/Target/BPF/BPFSubtarget.h
  llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
  llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
  llvm/test/CodeGen/BPF/atomics.ll
  llvm/test/CodeGen/BPF/atomics_2.ll

Index: llvm/test/CodeGen/BPF/atomics_2.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/atomics_2.ll
@@ -0,0 +1,202 @@
+; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v4 | FileCheck %s
+;
+; Source:
+;   char test_load_sub_8(char *p, char v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   short test_load_sub_16(short *p, short v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_32(int *p, int v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_64(long *p, long v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_xchg_8(char *p, char v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_16(short *p, short v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_32(int *p, int v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_64(long *p, long v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_cas_32(int *p, int old, int new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   long test_cas_64(long *p, long old, long new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+
+; CHECK-LABEL: test_load_sub_8
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_sub((u8 *)(r1 + 0), w0)
+; CHECK: encoding: [0xd3,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local signext i8 @test_load_sub_8(i8* nocapture %p, i8 signext %v) local_unnamed_addr #0 {
+entry:
+  %0 = atomicrmw sub i8* %p, i8 %v seq_cst
+  ret i8 %0
+}
+
+; CHECK-LABEL: test_load_sub_16
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_sub((u16 *)(r1 + 0), w0)
+; CHECK: encoding: [0xcb,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local signext i16 @test_load_sub_16(i16* nocapture %p, i16 signext %v) local_unnamed_addr #0 {
+entry:
+  %0 = atomicrmw sub i16* %p, i16 %v seq_cst
+  ret i16 %0
+}
+
+; CHECK-LABEL: test_load_sub_32
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_sub((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: r0 = r2
+; CHECK: r0 = atomic_fetch_sub((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_8
+; CHECK: w0 = w2
+; CHECK: w0 = xchg32_8(r1 + 0, w0)
+; CHECK: encoding: [0xd3,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_8(i8* nocapture %p, i8 signext %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i8* %p, i8 %v seq_cst
+  %conv = sext i8 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_16
+; CHECK: w0 = w2
+; CHECK: w0 = xchg32_16(r1 + 0, w0)
+; CHECK: encoding: [0xcb,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_16(i16* nocapture %p, i16 signext %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i16* %p, i16 %v seq_cst
+  %conv = sext i16 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_32
+; CHECK: w0 = w2
+; CHECK: w0 = xchg32_32(r1 + 0, w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_xchg_64
+; CHECK: r0 = r2
+; CHECK: r0 = xchg_64(r1 + 0, r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_cas_32
+; CHECK: w0 = w2
+; CHECK: w0 = cmpxchg32_32(r1 + 0, w0, w3)
+; CHECK: encoding: [0xc3,0x31,0x00,0x00,0xf1,0x00,0x00,0x00]
+define dso_local i32 @test_cas_32(i32* 

[PATCH] D72184: [BPF] support atomic instructions

2020-11-18 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added inline comments.



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:699
+  let Inst{51-48} = addr{19-16}; // base reg
+  let Inst{55-52} = dst;
+  let Inst{47-32} = addr{15-0}; // offset

jackmanb wrote:
> There is another mismatch between what I implemented in the kernel and what 
> you've implemented here, which is that I used `dst` as the pointer base and 
> `src` as the writeback register but you did the opposite.
> 
> IIUC what we have here at the moment is `dst = sync_fetch_and_add(*(src + 
> off), dst);`. The advantage is that `dst` makes more intuitive sense as a 
> writeback register. 
> 
> If we instead had `src = sync_fetch_and_add(*(dst + off), src)` we have the 
> unintuitive writeback to `src`, but now a) the 2nd argument (the addend) 
> makes more intuitive sense, and b) `dst` being the pointer base is consistent 
> with the rest of `BPF_STX`.
Could you double check? My encoding is actually
   dst = sync_fetch_and_add(*(src + off), dst);
The encoding itself is very similar to xadd/stx.
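
In C terms this is the ordinary fetch-and-add below, a sketch analogous to 
test_load_sub_64 in atomics_2.ll; per the CHECK lines there, the returned old 
value and the value operand end up in the same register, e.g. 
r0 = atomic_fetch_sub((u64 *)(r1 + 0), r0).

```c
/* Analogous to the atomics_2.ll tests: the builtin returns the old value
 * of *p, and under the encoding above that old value comes back in the
 * same register class that carried the addend (dst). */
long fetch_add_64(long *p, long v)
{
  return __sync_fetch_and_add(p, v);  /* atomically *p += v; returns old *p */
}
```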



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:783
+  let Inst{47-32} = addr{15-0}; // offset
+  let Inst{11-8} = new;
+  let Inst{7-4} = BPF_CMPXCHG.Value;

jackmanb wrote:
> If we go down the route of matching operands with x86 as you have done for 
> `XFALU64` and `XCHG`, I think  we should also do it for cmpxchg.
> 
> IIUC this is `dst = atomic_cmpxchg(*(src + off), r0, new);` 
> 
> But to do it in a single x86 instruction we need to have only 2 operands + 
> the hard-coded r0. `r0 = atomic_xmpxchg(*(dst + off), r0, src);` would seem 
> most natural to me.
We can do this:
  r0 = atomic_cmpxchg(*(dst + off), r0, src);


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184



[PATCH] D72184: [BPF] support atomic instructions

2020-11-18 Thread Brendan Jackman via Phabricator via cfe-commits
jackmanb added inline comments.



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:699
+  let Inst{51-48} = addr{19-16}; // base reg
+  let Inst{55-52} = dst;
+  let Inst{47-32} = addr{15-0}; // offset

There is another mismatch between what I implemented in the kernel and what 
you've implemented here, which is that I used `dst` as the pointer base and 
`src` as the writeback register but you did the opposite.

IIUC what we have here at the moment is `dst = sync_fetch_and_add(*(src + off), 
dst);`. The advantage is that `dst` makes more intuitive sense as a writeback 
register. 

If we instead had `src = sync_fetch_and_add(*(dst + off), src)` we have the 
unintuitive writeback to `src`, but now a) the 2nd argument (the addend) makes 
more intuitive sense, and b) `dst` being the pointer base is consistent with 
the rest of `BPF_STX`.



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:783
+  let Inst{47-32} = addr{15-0}; // offset
+  let Inst{11-8} = new;
+  let Inst{7-4} = BPF_CMPXCHG.Value;

If we go down the route of matching operands with x86 as you have done for 
`XFALU64` and `XCHG`, I think  we should also do it for cmpxchg.

IIUC this is `dst = atomic_cmpxchg(*(src + off), r0, new);` 

But to do it in a single x86 instruction we need to have only 2 operands + the 
hard-coded r0. `r0 = atomic_cmpxchg(*(dst + off), r0, src);` would seem most 
natural to me.
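
A hedged sketch of the C-level operation at stake, mirroring test_cas_64 in 
atomics_2.ll; under the hard-coded-R0 convention being proposed here, the 
expected ("old") value goes in through R0/W0 and the previous memory contents 
come back in the same register, i.e. r0 = cmpxchg_64(r1 + 0, r0, r3).

```c
/* Mirrors test_cas_64 in atomics_2.ll: __sync_val_compare_and_swap stores
 * new into *p only if *p == old, and always returns the previous contents
 * of *p, which is the value that would travel through R0. */
long cas_64(long *p, long old, long new)
{
  return __sync_val_compare_and_swap(p, old, new);
}
```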


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184



[PATCH] D72184: [BPF] support atomic instructions

2020-11-17 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added a comment.

Makes sense. Will add and/or/xor support in the next revision. Will also think 
about how to support a hardware-level barrier. Totally agree that we should 
have adequate atomic op support in cpu=v4.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184



[PATCH] D72184: [BPF] support atomic instructions

2020-11-17 Thread Alexei Starovoitov via Phabricator via cfe-commits
ast added a comment.

Looks good. Before landing we need to agree on the full set of instructions 
that -mcpu=v4 will support.
atomic_fetch_or|xor|and are probably needed as instructions; the kernel JIT 
will generate x86 cmpxchg for them. If LLVM instead generated a BPF cmpxchg 
insn, we'd need to teach the verifier to recognize infinite loops.
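
To make the verifier concern concrete, here is a hedged sketch (names are 
illustrative, not from the patch) of the compare-exchange retry loop that a 
fetch-or would otherwise be open-coded as; the backward branch of this loop is 
what the verifier would have to prove terminates.

```c
/* Sketch only: the open-coded fallback for an atomic fetch-or when no
 * native atomic_fetch_or instruction exists. It retries the CAS until it
 * observes an unchanged value, then returns the value before the OR. */
long fetch_or_fallback(long *p, long v)
{
  long old;
  do {
    old = *(volatile long *)p;   /* snapshot the current value */
  } while (__sync_val_compare_and_swap(p, old, old | v) != old);
  return old;
}
```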


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184



[PATCH] D72184: [BPF] support atomic instructions

2020-11-17 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song updated this revision to Diff 305895.
yonghong-song added a comment.

Use the same register for dst and val for the fetch-add, fetch-sub and xchg 
instructions.
For fetch-sub, if using different registers for dst/val is later deemed 
preferable, I can make the change then.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

Files:
  clang/lib/Basic/Targets/BPF.cpp
  clang/test/Misc/target-invalid-cpu-note.c
  llvm/lib/Target/BPF/BPF.td
  llvm/lib/Target/BPF/BPFInstrFormats.td
  llvm/lib/Target/BPF/BPFInstrInfo.td
  llvm/lib/Target/BPF/BPFSubtarget.cpp
  llvm/lib/Target/BPF/BPFSubtarget.h
  llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
  llvm/test/CodeGen/BPF/atomics.ll
  llvm/test/CodeGen/BPF/atomics_2.ll

Index: llvm/test/CodeGen/BPF/atomics_2.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/atomics_2.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v4 | FileCheck %s
+;
+; Source:
+;   char test_load_sub_8(char *p, char v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   short test_load_sub_16(short *p, short v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_32(int *p, int v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_64(long *p, long v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_xchg_8(char *p, char v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_16(short *p, short v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_32(int *p, int v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_64(long *p, long v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_cas_32(int *p, int old, int new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   long test_cas_64(long *p, long old, long new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+
+; CHECK-LABEL: test_load_sub_8
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_sub((u8 *)(r1 + 0), w0)
+; CHECK: encoding: [0xd3,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local signext i8 @test_load_sub_8(i8* nocapture %p, i8 signext %v) local_unnamed_addr #0 {
+entry:
+  %0 = atomicrmw sub i8* %p, i8 %v seq_cst
+  ret i8 %0
+}
+
+; CHECK-LABEL: test_load_sub_16
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_sub((u16 *)(r1 + 0), w0)
+; CHECK: encoding: [0xcb,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local signext i16 @test_load_sub_16(i16* nocapture %p, i16 signext %v) local_unnamed_addr #0 {
+entry:
+  %0 = atomicrmw sub i16* %p, i16 %v seq_cst
+  ret i16 %0
+}
+
+; CHECK-LABEL: test_load_sub_32
+; CHECK: w0 = w2
+; CHECK: w0 = atomic_fetch_sub((u32 *)(r1 + 0), w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: r0 = r2
+; CHECK: r0 = atomic_fetch_sub((u64 *)(r1 + 0), r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x11,0x00,0x00,0x00]
+define dso_local i32 @test_load_sub_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_8
+; CHECK: w0 = w2
+; CHECK: w0 = xchg32_8(r1 + 0, w0)
+; CHECK: encoding: [0xd3,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_8(i8* nocapture %p, i8 signext %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i8* %p, i8 %v seq_cst
+  %conv = sext i8 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_16
+; CHECK: w0 = w2
+; CHECK: w0 = xchg32_16(r1 + 0, w0)
+; CHECK: encoding: [0xcb,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_16(i16* nocapture %p, i16 signext %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i16* %p, i16 %v seq_cst
+  %conv = sext i16 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_32
+; CHECK: w0 = w2
+; CHECK: w0 = xchg32_32(r1 + 0, w0)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_xchg_64
+; CHECK: r0 = r2
+; CHECK: r0 = xchg_64(r1 + 0, r0)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0xe1,0x00,0x00,0x00]
+define dso_local i32 @test_xchg_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_cas_32
+; CHECK: w0 = w2
+; CHECK: w0 = cmpxchg32_32(r1 + 0, w0, w3)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0xf1,0x03,0x00,0x00]
+define dso_local i32 @test_cas_32(i32* nocapture %p, 

[PATCH] D72184: [BPF] support atomic instructions

2020-11-17 Thread Alexei Starovoitov via Phabricator via cfe-commits
ast added inline comments.



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:684
+  let Inst{47-32} = addr{15-0}; // offset
+  let Inst{11-8} = val;
+  let Inst{7-4} = Opc.Value;

jackmanb wrote:
> yonghong-song wrote:
> > jackmanb wrote:
> > > jackmanb wrote:
> > > > jackmanb wrote:
> > > > > Sorry I'm a beginner with the LLVM code, could you explain what `val` 
> > > > > does? I didn't notice this when I looked through here before.
> > > > > 
> > > > > To try and get a clue I tried just removing this line and then 
> > > > > compiling the following code:
> > > > > 
> > > > > ```C
> > > > > // SPDX-License-Identifier: GPL-2.0
> > > > > #include 
> > > > > 
> > > > > #include 
> > > > > #include 
> > > > > #include 
> > > > > 
> > > > > __u64 test_data_64 = 0;
> > > > > __u64 test1_result = 0;
> > > > > 
> > > > > SEC("fentry/bpf_fentry_test1")
> > > > > int BPF_PROG(test1, int a)
> > > > > {
> > > > > /* atomic_fetch_add(_data_64, 1); */
> > > > > test1_result = __sync_fetch_and_add(_data_64, 1);
> > > > > return 0;
> > > > > }
> > > > > ```
> > > > > 
> > > > > And I was able to load and run the program, with the kernel on my WIP 
> > > > > branch: https://github.com/bjackman/linux-bpf/tree/wips/bpf-atomics-v0
> > > > > 
> > > > > The result looks like this:
> > > > > 
> > > > > ```shell
> > > > > $ llvm-objdump -d atomics_test.o
> > > > > 
> > > > > atomics_test.o: file format elf64-bpf
> > > > > 
> > > > > 
> > > > > Disassembly of section fentry/bpf_fentry_test1:
> > > > > 
> > > > >  :
> > > > >0:   b7 01 00 00 01 00 00 00 r1 = 1
> > > > >1:   18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 
> > > > > 0 ll
> > > > >3:   db 12 00 00 01 00 00 00 r1 = atomic_fetch_add((u64 
> > > > > *)(r2 + 0), PLEASE submit a bug report to https://bugs.llvm.org/ and 
> > > > > include the crash backtrace.
> > > > > Stack dump:
> > > > > 0.  Program arguments: llvm-objdump -d atomics_test.o 
> > > > > Segmentation fault
> > > > > ```
> > > > > 
> > > > > Aside from the fact that llvm-objdump crashed, the encoding `db 12 00 
> > > > > 00 01 00 00 00` seems correct to me. If I add the `let Inst{11-8} = 
> > > > > val` back in I get `db 12 00 00 01 01 00 00` which I don't understand.
> > > > Ah wait, I guess this is adding a 3rd operand register? In my example 
> > > > it's unclear because it is R1 which is also the dst operand. I was 
> > > > envisaging we just have semantics like `src = atomic_fetch_add(dst+off, 
> > > > src)` but you are instead proposing `dst = atomic_fetch_add(dst+off, 
> > > > val)`?
> > > Sorry I mean `dst = atomic_fetch_add(src+off, val)`
> > > Sorry I'm a beginner with the LLVM code, could you explain what `val` 
> > > does? I didn't notice this when I looked through here before.
> > 
> > For the below statement:
> >   test1_result = __sync_fetch_and_add(_data_64, 1);
> > 
> > 'val' represents the register which holds the value '1'.
> > 
> > bit 4-7 is also used in compare-and-exchange insn as you need a memory 
> > location, in-register old/new values.
> > 
> > > 
> > > To try and get a clue I tried just removing this line and then compiling 
> > > the following code:
> > > 
> > > ```C
> > > // SPDX-License-Identifier: GPL-2.0
> > > #include 
> > > 
> > > #include 
> > > #include 
> > > #include 
> > > 
> > > __u64 test_data_64 = 0;
> > > __u64 test1_result = 0;
> > > 
> > > SEC("fentry/bpf_fentry_test1")
> > > int BPF_PROG(test1, int a)
> > > {
> > > /* atomic_fetch_add(_data_64, 1); */
> > > test1_result = __sync_fetch_and_add(_data_64, 1);
> > > return 0;
> > > }
> > > ```
> > > 
> > > And I was able to load and run the program, with the kernel on my WIP 
> > > branch: https://github.com/bjackman/linux-bpf/tree/wips/bpf-atomics-v0
> > > 
> > > The result looks like this:
> > > 
> > > ```shell
> > > $ llvm-objdump -d atomics_test.o
> > > 
> > > atomics_test.o: file format elf64-bpf
> > > 
> > > 
> > > Disassembly of section fentry/bpf_fentry_test1:
> > > 
> > >  :
> > >0:   b7 01 00 00 01 00 00 00 r1 = 1
> > >1:   18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
> > >3:   db 12 00 00 01 00 00 00 r1 = atomic_fetch_add((u64 *)(r2 
> > > + 0), PLEASE submit a bug report to https://bugs.llvm.org/ and include 
> > > the crash backtrace.
> > > Stack dump:
> > > 0.  Program arguments: llvm-objdump -d atomics_test.o 
> > > Segmentation fault
> > > ```
> > > 
> > 
> > the crash may come from that the 'val' is not encoded properly. I will 
> > double check.
> > 
> > > Aside from the fact that llvm-objdump crashed, the encoding `db 12 00 00 
> > > 01 00 00 00` seems correct to me. If I add the `let Inst{11-8} = val` 
> > > back in I get `db 12 00 00 01 01 00 00` which I don't understand.
> > 
> > in this particular case, yes, as final asm code looks like
> >r1 = atomic_fetch_add((u64 *)(r2 + 0), 

Re: [PATCH] D72184: [BPF] support atomic instructions

2020-11-17 Thread Yonghong Song via cfe-commits



On 11/17/20 3:23 AM, Brendan Jackman via Phabricator wrote:

jackmanb added inline comments.



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:684
+  let Inst{47-32} = addr{15-0}; // offset
+  let Inst{11-8} = val;
+  let Inst{7-4} = Opc.Value;

yonghong-song wrote:

jackmanb wrote:

jackmanb wrote:

jackmanb wrote:

Sorry I'm a beginner with the LLVM code, could you explain what `val` does? I 
didn't notice this when I looked through here before.

To try and get a clue I tried just removing this line and then compiling the 
following code:

```C
// SPDX-License-Identifier: GPL-2.0
#include 

#include 
#include 
#include 

__u64 test_data_64 = 0;
__u64 test1_result = 0;

SEC("fentry/bpf_fentry_test1")
int BPF_PROG(test1, int a)
{
 /* atomic_fetch_add(&test_data_64, 1); */
 test1_result = __sync_fetch_and_add(&test_data_64, 1);
 return 0;
}
```

And I was able to load and run the program, with the kernel on my WIP branch: 
https://github.com/bjackman/linux-bpf/tree/wips/bpf-atomics-v0

The result looks like this:

```shell
$ llvm-objdump -d atomics_test.o

atomics_test.o: file format elf64-bpf


Disassembly of section fentry/bpf_fentry_test1:

 :
0:   b7 01 00 00 01 00 00 00 r1 = 1
1:   18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
3:   db 12 00 00 01 00 00 00 r1 = atomic_fetch_add((u64 *)(r2 + 0), 
PLEASE submit a bug report to https://bugs.llvm.org/  and include the crash 
backtrace.
Stack dump:
0.  Program arguments: llvm-objdump -d atomics_test.o
Segmentation fault
```

Aside from the fact that llvm-objdump crashed, the encoding `db 12 00 00 01 00 
00 00` seems correct to me. If I add the `let Inst{11-8} = val` back in I get 
`db 12 00 00 01 01 00 00` which I don't understand.

Ah wait, I guess this is adding a 3rd operand register? In my example it's 
unclear because it is R1 which is also the dst operand. I was envisaging we 
just have semantics like `src = atomic_fetch_add(dst+off, src)` but you are 
instead proposing `dst = atomic_fetch_add(dst+off, val)`?

Sorry I mean `dst = atomic_fetch_add(src+off, val)`
Sorry I'm a beginner with the LLVM code, could you explain what `val` does? I 
didn't notice this when I looked through here before.


For the below statement:
   test1_result = __sync_fetch_and_add(&test_data_64, 1);

'val' represents the register which holds the value '1'.

Bits 4-7 are also used in the compare-and-exchange insn, as you need a memory 
location plus in-register old/new values.



To try and get a clue I tried just removing this line and then compiling the 
following code:

```C
// SPDX-License-Identifier: GPL-2.0
#include 

#include 
#include 
#include 

__u64 test_data_64 = 0;
__u64 test1_result = 0;

SEC("fentry/bpf_fentry_test1")
int BPF_PROG(test1, int a)
{
 /* atomic_fetch_add(&test_data_64, 1); */
 test1_result = __sync_fetch_and_add(&test_data_64, 1);
 return 0;
}
```

And I was able to load and run the program, with the kernel on my WIP branch: 
https://github.com/bjackman/linux-bpf/tree/wips/bpf-atomics-v0

The result looks like this:

```shell
$ llvm-objdump -d atomics_test.o

atomics_test.o: file format elf64-bpf


Disassembly of section fentry/bpf_fentry_test1:

 :
0:   b7 01 00 00 01 00 00 00 r1 = 1
1:   18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
3:   db 12 00 00 01 00 00 00 r1 = atomic_fetch_add((u64 *)(r2 + 0), 
PLEASE submit a bug report to https://bugs.llvm.org/  and include the crash 
backtrace.
Stack dump:
0.  Program arguments: llvm-objdump -d atomics_test.o
Segmentation fault
```



The crash may come from the 'val' field not being encoded properly. I will 
double check.


Aside from the fact that llvm-objdump crashed, the encoding `db 12 00 00 01 00 
00 00` seems correct to me. If I add the `let Inst{11-8} = val` back in I get 
`db 12 00 00 01 01 00 00` which I don't understand.


in this particular case, yes, as final asm code looks like
r1 = atomic_fetch_add((u64 *)(r2 + 0), r1)
where the value "r1" and the result "r1" shared the same register. A little bit compiler 
work is need to enforce "val" register and result register always the same.

My current design allows:
   r3 = atomic_fetch_add((u64 *)(r2 + 0), r1)
and I think this is more flexible as people may later on still use "r1". But 
let me know whether you prefer
   r1 = atomic_fetch_add((u64 *)(r2 + 0), r1)
always.



Got it - this design will introduce some impedance mismatch with x86, since 
XADD has only two operands (`r3 = atomic_fetch_add((u64 *)(r2 + 0), r1)` cannot 
be implemented in a single instruction). It also hard-codes RAX as the third 
operand for [cmp]xchg so my current kernel implementation hard-codes R0 - 
that's also what Alexei supported in the separate email thread.

Will you be available to discuss this in the BPF office hours this week? There 
are a few possible ways forward, we just need 

[PATCH] D72184: [BPF] support atomic instructions

2020-11-17 Thread Brendan Jackman via Phabricator via cfe-commits
jackmanb added inline comments.



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:684
+  let Inst{47-32} = addr{15-0}; // offset
+  let Inst{11-8} = val;
+  let Inst{7-4} = Opc.Value;

yonghong-song wrote:
> jackmanb wrote:
> > jackmanb wrote:
> > > jackmanb wrote:
> > > > Sorry I'm a beginner with the LLVM code, could you explain what `val` 
> > > > does? I didn't notice this when I looked through here before.
> > > > 
> > > > To try and get a clue I tried just removing this line and then 
> > > > compiling the following code:
> > > > 
> > > > ```C
> > > > // SPDX-License-Identifier: GPL-2.0
> > > > #include 
> > > > 
> > > > #include 
> > > > #include 
> > > > #include 
> > > > 
> > > > __u64 test_data_64 = 0;
> > > > __u64 test1_result = 0;
> > > > 
> > > > SEC("fentry/bpf_fentry_test1")
> > > > int BPF_PROG(test1, int a)
> > > > {
> > > > /* atomic_fetch_add(_data_64, 1); */
> > > > test1_result = __sync_fetch_and_add(_data_64, 1);
> > > > return 0;
> > > > }
> > > > ```
> > > > 
> > > > And I was able to load and run the program, with the kernel on my WIP 
> > > > branch: https://github.com/bjackman/linux-bpf/tree/wips/bpf-atomics-v0
> > > > 
> > > > The result looks like this:
> > > > 
> > > > ```shell
> > > > $ llvm-objdump -d atomics_test.o
> > > > 
> > > > atomics_test.o: file format elf64-bpf
> > > > 
> > > > 
> > > > Disassembly of section fentry/bpf_fentry_test1:
> > > > 
> > > >  :
> > > >0:   b7 01 00 00 01 00 00 00 r1 = 1
> > > >1:   18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 
> > > > ll
> > > >3:   db 12 00 00 01 00 00 00 r1 = atomic_fetch_add((u64 
> > > > *)(r2 + 0), PLEASE submit a bug report to https://bugs.llvm.org/ and 
> > > > include the crash backtrace.
> > > > Stack dump:
> > > > 0.  Program arguments: llvm-objdump -d atomics_test.o 
> > > > Segmentation fault
> > > > ```
> > > > 
> > > > Aside from the fact that llvm-objdump crashed, the encoding `db 12 00 
> > > > 00 01 00 00 00` seems correct to me. If I add the `let Inst{11-8} = 
> > > > val` back in I get `db 12 00 00 01 01 00 00` which I don't understand.
> > > Ah wait, I guess this is adding a 3rd operand register? In my example 
> > > it's unclear because it is R1 which is also the dst operand. I was 
> > > envisaging we just have semantics like `src = atomic_fetch_add(dst+off, 
> > > src)` but you are instead proposing `dst = atomic_fetch_add(dst+off, 
> > > val)`?
> > Sorry I mean `dst = atomic_fetch_add(src+off, val)`
> > Sorry I'm a beginner with the LLVM code, could you explain what `val` does? 
> > I didn't notice this when I looked through here before.
> 
> For the below statement:
>   test1_result = __sync_fetch_and_add(_data_64, 1);
> 
> 'val' represents the register which holds the value '1'.
> 
> bit 4-7 is also used in compare-and-exchange insn as you need a memory 
> location, in-register old/new values.
> 
> > 
> > To try and get a clue I tried just removing this line and then compiling 
> > the following code:
> > 
> > ```C
> > // SPDX-License-Identifier: GPL-2.0
> > #include 
> > 
> > #include 
> > #include 
> > #include 
> > 
> > __u64 test_data_64 = 0;
> > __u64 test1_result = 0;
> > 
> > SEC("fentry/bpf_fentry_test1")
> > int BPF_PROG(test1, int a)
> > {
> > /* atomic_fetch_add(_data_64, 1); */
> > test1_result = __sync_fetch_and_add(_data_64, 1);
> > return 0;
> > }
> > ```
> > 
> > And I was able to load and run the program, with the kernel on my WIP 
> > branch: https://github.com/bjackman/linux-bpf/tree/wips/bpf-atomics-v0
> > 
> > The result looks like this:
> > 
> > ```shell
> > $ llvm-objdump -d atomics_test.o
> > 
> > atomics_test.o: file format elf64-bpf
> > 
> > 
> > Disassembly of section fentry/bpf_fentry_test1:
> > 
> >  :
> >0:   b7 01 00 00 01 00 00 00 r1 = 1
> >1:   18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
> >3:   db 12 00 00 01 00 00 00 r1 = atomic_fetch_add((u64 *)(r2 + 
> > 0), PLEASE submit a bug report to https://bugs.llvm.org/ and include the 
> > crash backtrace.
> > Stack dump:
> > 0.  Program arguments: llvm-objdump -d atomics_test.o 
> > Segmentation fault
> > ```
> > 
> 
> the crash may come from that the 'val' is not encoded properly. I will double 
> check.
> 
> > Aside from the fact that llvm-objdump crashed, the encoding `db 12 00 00 01 
> > 00 00 00` seems correct to me. If I add the `let Inst{11-8} = val` back in 
> > I get `db 12 00 00 01 01 00 00` which I don't understand.
> 
> in this particular case, yes, as final asm code looks like
>r1 = atomic_fetch_add((u64 *)(r2 + 0), r1)
> where the value "r1" and the result "r1" shared the same register. A little 
> bit compiler work is need to enforce "val" register and result register 
> always the same.
> 
> My current design allows:
>   r3 = atomic_fetch_add((u64 *)(r2 + 0), r1)
> and I think this is 

[PATCH] D72184: [BPF] support atomic instructions

2020-11-16 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added inline comments.



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:666
+def XADDD : XADD;
+  }
+}

jackmanb wrote:
> FYI - I just spotted some stray `\t` in here (is it helpful to point this 
> out? If not let me know, I will ignore in future)
\t is not allowed. I will run clang-format next time to catch such violations. 
Thanks for letting me know.



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:684
+  let Inst{47-32} = addr{15-0}; // offset
+  let Inst{11-8} = val;
+  let Inst{7-4} = Opc.Value;

jackmanb wrote:
> jackmanb wrote:
> > jackmanb wrote:
> > > Sorry I'm a beginner with the LLVM code, could you explain what `val` 
> > > does? I didn't notice this when I looked through here before.
> > > 
> > > To try and get a clue I tried just removing this line and then compiling 
> > > the following code:
> > > 
> > > ```C
> > > // SPDX-License-Identifier: GPL-2.0
> > > #include 
> > > 
> > > #include 
> > > #include 
> > > #include 
> > > 
> > > __u64 test_data_64 = 0;
> > > __u64 test1_result = 0;
> > > 
> > > SEC("fentry/bpf_fentry_test1")
> > > int BPF_PROG(test1, int a)
> > > {
> > > /* atomic_fetch_add(_data_64, 1); */
> > > test1_result = __sync_fetch_and_add(_data_64, 1);
> > > return 0;
> > > }
> > > ```
> > > 
> > > And I was able to load and run the program, with the kernel on my WIP 
> > > branch: https://github.com/bjackman/linux-bpf/tree/wips/bpf-atomics-v0
> > > 
> > > The result looks like this:
> > > 
> > > ```shell
> > > $ llvm-objdump -d atomics_test.o
> > > 
> > > atomics_test.o: file format elf64-bpf
> > > 
> > > 
> > > Disassembly of section fentry/bpf_fentry_test1:
> > > 
> > >  :
> > >0:   b7 01 00 00 01 00 00 00 r1 = 1
> > >1:   18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
> > >3:   db 12 00 00 01 00 00 00 r1 = atomic_fetch_add((u64 *)(r2 
> > > + 0), PLEASE submit a bug report to https://bugs.llvm.org/ and include 
> > > the crash backtrace.
> > > Stack dump:
> > > 0.  Program arguments: llvm-objdump -d atomics_test.o 
> > > Segmentation fault
> > > ```
> > > 
> > > Aside from the fact that llvm-objdump crashed, the encoding `db 12 00 00 
> > > 01 00 00 00` seems correct to me. If I add the `let Inst{11-8} = val` 
> > > back in I get `db 12 00 00 01 01 00 00` which I don't understand.
> > Ah wait, I guess this is adding a 3rd operand register? In my example it's 
> > unclear because it is R1 which is also the dst operand. I was envisaging we 
> > just have semantics like `src = atomic_fetch_add(dst+off, src)` but you are 
> > instead proposing `dst = atomic_fetch_add(dst+off, val)`?
> Sorry I mean `dst = atomic_fetch_add(src+off, val)`
> Sorry I'm a beginner with the LLVM code, could you explain what `val` does? I 
> didn't notice this when I looked through here before.

For the below statement:
  test1_result = __sync_fetch_and_add(&test_data_64, 1);

'val' represents the register which holds the value '1'.

Bits 4-7 are also used in the compare-and-exchange insn, as you need a memory 
location plus in-register old/new values.

> 
> To try and get a clue I tried just removing this line and then compiling the 
> following code:
> 
> ```C
> // SPDX-License-Identifier: GPL-2.0
> #include 
> 
> #include 
> #include 
> #include 
> 
> __u64 test_data_64 = 0;
> __u64 test1_result = 0;
> 
> SEC("fentry/bpf_fentry_test1")
> int BPF_PROG(test1, int a)
> {
> /* atomic_fetch_add(_data_64, 1); */
> test1_result = __sync_fetch_and_add(_data_64, 1);
> return 0;
> }
> ```
> 
> And I was able to load and run the program, with the kernel on my WIP branch: 
> https://github.com/bjackman/linux-bpf/tree/wips/bpf-atomics-v0
> 
> The result looks like this:
> 
> ```shell
> $ llvm-objdump -d atomics_test.o
> 
> atomics_test.o: file format elf64-bpf
> 
> 
> Disassembly of section fentry/bpf_fentry_test1:
> 
>  :
>0:   b7 01 00 00 01 00 00 00 r1 = 1
>1:   18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
>3:   db 12 00 00 01 00 00 00 r1 = atomic_fetch_add((u64 *)(r2 + 
> 0), PLEASE submit a bug report to https://bugs.llvm.org/ and include the 
> crash backtrace.
> Stack dump:
> 0.  Program arguments: llvm-objdump -d atomics_test.o 
> Segmentation fault
> ```
> 

The crash may come from the 'val' field not being encoded properly. I will 
double check.

> Aside from the fact that llvm-objdump crashed, the encoding `db 12 00 00 01 
> 00 00 00` seems correct to me. If I add the `let Inst{11-8} = val` back in I 
> get `db 12 00 00 01 01 00 00` which I don't understand.

in this particular case, yes, as final asm code looks like
   r1 = atomic_fetch_add((u64 *)(r2 + 0), r1)
where the value "r1" and the result "r1" shared the same register. A little bit 
compiler work is need to enforce "val" register and result register always the 
same.

My current design allows:

[PATCH] D72184: [BPF] support atomic instructions

2020-11-16 Thread Brendan Jackman via Phabricator via cfe-commits
jackmanb added inline comments.



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:684
+  let Inst{47-32} = addr{15-0}; // offset
+  let Inst{11-8} = val;
+  let Inst{7-4} = Opc.Value;

jackmanb wrote:
> jackmanb wrote:
> > Sorry I'm a beginner with the LLVM code, could you explain what `val` does? 
> > I didn't notice this when I looked through here before.
> > 
> > To try and get a clue I tried just removing this line and then compiling 
> > the following code:
> > 
> > ```C
> > // SPDX-License-Identifier: GPL-2.0
> > #include 
> > 
> > #include 
> > #include 
> > #include 
> > 
> > __u64 test_data_64 = 0;
> > __u64 test1_result = 0;
> > 
> > SEC("fentry/bpf_fentry_test1")
> > int BPF_PROG(test1, int a)
> > {
> > /* atomic_fetch_add(_data_64, 1); */
> > test1_result = __sync_fetch_and_add(_data_64, 1);
> > return 0;
> > }
> > ```
> > 
> > And I was able to load and run the program, with the kernel on my WIP 
> > branch: https://github.com/bjackman/linux-bpf/tree/wips/bpf-atomics-v0
> > 
> > The result looks like this:
> > 
> > ```shell
> > $ llvm-objdump -d atomics_test.o
> > 
> > atomics_test.o: file format elf64-bpf
> > 
> > 
> > Disassembly of section fentry/bpf_fentry_test1:
> > 
> >  :
> >0:   b7 01 00 00 01 00 00 00 r1 = 1
> >1:   18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
> >3:   db 12 00 00 01 00 00 00 r1 = atomic_fetch_add((u64 *)(r2 + 
> > 0), PLEASE submit a bug report to https://bugs.llvm.org/ and include the 
> > crash backtrace.
> > Stack dump:
> > 0.  Program arguments: llvm-objdump -d atomics_test.o 
> > Segmentation fault
> > ```
> > 
> > Aside from the fact that llvm-objdump crashed, the encoding `db 12 00 00 01 
> > 00 00 00` seems correct to me. If I add the `let Inst{11-8} = val` back in 
> > I get `db 12 00 00 01 01 00 00` which I don't understand.
> Ah wait, I guess this is adding a 3rd operand register? In my example it's 
> unclear because it is R1 which is also the dst operand. I was envisaging we 
> just have semantics like `src = atomic_fetch_add(dst+off, src)` but you are 
> instead proposing `dst = atomic_fetch_add(dst+off, val)`?
Sorry I mean `dst = atomic_fetch_add(src+off, val)`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184



[PATCH] D72184: [BPF] support atomic instructions

2020-11-16 Thread Brendan Jackman via Phabricator via cfe-commits
jackmanb added inline comments.



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:684
+  let Inst{47-32} = addr{15-0}; // offset
+  let Inst{11-8} = val;
+  let Inst{7-4} = Opc.Value;

jackmanb wrote:
> Sorry I'm a beginner with the LLVM code, could you explain what `val` does? I 
> didn't notice this when I looked through here before.
> 
> To try and get a clue I tried just removing this line and then compiling the 
> following code:
> 
> ```C
> // SPDX-License-Identifier: GPL-2.0
> #include 
> 
> #include 
> #include 
> #include 
> 
> __u64 test_data_64 = 0;
> __u64 test1_result = 0;
> 
> SEC("fentry/bpf_fentry_test1")
> int BPF_PROG(test1, int a)
> {
> /* atomic_fetch_add(_data_64, 1); */
> test1_result = __sync_fetch_and_add(_data_64, 1);
> return 0;
> }
> ```
> 
> And I was able to load and run the program, with the kernel on my WIP branch: 
> https://github.com/bjackman/linux-bpf/tree/wips/bpf-atomics-v0
> 
> The result looks like this:
> 
> ```shell
> $ llvm-objdump -d atomics_test.o
> 
> atomics_test.o: file format elf64-bpf
> 
> 
> Disassembly of section fentry/bpf_fentry_test1:
> 
>  :
>0:   b7 01 00 00 01 00 00 00 r1 = 1
>1:   18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
>3:   db 12 00 00 01 00 00 00 r1 = atomic_fetch_add((u64 *)(r2 + 
> 0), PLEASE submit a bug report to https://bugs.llvm.org/ and include the 
> crash backtrace.
> Stack dump:
> 0.  Program arguments: llvm-objdump -d atomics_test.o 
> Segmentation fault
> ```
> 
> Aside from the fact that llvm-objdump crashed, the encoding `db 12 00 00 01 
> 00 00 00` seems correct to me. If I add the `let Inst{11-8} = val` back in I 
> get `db 12 00 00 01 01 00 00` which I don't understand.
Ah wait, I guess this is adding a 3rd operand register? In my example it's 
unclear because it is R1 which is also the dst operand. I was envisaging we 
just have semantics like `src = atomic_fetch_add(dst+off, src)` but you are 
instead proposing `dst = atomic_fetch_add(dst+off, val)`?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184



[PATCH] D72184: [BPF] support atomic instructions

2020-11-16 Thread Brendan Jackman via Phabricator via cfe-commits
jackmanb added a comment.

Sorry, I was disrupted and not able to work on this last week! I've just 
started trying to integrate this with my kernel patches.




Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:666
+def XADDD : XADD;
+  }
+}

FYI - I just spotted some stray `\t` in here (is it helpful to point this out? 
If not let me know, I will ignore in future)



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:684
+  let Inst{47-32} = addr{15-0}; // offset
+  let Inst{11-8} = val;
+  let Inst{7-4} = Opc.Value;

Sorry I'm a beginner with the LLVM code, could you explain what `val` does? I 
didn't notice this when I looked through here before.

To try and get a clue I tried just removing this line and then compiling the 
following code:

```C
// SPDX-License-Identifier: GPL-2.0
#include 

#include 
#include 
#include 

__u64 test_data_64 = 0;
__u64 test1_result = 0;

SEC("fentry/bpf_fentry_test1")
int BPF_PROG(test1, int a)
{
/* atomic_fetch_add(&test_data_64, 1); */
test1_result = __sync_fetch_and_add(&test_data_64, 1);
return 0;
}
```

And I was able to load and run the program, with the kernel on my WIP branch: 
https://github.com/bjackman/linux-bpf/tree/wips/bpf-atomics-v0

The result looks like this:

```shell
$ llvm-objdump -d atomics_test.o

atomics_test.o: file format elf64-bpf


Disassembly of section fentry/bpf_fentry_test1:

 :
   0:   b7 01 00 00 01 00 00 00 r1 = 1
   1:   18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
   3:   db 12 00 00 01 00 00 00 r1 = atomic_fetch_add((u64 *)(r2 + 0), 
PLEASE submit a bug report to https://bugs.llvm.org/ and include the crash 
backtrace.
Stack dump:
0.  Program arguments: llvm-objdump -d atomics_test.o 
Segmentation fault
```

Aside from the fact that llvm-objdump crashed, the encoding `db 12 00 00 01 00 
00 00` seems correct to me. If I add the `let Inst{11-8} = val` back in I get 
`db 12 00 00 01 01 00 00` which I don't understand.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184



[PATCH] D72184: [BPF] support atomic instructions

2020-11-03 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added inline comments.



Comment at: llvm/test/CodeGen/BPF/atomics_2.ll:124
+; CHECK: r0 = r2
+; CHECK: r0 = cmpxchg_64(r1 + 0, r0, r3)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0xf1,0x03,0x00,0x00]

ast wrote:
> Looks like it's generating correct code without special handling in 
> ISelLowering. Nice. I wonder why x86 had to use it. Because of eflags, I 
> guess?
Which specific x86 code are you referring to?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184



[PATCH] D72184: [BPF] support atomic instructions

2020-11-03 Thread Alexei Starovoitov via Phabricator via cfe-commits
ast added inline comments.



Comment at: llvm/test/CodeGen/BPF/atomics_2.ll:124
+; CHECK: r0 = r2
+; CHECK: r0 = cmpxchg_64(r1 + 0, r0, r3)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0xf1,0x03,0x00,0x00]

Looks like it's generating correct code without special handling in 
ISelLowering. Nice. I wonder why x86 had to use it. Because of eflags, I guess?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184



[PATCH] D72184: [BPF] support atomic instructions

2020-11-03 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song marked an inline comment as done.
yonghong-song added inline comments.



Comment at: llvm/lib/Target/BPF/BPFInstrInfo.td:830
+
+let Predicates = [BPFHasAtomicExt] in {
+  def CMPXCHGD : CMPXCHG;

ast wrote:
> let Defs = [R0], Uses = [R0]
> and BPFISelLowering would need to do getCopyToReg+cmpxchg+getCopyFromReg 
> similar to X86ISelLowering ?
Yes. Just using Uses = [R0] or Uses = [W0], plus changed patterns, can make it work.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184



[PATCH] D72184: [BPF] support atomic instructions

2020-11-03 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song updated this revision to Diff 302617.
yonghong-song retitled this revision from "[WIP][BPF] support 
exchange/compare-and-exchange instruction" to "[BPF] support atomic 
instructions".
yonghong-song edited the summary of this revision.
yonghong-song added a comment.

- remove RFC tag.
- addressed comments for encoding, etc.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72184/new/

https://reviews.llvm.org/D72184

Files:
  clang/lib/Basic/Targets/BPF.cpp
  clang/test/Misc/target-invalid-cpu-note.c
  llvm/lib/Target/BPF/BPF.td
  llvm/lib/Target/BPF/BPFInstrFormats.td
  llvm/lib/Target/BPF/BPFInstrInfo.td
  llvm/lib/Target/BPF/BPFSubtarget.cpp
  llvm/lib/Target/BPF/BPFSubtarget.h
  llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
  llvm/test/CodeGen/BPF/atomics.ll
  llvm/test/CodeGen/BPF/atomics_2.ll

Index: llvm/test/CodeGen/BPF/atomics_2.ll
===
--- /dev/null
+++ llvm/test/CodeGen/BPF/atomics_2.ll
@@ -0,0 +1,131 @@
+; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v4 | FileCheck %s
+;
+; Source:
+;   char test_load_sub_8(char *p, char v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   short test_load_sub_16(short *p, short v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_32(int *p, int v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_load_sub_64(long *p, long v) {
+; return __sync_fetch_and_sub(p, v);
+;   }
+;   int test_xchg_8(char *p, char v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_16(short *p, short v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_32(int *p, int v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_xchg_64(long *p, long v) {
+; return __sync_lock_test_and_set(p, v);
+;   }
+;   int test_cas_32(int *p, int old, int new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+;   long test_cas_64(long *p, long old, long new) {
+; return __sync_val_compare_and_swap(p, old, new);
+;   }
+
+; CHECK-LABEL: test_load_sub_8
+; CHECK: w0 = atomic_fetch_sub((u8 *)(r1 + 0), w2)
+; CHECK: encoding: [0xd3,0x01,0x00,0x00,0x11,0x02,0x00,0x00]
+define dso_local signext i8 @test_load_sub_8(i8* nocapture %p, i8 signext %v) local_unnamed_addr #0 {
+entry:
+  %0 = atomicrmw sub i8* %p, i8 %v seq_cst
+  ret i8 %0
+}
+
+; CHECK-LABEL: test_load_sub_16
+; CHECK: w0 = atomic_fetch_sub((u16 *)(r1 + 0), w2)
+; CHECK: encoding: [0xcb,0x01,0x00,0x00,0x11,0x02,0x00,0x00]
+define dso_local signext i16 @test_load_sub_16(i16* nocapture %p, i16 signext %v) local_unnamed_addr #0 {
+entry:
+  %0 = atomicrmw sub i16* %p, i16 %v seq_cst
+  ret i16 %0
+}
+
+; CHECK-LABEL: test_load_sub_32
+; CHECK: w0 = atomic_fetch_sub((u32 *)(r1 + 0), w2)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0x11,0x02,0x00,0x00]
+define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: r0 = atomic_fetch_sub((u64 *)(r1 + 0), r2)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0x11,0x02,0x00,0x00]
+define dso_local i32 @test_load_sub_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_8
+; CHECK: w0 = xchg32_8(r1 + 0, w2)
+; CHECK: encoding: [0xd3,0x01,0x00,0x00,0xe1,0x02,0x00,0x00]
+define dso_local i32 @test_xchg_8(i8* nocapture %p, i8 signext %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i8* %p, i8 %v seq_cst
+  %conv = sext i8 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_16
+; CHECK: w0 = xchg32_16(r1 + 0, w2)
+; CHECK: encoding: [0xcb,0x01,0x00,0x00,0xe1,0x02,0x00,0x00]
+define dso_local i32 @test_xchg_16(i16* nocapture %p, i16 signext %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i16* %p, i16 %v seq_cst
+  %conv = sext i16 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_xchg_32
+; CHECK: w0 = xchg32_32(r1 + 0, w2)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0xe1,0x02,0x00,0x00]
+define dso_local i32 @test_xchg_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_xchg_64
+; CHECK: xchg_64(r1 + 0, r2)
+; CHECK: encoding: [0xdb,0x01,0x00,0x00,0xe1,0x02,0x00,0x00]
+define dso_local i32 @test_xchg_64(i64* nocapture %p, i64 %v) local_unnamed_addr {
+entry:
+  %0 = atomicrmw xchg i64* %p, i64 %v seq_cst
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test_cas_32
+; CHECK: w0 = w2
+; CHECK: w0 = cmpxchg32_32(r1 + 0, w0, w3)
+; CHECK: encoding: [0xc3,0x01,0x00,0x00,0xf1,0x03,0x00,0x00]
+define dso_local i32 @test_cas_32(i32* nocapture %p, i32 %old, i32 %new) local_unnamed_addr {
+entry:
+  %0 = cmpxchg i32* %p, i32 %old, i32 %new