Per the Linux Kernel Memory Model, value-returning atomic RMW operations
must be fully ordered, i.e. behave as if a full memory barrier (smp_mb())
were placed both before and after the operation. On LoongArch, plain AMO
instructions and bare ll/sc loops do not satisfy this requirement by
themselves.

Update emit_atomic_rmw() to emit barrier-carrying instructions for all
value-returning BPF atomics:

 - BPF_ADD | BPF_FETCH: use amadd_db.{b,h,w,d}
 - BPF_{AND,OR,XOR} | BPF_FETCH: use am{and,or,xor}_db.{w,d}
 - BPF_XCHG: use amswap_db.{b,h,w,d}
 - BPF_CMPXCHG: emit dbar 0x700 after the ll/sc loop, matching
   __WEAK_LLSC_MB in cmpxchg.h

Add the corresponding instruction encodings and emit helpers to inst.h.
Non-value-returning RMW ops (plain BPF_ADD, BPF_AND, etc.) are left as
weakly ordered, consistent with LKMM.

Signed-off-by: Chenguang Zhao <[email protected]>
---
 arch/loongarch/include/asm/inst.h | 18 +++++++++++++++++
 arch/loongarch/net/bpf_jit.c      | 32 +++++++++++++++++--------------
 2 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 76b723590023..bdbc17d07110 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -199,6 +199,10 @@ enum reg3_op {
        amswaph_op      = 0x70b9,
        amaddb_op       = 0x70ba,
        amaddh_op       = 0x70bb,
+       amswapdbb_op    = 0x70bc,
+       amswapdbh_op    = 0x70bd,
+       amadddbb_op     = 0x70be,
+       amadddbh_op     = 0x70bf,
        amswapw_op      = 0x70c0,
        amswapd_op      = 0x70c1,
        amaddw_op       = 0x70c2,
@@ -783,6 +787,20 @@ DEF_EMIT_REG3_FORMAT(amswapb, amswapb_op)
 DEF_EMIT_REG3_FORMAT(amswaph, amswaph_op)
 DEF_EMIT_REG3_FORMAT(amswapw, amswapw_op)
 DEF_EMIT_REG3_FORMAT(amswapd, amswapd_op)
+DEF_EMIT_REG3_FORMAT(amswapdbb, amswapdbb_op)
+DEF_EMIT_REG3_FORMAT(amswapdbh, amswapdbh_op)
+DEF_EMIT_REG3_FORMAT(amadddbb, amadddbb_op)
+DEF_EMIT_REG3_FORMAT(amadddbh, amadddbh_op)
+DEF_EMIT_REG3_FORMAT(amadddbw, amadddbw_op)
+DEF_EMIT_REG3_FORMAT(amadddbd, amadddbd_op)
+DEF_EMIT_REG3_FORMAT(amanddbw, amanddbw_op)
+DEF_EMIT_REG3_FORMAT(amanddbd, amanddbd_op)
+DEF_EMIT_REG3_FORMAT(amordbw, amordbw_op)
+DEF_EMIT_REG3_FORMAT(amordbd, amordbd_op)
+DEF_EMIT_REG3_FORMAT(amxordbw, amxordbw_op)
+DEF_EMIT_REG3_FORMAT(amxordbd, amxordbd_op)
+DEF_EMIT_REG3_FORMAT(amswapdbw, amswapdbw_op)
+DEF_EMIT_REG3_FORMAT(amswapdbd, amswapdbd_op)
 
 #define DEF_EMIT_REG3SA2_FORMAT(NAME, OP)                              \
 static inline void emit_##NAME(union loongarch_instruction *insn,      \
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 24913dc7f4e8..47707579e61c 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -7,6 +7,9 @@
 #include <linux/memory.h>
 #include "bpf_jit.h"
 
+/* dbar hint for ll/sc completion ordering, see __WEAK_LLSC_MB */
+#define DBAR_LLSC_MB   0x700
+
 #define LOONGARCH_MAX_REG_ARGS 8
 
 #define LOONGARCH_LONG_JUMP_NINSNS 5
@@ -418,7 +421,7 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx)
                                pr_err_once("bpf-jit: amadd.b instruction is 
not supported\n");
                                return -EINVAL;
                        }
-                       emit_insn(ctx, amaddb, src, t1, t3);
+                       emit_insn(ctx, amadddbb, src, t1, t3);
                        emit_zext_32(ctx, src, true);
                        break;
                case BPF_H:
@@ -426,39 +429,39 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx)
                                pr_err_once("bpf-jit: amadd.h instruction is 
not supported\n");
                                return -EINVAL;
                        }
-                       emit_insn(ctx, amaddh, src, t1, t3);
+                       emit_insn(ctx, amadddbh, src, t1, t3);
                        emit_zext_32(ctx, src, true);
                        break;
                case BPF_W:
-                       emit_insn(ctx, amaddw, src, t1, t3);
+                       emit_insn(ctx, amadddbw, src, t1, t3);
                        emit_zext_32(ctx, src, true);
                        break;
                case BPF_DW:
-                       emit_insn(ctx, amaddd, src, t1, t3);
+                       emit_insn(ctx, amadddbd, src, t1, t3);
                        break;
                }
                break;
        case BPF_AND | BPF_FETCH:
                if (isdw) {
-                       emit_insn(ctx, amandd, src, t1, t3);
+                       emit_insn(ctx, amanddbd, src, t1, t3);
                } else {
-                       emit_insn(ctx, amandw, src, t1, t3);
+                       emit_insn(ctx, amanddbw, src, t1, t3);
                        emit_zext_32(ctx, src, true);
                }
                break;
        case BPF_OR | BPF_FETCH:
                if (isdw) {
-                       emit_insn(ctx, amord, src, t1, t3);
+                       emit_insn(ctx, amordbd, src, t1, t3);
                } else {
-                       emit_insn(ctx, amorw, src, t1, t3);
+                       emit_insn(ctx, amordbw, src, t1, t3);
                        emit_zext_32(ctx, src, true);
                }
                break;
        case BPF_XOR | BPF_FETCH:
                if (isdw) {
-                       emit_insn(ctx, amxord, src, t1, t3);
+                       emit_insn(ctx, amxordbd, src, t1, t3);
                } else {
-                       emit_insn(ctx, amxorw, src, t1, t3);
+                       emit_insn(ctx, amxordbw, src, t1, t3);
                        emit_zext_32(ctx, src, true);
                }
                break;
@@ -470,7 +473,7 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx)
                                pr_err_once("bpf-jit: amswap.b instruction is 
not supported\n");
                                return -EINVAL;
                        }
-                       emit_insn(ctx, amswapb, src, t1, t3);
+                       emit_insn(ctx, amswapdbb, src, t1, t3);
                        emit_zext_32(ctx, src, true);
                        break;
                case BPF_H:
@@ -478,15 +481,15 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx)
                                pr_err_once("bpf-jit: amswap.h instruction is 
not supported\n");
                                return -EINVAL;
                        }
-                       emit_insn(ctx, amswaph, src, t1, t3);
+                       emit_insn(ctx, amswapdbh, src, t1, t3);
                        emit_zext_32(ctx, src, true);
                        break;
                case BPF_W:
-                       emit_insn(ctx, amswapw, src, t1, t3);
+                       emit_insn(ctx, amswapdbw, src, t1, t3);
                        emit_zext_32(ctx, src, true);
                        break;
                case BPF_DW:
-                       emit_insn(ctx, amswapd, src, t1, t3);
+                       emit_insn(ctx, amswapdbd, src, t1, t3);
                        break;
                }
                break;
@@ -509,6 +512,7 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx)
                        emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6);
                        emit_zext_32(ctx, r0, true);
                }
+               emit_insn(ctx, dbar, DBAR_LLSC_MB);
                break;
        default:
                pr_err_once("bpf-jit: invalid atomic read-modify-write opcode 
%02x\n", imm);
-- 
2.25.1


Reply via email to