Re: [PATCH bpf-next] bpf: add support to read cpu_entry in bpf program

2024-04-29 Thread Yonghong Song



On 4/27/24 8:18 AM, Florian Lehner wrote:

Add new field "cpu_entry" to bpf_perf_event_data which could be read by
bpf programs attached to perf events. The value contains the CPU value
recorded by specifying sample_type with PERF_SAMPLE_CPU when calling
perf_event_open().


You can use bpf_cast_to_kern_ctx kfunc which can cast 'struct 
bpf_perf_event_data'
ctx to 'struct bpf_perf_event_data_kern'.

struct bpf_perf_event_data_kern {
bpf_user_pt_regs_t *regs;
struct perf_sample_data *data;
struct perf_event *event;
};

You can access bpf_perf_event_data_kern->data and then access the 'cpu_entry'
field.



Signed-off-by: Florian Lehner 
---
  include/uapi/linux/bpf_perf_event.h   |  4 
  kernel/trace/bpf_trace.c  | 13 +
  tools/include/uapi/linux/bpf_perf_event.h |  4 
  3 files changed, 21 insertions(+)

diff --git a/include/uapi/linux/bpf_perf_event.h 
b/include/uapi/linux/bpf_perf_event.h
index eb1b9d21250c..4856b4396ece 100644
--- a/include/uapi/linux/bpf_perf_event.h
+++ b/include/uapi/linux/bpf_perf_event.h
@@ -14,6 +14,10 @@ struct bpf_perf_event_data {
bpf_user_pt_regs_t regs;
__u64 sample_period;
__u64 addr;
+   struct {
+   u32 cpu;
+   u32 reserved;
+   }   cpu_entry;
  };
  
  #endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index afb232b1d7c2..2b303221af5c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2176,6 +2176,11 @@ static bool pe_prog_is_valid_access(int off, int size, 
enum bpf_access_type type
if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
return false;
break;
+   case bpf_ctx_range(struct bpf_perf_event_data, cpu_entry):
+   bpf_ctx_record_field_size(info, size_u64);
+   if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
+   return false;
+   break;
default:
if (size != sizeof(long))
return false;
@@ -2208,6 +2213,14 @@ static u32 pe_prog_convert_ctx_access(enum 
bpf_access_type type,
  bpf_target_off(struct perf_sample_data, 
addr, 8,
 target_size));
break;
+   case offsetof(struct bpf_perf_event_data, cpu_entry):
+   *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct 
bpf_perf_event_data_kern,
+  data), si->dst_reg, 
si->src_reg,
+ offsetof(struct bpf_perf_event_data_kern, 
data));
+   *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct perf_sample_data, 
cpu_entry, 8,
+target_size));
+   break;
default:
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct 
bpf_perf_event_data_kern,
   regs), si->dst_reg, 
si->src_reg,
diff --git a/tools/include/uapi/linux/bpf_perf_event.h 
b/tools/include/uapi/linux/bpf_perf_event.h
index eb1b9d21250c..4856b4396ece 100644
--- a/tools/include/uapi/linux/bpf_perf_event.h
+++ b/tools/include/uapi/linux/bpf_perf_event.h
@@ -14,6 +14,10 @@ struct bpf_perf_event_data {
bpf_user_pt_regs_t regs;
__u64 sample_period;
__u64 addr;
+   struct {
+   u32 cpu;
+   u32 reserved;
+   }   cpu_entry;
  };
  
  #endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */




Re: [syzbot] [bpf?] [trace?] WARNING in group_send_sig_info

2024-04-29 Thread Yonghong Song



On 4/27/24 9:34 AM, syzbot wrote:

Hello,

syzbot found the following issue on:

HEAD commit:443574b03387 riscv, bpf: Fix kfunc parameters incompatibil..
git tree:   bpf
console output: https://syzkaller.appspot.com/x/log.txt?x=11ca8fe718
kernel config:  https://syzkaller.appspot.com/x/.config?x=6fb1be60a193d440
dashboard link: https://syzkaller.appspot.com/bug?extid=1902c6d326478ce2dfb0
compiler:   Debian clang version 15.0.6, GNU ld (GNU Binutils for Debian) 
2.40

Unfortunately, I don't have any reproducer for this issue yet.

Downloadable assets:
disk image: 
https://storage.googleapis.com/syzbot-assets/3f355021a085/disk-443574b0.raw.xz
vmlinux: 
https://storage.googleapis.com/syzbot-assets/44cf4de7472a/vmlinux-443574b0.xz
kernel image: 
https://storage.googleapis.com/syzbot-assets/a99a36c7ad65/bzImage-443574b0.xz

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+1902c6d326478ce2d...@syzkaller.appspotmail.com

[ cut here ]
raw_local_irq_restore() called with IRQs enabled
WARNING: CPU: 1 PID: 7785 at kernel/locking/irqflag-debug.c:10 
warn_bogus_irq_restore+0x29/0x40 kernel/locking/irqflag-debug.c:10
Modules linked in:
CPU: 1 PID: 7785 Comm: syz-executor.3 Not tainted 
6.8.0-syzkaller-05236-g443574b03387 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
03/27/2024
RIP: 0010:warn_bogus_irq_restore+0x29/0x40 kernel/locking/irqflag-debug.c:10
Code: 90 f3 0f 1e fa 90 80 3d de 59 01 04 00 74 06 90 c3 cc cc cc cc c6 05 cf 59 01 
04 01 90 48 c7 c7 20 ba aa 8b e8 f8 d5 e7 f5 90 <0f> 0b 90 90 90 c3 cc cc cc cc 
66 2e 0f 1f 84 00 00 00 00 00 0f 1f
RSP: 0018:c9000399fbb8 EFLAGS: 00010246

RAX: 4aede97b00455d00 RBX: 192000733f7c RCX: 88802a129e00
RDX:  RSI:  RDI: 
RBP: c9000399fc50 R08: 8157cc12 R09: 1110172a51a2
R10: dc00 R11: ed10172a51a3 R12: dc00
R13: 192000733f78 R14: c9000399fbe0 R15: 0246
FS:  7ae76480() GS:8880b950() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7ffc27e190f8 CR3: 6cb5 CR4: 003506f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
  
  __raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:151 [inline]
  _raw_spin_unlock_irqrestore+0x120/0x140 kernel/locking/spinlock.c:194
  spin_unlock_irqrestore include/linux/spinlock.h:406 [inline]
  unlock_task_sighand include/linux/sched/signal.h:754 [inline]
  do_send_sig_info kernel/signal.c:1302 [inline]
  group_send_sig_info+0x2e0/0x310 kernel/signal.c:1453
  bpf_send_signal_common+0x2dd/0x430 kernel/trace/bpf_trace.c:881
  bpf_send_signal kernel/trace/bpf_trace.c:886 [inline]
  bpf_send_signal+0x19/0x30 kernel/trace/bpf_trace.c:884
  bpf_prog_8cc4ff36b5985b6a+0x1d/0x1f
  bpf_dispatcher_nop_func include/linux/bpf.h:1234 [inline]
  __bpf_prog_run include/linux/filter.h:650 [inline]
  bpf_prog_run include/linux/filter.h:664 [inline]
  __bpf_trace_run kernel/trace/bpf_trace.c:2381 [inline]
  bpf_trace_run2+0x375/0x420 kernel/trace/bpf_trace.c:2420
  trace_sys_exit include/trace/events/syscalls.h:44 [inline]
  syscall_exit_work+0x153/0x170 kernel/entry/common.c:163
  syscall_exit_to_user_mode_prepare kernel/entry/common.c:194 [inline]
  __syscall_exit_to_user_mode_work kernel/entry/common.c:199 [inline]
  syscall_exit_to_user_mode+0x273/0x360 kernel/entry/common.c:212
  do_syscall_64+0x10a/0x240 arch/x86/entry/common.c:89
  entry_SYSCALL_64_after_hwframe+0x6d/0x75


The following are related functions.

struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
   unsigned long *flags)
{
struct sighand_struct *sighand;

rcu_read_lock();
for (;;) {
sighand = rcu_dereference(tsk->sighand);
if (unlikely(sighand == NULL))
break;

/*
 * This sighand can be already freed and even reused, but
 * we rely on SLAB_TYPESAFE_BY_RCU and sighand_ctor() which
 * initializes ->siglock: this slab can't go away, it has
 * the same object type, ->siglock can't be reinitialized.
 *
 * We need to ensure that tsk->sighand is still the same
 * after we take the lock, we can race with de_thread() or
 * __exit_signal(). In the latter case the next iteration
 * must see ->sighand == NULL.
 */
spin_lock_irqsave(&sighand->siglock, *flags);
if (likely(sighand == rcu_access_pointer(tsk->sighand)))
break;
spin_unlock_irqrestore(&sighand->siglock, *flags);
}
rcu_read_unlock();


Re: BUG: unable to handle kernel paging request in bpf_probe_read_compat_str

2023-12-20 Thread Yonghong Song



On 12/20/23 1:19 AM, Hou Tao wrote:

Hi,

On 12/14/2023 11:40 AM, xingwei lee wrote:

Hello, I found a bug in net/bpf in the latest upstream linux and
confirmed it in the latest net tree and latest bpf tree, titled BUG:
unable to handle kernel paging request in bpf_probe_read_compat_str

If you fix this issue, please add the following tag to the commit:
Reported-by: xingwei Lee 

kernel: net 9702817384aa4a3700643d0b26e71deac0172cfd / bpf
2f2fee2bf74a7e31d06fc6cb7ba2bd4dd7753c99
Kernel config: 
https://syzkaller.appspot.com/text?tag=KernelConfig&x=b50bd31249191be8

in the latest bpf tree, the crash looks like:

TITLE: BUG: unable to handle kernel paging request in bpf_probe_read_compat_str
CORRUPTED: false ()
MAINTAINERS (TO): [a...@linux-foundation.org linux...@kvack.org]
MAINTAINERS (CC): [linux-kernel@vger.kernel.org]

BUG: unable to handle page fault for address: ff0

Thanks for the report and reproducer. The output is incomplete. It
should be: "BUG: unable to handle page fault for address:
ff60". The address is a vsyscall address, so
handle_page_fault() considers that the fault address is in userspace
instead of kernel space, and there will be no fix-up for the exception
and oops happened. Will post a fix and a selftest for it.


There is a proposed fix here:

https://lore.kernel.org/bpf/87r0jwquhv.ffs@tglx/

Not sure the fix in the above link is merged to some upstream branch or not.


#PF: supervisor read access in kernel mode
#PF: error_code(0x) - not-present page
PGD cf7a067 P4D cf7a067 PUD cf7c067 PMD cf9f067 0
Oops:  [#1] PREEMPT SMP KASAN
CPU: 1 PID: 8219 Comm: 9de Not tainted 6.7.0-rc41
Hardware name: QEMU Standard PC (i440FX + PIIX, 4
RIP: 0010:strncpy_from_kernel_nofault+0xc4/0x270 mm/maccess.c:91
Code: 83 85 6c 17 00 00 01 48 8b 2c 24 eb 18 e8 0
RSP: 0018:c900114e7ac0 EFLAGS: 00010293
RAX:  RBX: c900114e7b30 RCX:2
RDX: 8880183abcc0 RSI: 81b8c9c4 RDI:c
RBP: ff60 R08: 0001 R09:0
R10: 0001 R11: 0001 R12:8
R13: ff60 R14: 0008 R15:0
FS:  () GS:88823bc0(0
CS:  0010 DS:  ES:  CR0: 80050033
CR2: ff60 CR3: 0cf77000 CR4:0
PKRU: 5554
Call Trace:

bpf_probe_read_kernel_str_common kernel/trace/bpf_trace.c:262 [inline]
bpf_probe_read_compat_str kernel/trace/bpf_trace.c:310 [inline]
bpf_probe_read_compat_str+0x12f/0x170 kernel/trace/bpf_trace.c:303
bpf_prog_f17ebaf3f5f7baf8+0x42/0x44
bpf_dispatcher_nop_func include/linux/bpf.h:1196 [inline]
__bpf_prog_run include/linux/filter.h:651 [inline]
bpf_prog_run include/linux/filter.h:658 [inline]
__bpf_trace_run kernel/trace/bpf_trace.c:2307 [inline]
bpf_trace_run2+0x14e/0x410 kernel/trace/bpf_trace.c:2346
trace_kfree include/trace/events/kmem.h:94 [inline]
kfree+0xec/0x150 mm/slab_common.c:1043
vma_numab_state_free include/linux/mm.h:638 [inline]
__vm_area_free+0x3e/0x140 kernel/fork.c:525
remove_vma+0x128/0x170 mm/mmap.c:146
exit_mmap+0x453/0xa70 mm/mmap.c:3332
__mmput+0x12a/0x4d0 kernel/fork.c:1349
mmput+0x62/0x70 kernel/fork.c:1371
exit_mm kernel/exit.c:567 [inline]
do_exit+0x9aa/0x2ac0 kernel/exit.c:858
do_group_exit+0xd4/0x2a0 kernel/exit.c:1021
__do_sys_exit_group kernel/exit.c:1032 [inline]
__se_sys_exit_group kernel/exit.c:1030 [inline]
__x64_sys_exit_group+0x3e/0x50 kernel/exit.c:1030
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0x41/0x110 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x63/0x6b


=* repro.c =*
// autogenerated by syzkaller (https://github.com/google/syzkaller)

#define _GNU_SOURCE

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

#ifndef __NR_bpf
#define __NR_bpf 321
#endif

#define BITMASK(bf_off, bf_len) (((1ull << (bf_len)) - 1) << (bf_off))
#define STORE_BY_BITMASK(type, htobe, addr, val, bf_off, bf_len) \
  *(type*)(addr) =   \
  htobe((htobe(*(type*)(addr)) & ~BITMASK((bf_off), (bf_len))) | \
(((type)(val) << (bf_off)) & BITMASK((bf_off), (bf_len

uint64_t r[1] = {0x};

int main(void) {
  syscall(__NR_mmap, /*addr=*/0x1000ul, /*len=*/0x1000ul, /*prot=*/0ul,
  /*flags=*/0x32ul, /*fd=*/-1, /*offset=*/0ul);
  syscall(__NR_mmap, /*addr=*/0x2000ul, /*len=*/0x100ul, /*prot=*/7ul,
  /*flags=*/0x32ul, /*fd=*/-1, /*offset=*/0ul);
  syscall(__NR_mmap, /*addr=*/0x2100ul, /*len=*/0x1000ul, /*prot=*/0ul,
  /*flags=*/0x32ul, /*fd=*/-1, /*offset=*/0ul);
  intptr_t res = 0;
  *(uint32_t*)0x20c0 = 0x11;
  *(uint32_t*)0x20c4 = 0xb;
  *(uint64_t*)0x20c8 = 0x2180;
  *(uint8_t*)0x2180 = 0x18;
  STORE_BY_BITMASK(uint8_t, , 0x2181, 0, 0, 4);
  STORE_BY_BITMASK(uint8_t, , 0x2181, 0, 4, 4);
  *(uint16_t*)0x2182 = 0;
  *(uint32_t*)0x2184 = 0;
  *(uint8_t*)0x2188 = 0;
  *(uint8_t*)0x2189 = 0;
  *(uint16_t*)0x218a = 0;
  

Re: [PATCH net] bpf: test_run: fix WARNING in format_decode

2023-11-21 Thread Yonghong Song



On 11/21/23 7:50 PM, Edward Adam Davis wrote:

Confirm that skb->len is not 0 to ensure that skb length is valid.

Fixes: 114039b34201 ("bpf: Move skb->len == 0 checks into __bpf_redirect")
Reported-by: syzbot+e2c932aec5c8a6e1d...@syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis 


Stan, Could you take a look at this patch?



---
  net/bpf/test_run.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index c9fdcc5cdce1..78258a822a5c 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -845,6 +845,9 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct 
__sk_buff *__skb)
  {
struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
  
+	if (!skb->len)

+   return -EINVAL;
+
if (!__skb)
return 0;
  




Re: [PATCH] bpf: Fix backport of "bpf: restrict unknown scalars of mixed signed bounds for unprivileged"

2021-04-19 Thread Yonghong Song




On 4/19/21 4:56 PM, Samuel Mendoza-Jonas wrote:

The 4.14 backport of 9d7eceede ("bpf: restrict unknown scalars of mixed
signed bounds for unprivileged") adds the PTR_TO_MAP_VALUE check to the
wrong location in adjust_ptr_min_max_vals(), most likely because 4.14
doesn't include the commit that updates the if-statement to a
switch-statement (aad2eeaf4 "bpf: Simplify ptr_min_max_vals adjustment").

Move the check to the proper location in adjust_ptr_min_max_vals().

Fixes: 17efa65350c5a ("bpf: restrict unknown scalars of mixed signed bounds for 
unprivileged")
Signed-off-by: Samuel Mendoza-Jonas 
Reviewed-by: Frank van der Linden 
Reviewed-by: Ethan Chen 


Just to be clear, the patch is for 4.14 stable branch.

Acked-by: Yonghong Song 


Re: [PATCH v2] tools: do not include scripts/Kbuild.include

2021-04-16 Thread Yonghong Song




On 4/16/21 6:00 AM, Masahiro Yamada wrote:

Since commit d9f4ff50d2aa ("kbuild: spilt cc-option and friends to
scripts/Makefile.compiler"), some kselftests fail to build.

The tools/ directory opted out Kbuild, and went in a different
direction. They copy any kind of files to the tools/ directory
in order to do whatever they want in their world.

tools/build/Build.include mimics scripts/Kbuild.include, but some
tool Makefiles included the Kbuild one to import a feature that is
missing in tools/build/Build.include:

  - Commit ec04aa3ae87b ("tools/thermal: tmon: use "-fstack-protector"
only if supported") included scripts/Kbuild.include from
tools/thermal/tmon/Makefile to import the cc-option macro.

  - Commit c2390f16fc5b ("selftests: kvm: fix for compilers that do
not support -no-pie") included scripts/Kbuild.include from
tools/testing/selftests/kvm/Makefile to import the try-run macro.

  - Commit 9cae4ace80ef ("selftests/bpf: do not ignore clang
failures") included scripts/Kbuild.include from
tools/testing/selftests/bpf/Makefile to import the .DELETE_ON_ERROR
target.

  - Commit 0695f8bca93e ("selftests/powerpc: Handle Makefile for
unrecognized option") included scripts/Kbuild.include from
tools/testing/selftests/powerpc/pmu/ebb/Makefile to import the
try-run macro.

Copy what they need into tools/build/Build.include, and make them
include it instead of scripts/Kbuild.include.

Link: 
https://lore.kernel.org/lkml/86dadf33-70f7-a5ac-cb8c-64966d2f4...@linux.ibm.com/
Fixes: d9f4ff50d2aa ("kbuild: spilt cc-option and friends to 
scripts/Makefile.compiler")
Reported-by: Janosch Frank 
Reported-by: Christian Borntraeger 
Signed-off-by: Masahiro Yamada 


LGTM although I see some tools Makefile directly added 
".DELETE_ON_ERROR:" in their Makefile.


Acked-by: Yonghong Song 


Re: 5.?? regression: strace testsuite OOpses kernel on ia64

2021-04-10 Thread Yonghong Song




On 4/9/21 2:20 PM, Sergei Trofimovich wrote:

On Tue, 23 Feb 2021 18:53:21 +
Sergei Trofimovich  wrote:


The crash seems to be related to sock_filter-v test from strace:
 https://github.com/strace/strace/blob/master/tests/seccomp-filter-v.c

Here is an OOps:

[  818.089904] BUG: Bad page map in process sock_filter-v  pte:0001 
pmd:118580001
[  818.089904] page:e6a429c8 refcount:1 mapcount:-1 
mapping: index:0x0 pfn:0x0
[  818.089904] flags: 0x1000(reserved)
[  818.089904] raw: 1000 a0004008 a0004008 

[  818.089904] raw:   0001fffe
[  818.089904] page dumped because: bad pte
[  818.089904] addr: vm_flags:04044011 
anon_vma: mapping: index:0
[  818.095483] file:(null) fault:0x0 mmap:0x0 readpage:0x0
[  818.095483] CPU: 0 PID: 5990 Comm: sock_filter-v Not tainted 
5.11.0-3-gbfa5a4929c90 #57
[  818.095483] Hardware name: hp server rx3600   , BIOS 04.03   
 04/08/2008
[  818.095483]
[  818.095483] Call Trace:
[  818.095483]  [] show_stack+0x90/0xc0
[  818.095483] sp=e00118707bb0 
bsp=e001187013c0
[  818.095483]  [] dump_stack+0x120/0x160
[  818.095483] sp=e00118707d80 
bsp=e00118701348
[  818.095483]  [] print_bad_pte+0x300/0x3a0
[  818.095483] sp=e00118707d80 
bsp=e001187012e0
[  818.099483]  [] unmap_page_range+0xa90/0x11a0
[  818.099483] sp=e00118707d80 
bsp=e00118701140
[  818.099483]  [] unmap_vmas+0xc0/0x100
[  818.099483] sp=e00118707da0 
bsp=e00118701108
[  818.099483]  [] exit_mmap+0x150/0x320
[  818.099483] sp=e00118707da0 
bsp=e001187010d8
[  818.099483]  [] mmput+0x60/0x200
[  818.099483] sp=e00118707e20 
bsp=e001187010b0
[  818.103482]  [] do_exit+0x6f0/0x18a0
[  818.103482] sp=e00118707e20 
bsp=e00118701038
[  818.103482]  [] do_group_exit+0x90/0x2a0
[  818.103482] sp=e00118707e30 
bsp=e00118700ff0
[  818.103482]  [] sys_exit_group+0x20/0x40
[  818.103482] sp=e00118707e30 
bsp=e00118700f98
[  818.107482]  [] ia64_trace_syscall+0xf0/0x130
[  818.107482] sp=e00118707e30 
bsp=e00118700f98
[  818.107482]  [] ia64_ivt+0x00040720/0x400
[  818.107482] sp=e00118708000 
bsp=e00118700f98
[  818.115482] Disabling lock debugging due to kernel taint
[  818.115482] BUG: Bad rss-counter state mm:2eec6412 type:MM_FILEPAGES 
val:-1
[  818.132256] Unable to handle kernel NULL pointer dereference (address 
0068)
[  818.133904] sock_filter-v-X[5999]: Oops 11012296146944 [1]
[  818.133904] Modules linked in: acpi_ipmi ipmi_si usb_storage e1000 
ipmi_devintf ipmi_msghandler rtc_efi
[  818.133904]
[  818.133904] CPU: 0 PID: 5999 Comm: sock_filter-v-X Tainted: GB   
  5.11.0-3-gbfa5a4929c90 #57
[  818.133904] Hardware name: hp server rx3600   , BIOS 04.03   
 04/08/2008
[  818.133904] psr : 121008026010 ifs : 8288 ip  : 
[]Tainted: GB (5.11.0-3-gbfa5a4929c90)
[  818.133904] ip is at bpf_prog_free+0x21/0xe0
[  818.133904] unat:  pfs : 0307 rsc : 
0003
[  818.133904] rnat:  bsps:  pr  : 
00106a5a51665965
[  818.133904] ldrs:  ccv : 12088904 fpsr: 
0009804c8a70033f
[  818.133904] csd :  ssd : 
[  818.133904] b0  : a00100d54080 b6  : a00100d53fe0 b7  : 
a001cef0
[  818.133904] f6  : 0ffefb0c50daa1b67f89a f7  : 0ffed8b3e4fdb0800
[  818.133904] f8  : 10017fbd1bc00 f9  : 1000eb95f
[  818.133904] f10 : 10008ade20716a6c83cc1 f11 : 1003e02b7
[  818.133904] r1  : a0010176b300 r2  : a0028004 r3  : 

[  818.133904] r8  : 0008 r9  : e0011873f800 r10 : 
e00102c18600
[  818.133904] r11 : e00102c19600 r12 : e0011873f7f0 r13 : 
e00118738000
[  818.133904] r14 : 0068 r15 : a0028028 r16 : 
e5606a70
[  818.133904] r17 : e00102c18600 r18 : e00104370748 r19 : 
e00102c18600
[  818.133904] r20 : e00102c18600 r21 : e5606a78 r22 : 
a0010156bd28
[  818.133904] r23 : a0010147fdf4 r24 : 4000 r25 : 
e00104370750
[  818.133904] r26 : a001012f7088 r27 : a00100d53fe0 r28 : 
0001
[  818.133904] r29 : e0011873f800 r30 : e0011873f810 r31 : 

Re: [syzbot] WARNING in bpf_test_run

2021-04-01 Thread Yonghong Song




On 4/1/21 3:05 PM, Yonghong Song wrote:



On 4/1/21 4:29 AM, syzbot wrote:

Hello,

syzbot found the following issue on:

HEAD commit:    36e79851 libbpf: Preserve empty DATASEC BTFs during 
static..

git tree:   bpf-next
console output: 
https://syzkaller.appspot.com/x/log.txt?x=1569bb06d0 
kernel config:  
https://syzkaller.appspot.com/x/.config?x=7eff0f22b8563a5f 
dashboard link: 
https://syzkaller.appspot.com/bug?extid=774c590240616eaa3423 
syz repro:  
https://syzkaller.appspot.com/x/repro.syz?x=17556b7cd0 
C reproducer:   
https://syzkaller.appspot.com/x/repro.c?x=1772be26d0 


The issue was bisected to:

commit 997acaf6b4b59c6a9c259740312a69ea549cc684
Author: Mark Rutland 
Date:   Mon Jan 11 15:37:07 2021 +

 lockdep: report broken irq restoration

bisection log:  
https://syzkaller.appspot.com/x/bisect.txt?x=10197016d0 
final oops: 
https://syzkaller.appspot.com/x/report.txt?x=12197016d0 
console output: 
https://syzkaller.appspot.com/x/log.txt?x=14197016d0 

IMPORTANT: if you fix the issue, please add the following tag to the 
commit:

Reported-by: syzbot+774c590240616eaa3...@syzkaller.appspotmail.com
Fixes: 997acaf6b4b5 ("lockdep: report broken irq restoration")

[ cut here ]
WARNING: CPU: 0 PID: 8725 at include/linux/bpf-cgroup.h:193 
bpf_cgroup_storage_set include/linux/bpf-cgroup.h:193 [inline]
WARNING: CPU: 0 PID: 8725 at include/linux/bpf-cgroup.h:193 
bpf_test_run+0x65e/0xaa0 net/bpf/test_run.c:109


I will look at this issue. Thanks!


Modules linked in:
CPU: 0 PID: 8725 Comm: syz-executor927 Not tainted 
5.12.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, 
BIOS Google 01/01/2011

RIP: 0010:bpf_cgroup_storage_set include/linux/bpf-cgroup.h:193 [inline]
RIP: 0010:bpf_test_run+0x65e/0xaa0 net/bpf/test_run.c:109
Code: e9 29 fe ff ff e8 b2 9d 3a fa 41 83 c6 01 bf 08 00 00 00 44 89 
f6 e8 51 a5 3a fa 41 83 fe 08 0f 85 74 fc ff ff e8 92 9d 3a fa <0f> 0b 
bd f0  ff e9 5c fd ff ff e8 81 9d 3a fa 83 c5 01 bf 08

RSP: 0018:c900017bfaf0 EFLAGS: 00010293
RAX:  RBX: c9f29000 RCX: 
RDX: 88801bc68000 RSI: 8739543e RDI: 0003
RBP: 0007 R08: 0008 R09: 0001
R10: 8739542f R11:  R12: dc00
R13: 888021dd54c0 R14: 0008 R15: 
FS:  7f00157d7700() GS:8880b9c0() 
knlGS:

CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7f0015795718 CR3: 157ae000 CR4: 001506f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
  bpf_prog_test_run_skb+0xabc/0x1c70 net/bpf/test_run.c:628
  bpf_prog_test_run kernel/bpf/syscall.c:3132 [inline]
  __do_sys_bpf+0x218b/0x4f40 kernel/bpf/syscall.c:4411
  do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46


Run on my qemu (4 cpus) with C reproducer and I cannot reproduce the 
result. It already ran 30 minutes and still running. Checked the code, 
it is just doing a lot of parallel bpf_prog_test_run's.


The failure is in the below WARN_ON_ONCE code:

175 static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
176 
*storage[MAX_BPF_CGROUP_STORAGE_TYPE])

177 {
178 enum bpf_cgroup_storage_type stype;
179 int i, err = 0;
180
181 preempt_disable();
182 for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
183 if 
(unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))

184 continue;
185
186 this_cpu_write(bpf_cgroup_storage_info[i].task, 
current);

187 for_each_cgroup_storage_type(stype)
188 
this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],

189storage[stype]);
190 goto out;
191 }
192 err = -EBUSY;
193 WARN_ON_ONCE(1);
194
195 out:
196 preempt_enable();
197 return err;
198 }

Basically it shows the stress test triggered a warning due to
limited kernel resource.


  entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x446199
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 11 15 00 00 90 48 89 f8 48 
89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 
01 f0  73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48

RSP: 002b:7f00157d72f8 EFLAGS: 0246 ORIG_RAX: 0141
RAX: ffda RBX: 004cb440 RCX: 00446199
RDX: 0028 RSI: 2080 RDI: 000a
RBP: 0049b074 R08:  R09: 
R10:  R11: 0246 R12: f9abde7200f522cd
R13: 3952ddf3af240c07 R14: 1631e0d82d3fa99d R15: 004cb448


---
This report is generated by a bot. It may contain errors.
See 
https://goo.gl/tpsmEJ   
f

Re: [syzbot] WARNING in bpf_test_run

2021-04-01 Thread Yonghong Song




On 4/1/21 4:29 AM, syzbot wrote:

Hello,

syzbot found the following issue on:

HEAD commit:36e79851 libbpf: Preserve empty DATASEC BTFs during static..
git tree:   bpf-next
console output: https://syzkaller.appspot.com/x/log.txt?x=1569bb06d0
kernel config:  https://syzkaller.appspot.com/x/.config?x=7eff0f22b8563a5f
dashboard link: https://syzkaller.appspot.com/bug?extid=774c590240616eaa3423
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=17556b7cd0
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=1772be26d0

The issue was bisected to:

commit 997acaf6b4b59c6a9c259740312a69ea549cc684
Author: Mark Rutland 
Date:   Mon Jan 11 15:37:07 2021 +

 lockdep: report broken irq restoration

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=10197016d0
final oops: https://syzkaller.appspot.com/x/report.txt?x=12197016d0
console output: https://syzkaller.appspot.com/x/log.txt?x=14197016d0

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+774c590240616eaa3...@syzkaller.appspotmail.com
Fixes: 997acaf6b4b5 ("lockdep: report broken irq restoration")

[ cut here ]
WARNING: CPU: 0 PID: 8725 at include/linux/bpf-cgroup.h:193 
bpf_cgroup_storage_set include/linux/bpf-cgroup.h:193 [inline]
WARNING: CPU: 0 PID: 8725 at include/linux/bpf-cgroup.h:193 
bpf_test_run+0x65e/0xaa0 net/bpf/test_run.c:109


I will look at this issue. Thanks!


Modules linked in:
CPU: 0 PID: 8725 Comm: syz-executor927 Not tainted 5.12.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
RIP: 0010:bpf_cgroup_storage_set include/linux/bpf-cgroup.h:193 [inline]
RIP: 0010:bpf_test_run+0x65e/0xaa0 net/bpf/test_run.c:109
Code: e9 29 fe ff ff e8 b2 9d 3a fa 41 83 c6 01 bf 08 00 00 00 44 89 f6 e8 51 a5 3a 
fa 41 83 fe 08 0f 85 74 fc ff ff e8 92 9d 3a fa <0f> 0b bd f0 ff ff ff e9 5c fd 
ff ff e8 81 9d 3a fa 83 c5 01 bf 08
RSP: 0018:c900017bfaf0 EFLAGS: 00010293
RAX:  RBX: c9f29000 RCX: 
RDX: 88801bc68000 RSI: 8739543e RDI: 0003
RBP: 0007 R08: 0008 R09: 0001
R10: 8739542f R11:  R12: dc00
R13: 888021dd54c0 R14: 0008 R15: 
FS:  7f00157d7700() GS:8880b9c0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7f0015795718 CR3: 157ae000 CR4: 001506f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
  bpf_prog_test_run_skb+0xabc/0x1c70 net/bpf/test_run.c:628
  bpf_prog_test_run kernel/bpf/syscall.c:3132 [inline]
  __do_sys_bpf+0x218b/0x4f40 kernel/bpf/syscall.c:4411
  do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
  entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x446199
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 11 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 
48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 
c7 c1 b8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:7f00157d72f8 EFLAGS: 0246 ORIG_RAX: 0141
RAX: ffda RBX: 004cb440 RCX: 00446199
RDX: 0028 RSI: 2080 RDI: 000a
RBP: 0049b074 R08:  R09: 
R10:  R11: 0246 R12: f9abde7200f522cd
R13: 3952ddf3af240c07 R14: 1631e0d82d3fa99d R15: 004cb448


---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ  for more information about syzbot.
syzbot engineers can be reached at syzkal...@googlegroups.com.

syzbot will keep track of this issue. See:
https://goo.gl/tpsmEJ#status  for how to communicate with syzbot.
For information about bisection process see: https://goo.gl/tpsmEJ#bisection
syzbot can test patches for this issue, for details see:
https://goo.gl/tpsmEJ#testing-patches



Re: linux-next: manual merge of the net-next tree with the net tree

2021-03-19 Thread Yonghong Song




On 3/19/21 12:21 AM, Daniel Borkmann wrote:

On 3/19/21 3:11 AM, Piotr Krysiuk wrote:

Hi Daniel,

On Fri, Mar 19, 2021 at 12:16 AM Stephen Rothwell 
wrote:


diff --cc kernel/bpf/verifier.c
index 44e4ec1640f1,f9096b049cd6..
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@@ -5876,10 -6056,22 +6060,23 @@@ static int 
retrieve_ptr_limit(const str

 if (mask_to_left)
 *ptr_limit = MAX_BPF_STACK + off;
 else
  -  *ptr_limit = -off;
  -  return 0;
  +  *ptr_limit = -off - 1;
  +  return *ptr_limit >= max ? -ERANGE : 0;
+   case PTR_TO_MAP_KEY:
+   /* Currently, this code is not exercised as the only use
+    * is bpf_for_each_map_elem() helper which requires
+    * bpf_capble. The code has been tested manually for
+    * future use.
+    */
+   if (mask_to_left) {
+   *ptr_limit = ptr_reg->umax_value + ptr_reg->off;
+   } else {
+   off = ptr_reg->smin_value + ptr_reg->off;
+   *ptr_limit = ptr_reg->map_ptr->key_size - off;
+   }
+   return 0;



PTR_TO_MAP_VALUE logic above looks like copy-paste of old 
PTR_TO_MAP_VALUE

code from before "bpf: Fix off-by-one for area size in creating mask to
left" and is apparently affected by the same off-by-one, except this time
on "key_size" area and not "value_size".

This needs to be fixed in the same way as we did with PTR_TO_MAP_VALUE.
What is the best way to proceed?


Hm, not sure why PTR_TO_MAP_KEY was added by 69c087ba6225 in the first 
place, I
presume noone expects this to be used from unprivileged as the comment 
says.
Resolution should be to remove the PTR_TO_MAP_KEY case entirely from 
that switch

until we have an actual user.


Alexei suggested so that we don't forget it in the future if
bpf_capable() requirement is removed.
   https://lore.kernel.org/bpf/c837ae55-2487-2f39-47f6-a18781dc6...@fb.com/

I am okay with either way, fix it or remove it.



Thanks,
Daniel


Re: CLANG LTO compatibility issue with DEBUG_INFO_BTF

2021-03-18 Thread Yonghong Song




On 3/18/21 8:45 PM, Jisheng Zhang wrote:

Hi,

When trying the latest 5.12-rc3 with both LTO_CLANG_THIN and DEBUG_INFO_BTF
enabled, I met lots of warnings such as:

...
tag__recode_dwarf_type: couldn't find 0x4a7ade5 type for 0x4ab9f88 
(subroutine_type)!
ftype__recode_dwarf_types: couldn't find 0x4a7ade5 type for 0x4ab9fa4 
(formal_parameter)!
...
namespace__recode_dwarf_types: couldn't find 0x4a8ff4a type for 0x4aba05c 
(member)!
namespace__recode_dwarf_types: couldn't find 0x4a7ae9b type for 0x4aba084 
(member)!
...
WARN: multiple IDs found for 'path': 281, 729994 - using 281
WARN: multiple IDs found for 'task_struct': 421, 730101 - using 421
...


then finally get build error:
FAILED unresolved symbol vfs_truncate


Is this a known issue? Do we need to make DEBUG_INFO_BTF depend on !LTO?


This is a known issue for pahole. pahole does not handle dwarf generated
with LTO well. Bill Wendling from google is looking at the issue 
and I will help look at the issue as well. Since bpf heavily depends

on BTF, at this point, I suggest if you are using bpf, please do not
turn on LTO. Or if you build with LTO, just turn off DEBUG_INFO_BTF
in your config. Thanks!



pahole version: v1.20
clang version: 11.0


Thanks



Re: [PATCH] bpf: selftests: remove unused 'nospace_err' in tests for batched ops in array maps

2021-03-15 Thread Yonghong Song




On 3/15/21 6:29 AM, Pedro Tammela wrote:

This seems to be a remnant from the hashmap tests.

Signed-off-by: Pedro Tammela 


Acked-by: Yonghong Song 


Re: [BUG] One-liner array initialization with two pointers in BPF results in NULLs

2021-03-10 Thread Yonghong Song




On 3/10/21 3:48 AM, Florent Revest wrote:

On Wed, Mar 10, 2021 at 6:16 AM Yonghong Song  wrote:

On 3/9/21 7:43 PM, Yonghong Song wrote:

On 3/9/21 5:54 PM, Florent Revest wrote:

I noticed that initializing an array of pointers using this syntax:
__u64 array[] = { (__u64), (__u64) };
(which is a fairly common operation with macros such as BPF_SEQ_PRINTF)
always results in array[0] and array[1] being NULL.

Interestingly, if the array is only initialized with one pointer, ex:
__u64 array[] = { (__u64) };
Then array[0] will not be NULL.

Or if the array is initialized field by field, ex:
__u64 array[2];
array[0] = (__u64)
array[1] = (__u64)
Then array[0] and array[1] will not be NULL either.

I'm assuming that this should have something to do with relocations
and might be a bug in clang or in libbpf but because I don't know much
about these, I thought that reporting could be a good first step. :)


Thanks for reporting. What you guess is correct, this is due to
relocations :-(

The compiler notoriously tends to put complex initial values into
rodata section. For example, for
 __u64 array[] = { (__u64), (__u64) };
the compiler will put
 { (__u64), (__u64) }
into rodata section.

But  and  themselves need relocation since they are
address of static variables which will sit inside .data section.

So in the elf file, you will see the following relocations:

RELOCATION RECORDS FOR [.rodata]:
OFFSET   TYPE VALUE
0018 R_BPF_64_64  .data
0020 R_BPF_64_64  .data


Right :) Thank you for the explanations Yonghong!


Currently, libbpf does not handle relocation inside .rodata
section, so their content remains 0.


Just for my own edification, why is .rodata relocation not yet handled
in libbpf ? Is it because of a read-only mapping that makes it more
difficult ?


We didn't have this use case before. In general, people do not put
string pointers in init code in the declaration. I think 
bpf_seq_printf() is special about this and hence triggering

the issue.

To support relocation of rodata section, kernel needs to be
involved and this is actually more complicated as
the relocation is against .data section. Two issues the kernel
needs to deal with:
   - .data section will be another map in kernel, so i.e.,
 relocation of .rodata map value against another map.
   - .data section may be modified, some protection might
 be needed to prevent this. We may ignore this requirement
 since user space may have similar issue.

This is a corner case, if we can workaround in the libbpf, in
this particular case, bpf_tracing.h. I think it will be
good enough, not adding further complexity in kernel for
such a corner case.




That is why you see the issue with pointer as NULL.

With array size of 1, compiler does not bother to put it into
rodata section.

I *guess* that it works in the macro due to some kind of heuristics,
e.g., nested blocks, etc, and llvm did not promote the array init value
to rodata. I will double check whether llvm can completely prevent
such transformation.

Maybe in the future libbpf is able to handle relocations for
rodata section too. But for the time being, please just consider to use
either macro, or the explicit array assignment.


Digging into the compiler, the compiler tries to make *const* initial
value into rodata section if the initial value size > 64, so in
this case, macro does not work either. I think this is how you
discovered the issue.


Indeed, I was using a macro similar to BPF_SEQ_PRINTF and this is how
I found the bug.


The llvm does not provide target hooks to
influence this transformation.


Oh, that is unfortunate :) Thanks for looking into it! I feel that the
real fix would be in libbpf anyway and the rest is just workarounds.


The real fix will need libbpf and kernel.




So, there are two workarounds,
(1).__u64 param_working[2];
  param_working[0] = (__u64)str1;
  param_working[1] = (__u64)str2;
(2). BPF_SEQ_PRINTF(seq, "%s ", str1);
   BPF_SEQ_PRINTF(seq, "%s", str2);


(2) is a bit impractical for my actual usecase. I am implementing a
bpf_snprintf helper (patch series Coming Soon TM) and I wanted to keep
the selftest short with a few BPF_SNPRINTF() calls that exercise most
format specifiers.


In practice, if you have at least one non-const format argument,
you should be fine. But if all format arguments are constant, then
none of them should be strings.


Just for context, this does not only happen for strings but also for
all sorts of pointers, for example, when I try to do address lookup of
global __ksym variables, which is important for my selftest.


Currently, in bpf_seq_printf(), we do memory copy for string
and certain ipv4/ipv6 addresses. ipv4 is not an issue as the compiler is
less likely to put it into rodata. for ipv6,

if it is a constant, we can just directly put it into the format
string. For many other sort of pointers, we ju

Re: [BUG] One-liner array initialization with two pointers in BPF results in NULLs

2021-03-09 Thread Yonghong Song




On 3/9/21 7:43 PM, Yonghong Song wrote:



On 3/9/21 5:54 PM, Florent Revest wrote:

I noticed that initializing an array of pointers using this syntax:
__u64 array[] = { (__u64), (__u64) };
(which is a fairly common operation with macros such as BPF_SEQ_PRINTF)
always results in array[0] and array[1] being NULL.

Interestingly, if the array is only initialized with one pointer, ex:
__u64 array[] = { (__u64) };
Then array[0] will not be NULL.

Or if the array is initialized field by field, ex:
__u64 array[2];
array[0] = (__u64)
array[1] = (__u64)
Then array[0] and array[1] will not be NULL either.

I'm assuming that this should have something to do with relocations
and might be a bug in clang or in libbpf but because I don't know much
about these, I thought that reporting could be a good first step. :)


Thanks for reporting. What you guess is correct, this is due to 
relocations :-(


The compiler notoriously tends to put complex initial values into
rodata section. For example, for
    __u64 array[] = { (__u64), (__u64) };
the compiler will put
    { (__u64), (__u64) }
into rodata section.

But  and  themselves need relocation since they are
address of static variables which will sit inside .data section.

So in the elf file, you will see the following relocations:

RELOCATION RECORDS FOR [.rodata]:
OFFSET   TYPE VALUE
0018 R_BPF_64_64  .data
0020 R_BPF_64_64  .data

Currently, libbpf does not handle relocation inside .rodata
section, so their content remains 0.

That is why you see the issue with pointer as NULL.

With array size of 1, compiler does not bother to put it into
rodata section.

I *guess* that it works in the macro due to some kind of heuristics,
e.g., nested blocks, etc, and llvm did not promote the array init value
to rodata. I will double check whether llvm can completely prevent
such transformation.

Maybe in the future libbpf is able to handle relocations for
rodata section too. But for the time being, please just consider to use 
either macro, or the explicit array assignment.


Digging into the compiler, the compiler tries to make *const* initial
value into rodata section if the initial value size > 64, so in
this case, macro does not work either. I think this is how you
discovered the issue. The llvm does not provide target hooks to 
influence this transformation.


So, there are two workarounds,
(1).__u64 param_working[2];
param_working[0] = (__u64)str1;
param_working[1] = (__u64)str2;
(2). BPF_SEQ_PRINTF(seq, "%s ", str1);
 BPF_SEQ_PRINTF(seq, "%s", str2);

In practice, if you have at least one non-const format argument,
you should be fine. But if all format arguments are constant, then
none of them should be strings. Maybe we could change marco
   unsigned long long ___param[] = { args };
to declare an array explicitly and then have a loop to
assign each array element?



Thanks for the reproducer!



I attached below a repro with a dummy selftest that I expect should pass
but fails to pass with the latest clang and bpf-next. Hopefully, the
logic should be simple: I try to print two strings from pointers in an
array using bpf_seq_printf but depending on how the array is initialized
the helper either receives the string pointers or NULL pointers:

test_bug:FAIL:read unexpected read: actual 'str1= str2= str1=STR1
str2=STR2 ' != expected 'str1=STR1 str2=STR2 str1=STR1 str2=STR2 '

Signed-off-by: Florent Revest 
---
  tools/testing/selftests/bpf/prog_tests/bug.c | 41 +++
  tools/testing/selftests/bpf/progs/test_bug.c | 43 
  2 files changed, 84 insertions(+)
  create mode 100644 tools/testing/selftests/bpf/prog_tests/bug.c
  create mode 100644 tools/testing/selftests/bpf/progs/test_bug.c

diff --git a/tools/testing/selftests/bpf/prog_tests/bug.c 
b/tools/testing/selftests/bpf/prog_tests/bug.c

new file mode 100644
index ..4b0fafd936b7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bug.c
@@ -0,0 +1,41 @@
+#include 
+#include "test_bug.skel.h"
+
+static int duration;
+
+void test_bug(void)
+{
+    struct test_bug *skel;
+    struct bpf_link *link;
+    char buf[64] = {};
+    int iter_fd, len;
+
+    skel = test_bug__open_and_load();
+    if (CHECK(!skel, "test_bug__open_and_load",
+  "skeleton open_and_load failed\n"))
+    goto destroy;
+
+    link = bpf_program__attach_iter(skel->progs.bug, NULL);
+    if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+    goto destroy;
+
+    iter_fd = bpf_iter_create(bpf_link__fd(link));
+    if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+    goto free_link;
+
+    len = read(iter_fd, buf, sizeof(buf));
+    CHECK(len < 0, "read", "read failed: %s\n", strerror(errno));
+    // BUG: We expect the strings to be printed in 

Re: [BUG] One-liner array initialization with two pointers in BPF results in NULLs

2021-03-09 Thread Yonghong Song




On 3/9/21 5:54 PM, Florent Revest wrote:

I noticed that initializing an array of pointers using this syntax:
__u64 array[] = { (__u64), (__u64) };
(which is a fairly common operation with macros such as BPF_SEQ_PRINTF)
always results in array[0] and array[1] being NULL.

Interestingly, if the array is only initialized with one pointer, ex:
__u64 array[] = { (__u64) };
Then array[0] will not be NULL.

Or if the array is initialized field by field, ex:
__u64 array[2];
array[0] = (__u64)
array[1] = (__u64)
Then array[0] and array[1] will not be NULL either.

I'm assuming that this should have something to do with relocations
and might be a bug in clang or in libbpf but because I don't know much
about these, I thought that reporting could be a good first step. :)


Thanks for reporting. What you guess is correct, this is due to 
relocations :-(


The compiler notoriously tends to put complex initial values into
rodata section. For example, for
   __u64 array[] = { (__u64), (__u64) };
the compiler will put
   { (__u64), (__u64) }
into rodata section.

But  and  themselves need relocation since they are
address of static variables which will sit inside .data section.

So in the elf file, you will see the following relocations:

RELOCATION RECORDS FOR [.rodata]:
OFFSET   TYPE VALUE
0018 R_BPF_64_64  .data
0020 R_BPF_64_64  .data

Currently, libbpf does not handle relocation inside .rodata
section, so their content remains 0.

That is why you see the issue with pointer as NULL.

With array size of 1, compiler does not bother to put it into
rodata section.

I *guess* that it works in the macro due to some kind of heuristics,
e.g., nested blocks, etc, and llvm did not promote the array init value
to rodata. I will double check whether llvm can completely prevent
such transformation.

Maybe in the future libbpf is able to handle relocations for
rodata section too. But for the time being, please just consider to use 
either macro, or the explicit array assignment.


Thanks for the reproducer!



I attached below a repro with a dummy selftest that I expect should pass
but fails to pass with the latest clang and bpf-next. Hopefully, the
logic should be simple: I try to print two strings from pointers in an
array using bpf_seq_printf but depending on how the array is initialized
the helper either receives the string pointers or NULL pointers:

test_bug:FAIL:read unexpected read: actual 'str1= str2= str1=STR1
str2=STR2 ' != expected 'str1=STR1 str2=STR2 str1=STR1 str2=STR2 '

Signed-off-by: Florent Revest 
---
  tools/testing/selftests/bpf/prog_tests/bug.c | 41 +++
  tools/testing/selftests/bpf/progs/test_bug.c | 43 
  2 files changed, 84 insertions(+)
  create mode 100644 tools/testing/selftests/bpf/prog_tests/bug.c
  create mode 100644 tools/testing/selftests/bpf/progs/test_bug.c

diff --git a/tools/testing/selftests/bpf/prog_tests/bug.c 
b/tools/testing/selftests/bpf/prog_tests/bug.c
new file mode 100644
index ..4b0fafd936b7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bug.c
@@ -0,0 +1,41 @@
+#include 
+#include "test_bug.skel.h"
+
+static int duration;
+
+void test_bug(void)
+{
+   struct test_bug *skel;
+   struct bpf_link *link;
+   char buf[64] = {};
+   int iter_fd, len;
+
+   skel = test_bug__open_and_load();
+   if (CHECK(!skel, "test_bug__open_and_load",
+ "skeleton open_and_load failed\n"))
+   goto destroy;
+
+   link = bpf_program__attach_iter(skel->progs.bug, NULL);
+   if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+   goto destroy;
+
+   iter_fd = bpf_iter_create(bpf_link__fd(link));
+   if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+   goto free_link;
+
+   len = read(iter_fd, buf, sizeof(buf));
+   CHECK(len < 0, "read", "read failed: %s\n", strerror(errno));
+   // BUG: We expect the strings to be printed in both cases but only the
+   // second case works.
+   // actual 'str1= str2= str1=STR1 str2=STR2 '
+   // != expected 'str1=STR1 str2=STR2 str1=STR1 str2=STR2 '
+   ASSERT_STREQ(buf, "str1=STR1 str2=STR2 str1=STR1 str2=STR2 ", "read");
+
+   close(iter_fd);
+
+free_link:
+   bpf_link__destroy(link);
+destroy:
+   test_bug__destroy(skel);
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_bug.c 
b/tools/testing/selftests/bpf/progs/test_bug.c
new file mode 100644
index ..c41e69483785
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bug.c
@@ -0,0 +1,43 @@
+#include "bpf_iter.h"
+#include 
+#include 
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/task")
+int bug(struct bpf_iter__task *ctx)
+{
+   struct seq_file *seq = ctx->meta->seq;
+
+   /* We want to print two strings */
+   static const char fmt[] = "str1=%s str2=%s ";
+   static char str1[] = 

Re: [PATCH] selftests/bpf: Simplify the calculation of variables

2021-03-03 Thread Yonghong Song




On 3/2/21 11:52 PM, Jiapeng Chong wrote:

Fix the following coccicheck warnings:

./tools/testing/selftests/bpf/test_sockmap.c:735:35-37: WARNING !A || A
&& B is equivalent to !A || B.

Reported-by: Abaci Robot 
Signed-off-by: Jiapeng Chong 


Acked-by: Yonghong Song 


Re: [PATCH] bpf: Simplify the calculation of variables

2021-03-03 Thread Yonghong Song




On 3/2/21 11:20 PM, Jiapeng Chong wrote:

Fix the following coccicheck warnings:

./tools/bpf/bpf_dbg.c:1201:55-57: WARNING !A || A && B is equivalent to
!A || B.

Reported-by: Abaci Robot 
Signed-off-by: Jiapeng Chong 


Acked-by: Yonghong Song 


Re: [PATCH v5 bpf-next 1/6] bpf: enable task local storage for tracing programs

2021-02-23 Thread Yonghong Song




On 2/23/21 2:28 PM, Song Liu wrote:

To access per-task data, BPF programs usually creates a hash table with
pid as the key. This is not ideal because:
  1. The user need to estimate the proper size of the hash table, which may
 be inaccurate;
  2. Big hash tables are slow;
  3. To clean up the data properly during task terminations, the user needs
 to write extra logic.

Task local storage overcomes these issues and offers a better option for
these per-task data. Task local storage is only available to BPF_LSM. Now
enable it for tracing programs.

Unlike LSM programs, tracing programs can be called in IRQ contexts.
Helpers that access task local storage are updated to use
raw_spin_lock_irqsave() instead of raw_spin_lock_bh().

Tracing programs can attach to functions on the task free path, e.g.
exit_creds(). To avoid allocating task local storage after
bpf_task_storage_free(), bpf_task_storage_get() is updated to not allocate
new storage when the task is not refcounted (task->usage == 0).

Reported-by: kernel test robot 


For a patch like this, typically we do not put the above
Reported-by here as it is not really reported by the
kernel test robot. If no revision is required, maybe
maintainer can remove it before applying.


Acked-by: KP Singh 
Signed-off-by: Song Liu 
---
  include/linux/bpf.h|  7 ++
  include/linux/bpf_lsm.h| 22 -
  include/linux/bpf_types.h  |  2 +-
  include/linux/sched.h  |  5 
  kernel/bpf/Makefile|  3 +--
  kernel/bpf/bpf_local_storage.c | 28 +-
  kernel/bpf/bpf_lsm.c   |  4 
  kernel/bpf/bpf_task_storage.c  | 43 +-
  kernel/fork.c  |  5 
  kernel/trace/bpf_trace.c   |  4 
  10 files changed, 51 insertions(+), 72 deletions(-)


[...]


Re: KMSAN: uninit-value in bpf_iter_prog_supported

2021-02-09 Thread Yonghong Song




On 2/8/21 11:35 PM, Dmitry Vyukov wrote:

On Sun, Feb 7, 2021 at 1:20 PM syzbot
 wrote:


Hello,

syzbot found the following issue on:

HEAD commit:73d62e81 kmsan: random: prevent boot-time reports in _mix_..
git tree:   https://github.com/google/kmsan.git master
console output: https://syzkaller.appspot.com/x/log.txt?x=17ac5f64d0
kernel config:  https://syzkaller.appspot.com/x/.config?x=df698232b2ac45c9
dashboard link: https://syzkaller.appspot.com/bug?extid=580f4f2a272e452d55cb
compiler:   Debian clang version 11.0.1-2
userspace arch: i386

Unfortunately, I don't have any reproducer for this issue yet.

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+580f4f2a272e452d5...@syzkaller.appspotmail.com


+BPF maintainers


=
BUG: KMSAN: uninit-value in bpf_iter_prog_supported+0x3dd/0x6a0 
syzkaller/managers/upstream-kmsan-gce-386/kernel/kernel/bpf/bpf_iter.c:329


I will take a look. Thanks.


CPU: 0 PID: 18494 Comm: bpf_preload Not tainted 5.10.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Call Trace:
  __dump_stack 
syzkaller/managers/upstream-kmsan-gce-386/kernel/lib/dump_stack.c:77 [inline]
  dump_stack+0x21c/0x280 
syzkaller/managers/upstream-kmsan-gce-386/kernel/lib/dump_stack.c:118
  kmsan_report+0xfb/0x1e0 
syzkaller/managers/upstream-kmsan-gce-386/kernel/mm/kmsan/kmsan_report.c:118
  __msan_warning+0x5f/0xa0 
syzkaller/managers/upstream-kmsan-gce-386/kernel/mm/kmsan/kmsan_instr.c:197
  bpf_iter_prog_supported+0x3dd/0x6a0 
syzkaller/managers/upstream-kmsan-gce-386/kernel/kernel/bpf/bpf_iter.c:329
  check_attach_btf_id 
syzkaller/managers/upstream-kmsan-gce-386/kernel/kernel/bpf/verifier.c:11772 
[inline]
  bpf_check+0x11872/0x1c380 
syzkaller/managers/upstream-kmsan-gce-386/kernel/kernel/bpf/verifier.c:11900
  bpf_prog_load 
syzkaller/managers/upstream-kmsan-gce-386/kernel/kernel/bpf/syscall.c:2210 
[inline]
  __do_sys_bpf+0x17483/0x1aee0 
syzkaller/managers/upstream-kmsan-gce-386/kernel/kernel/bpf/syscall.c:4399
  __se_sys_bpf+0x8e/0xa0 
syzkaller/managers/upstream-kmsan-gce-386/kernel/kernel/bpf/syscall.c:4357
  __x64_sys_bpf+0x4a/0x70 
syzkaller/managers/upstream-kmsan-gce-386/kernel/kernel/bpf/syscall.c:4357
  do_syscall_64+0x9f/0x140 
syzkaller/managers/upstream-kmsan-gce-386/kernel/arch/x86/entry/common.c:48
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x7fb70b5ab469
Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 
48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 
8b 0d ff 49 2b 00 f7 d8 64 89 01 48
RSP: 002b:7ffdbb4cde38 EFLAGS: 0246 ORIG_RAX: 0141
RAX: ffda RBX: 0065b110 RCX: 7fb70b5ab469
RDX: 0078 RSI: 7ffdbb4cdef0 RDI: 0005
RBP: 7ffdbb4cdef0 R08: 00100017 R09: 
R10: 7ffdbb4ce0e8 R11: 0246 R12: 
R13: 7ffdbb4cdf20 R14:  R15: 

Uninit was created at:
  kmsan_save_stack_with_flags 
syzkaller/managers/upstream-kmsan-gce-386/kernel/mm/kmsan/kmsan.c:121 [inline]
  kmsan_internal_poison_shadow+0x5c/0xf0 
syzkaller/managers/upstream-kmsan-gce-386/kernel/mm/kmsan/kmsan.c:104
  kmsan_slab_alloc+0x8d/0xe0 
syzkaller/managers/upstream-kmsan-gce-386/kernel/mm/kmsan/kmsan_hooks.c:76
  slab_alloc_node 
syzkaller/managers/upstream-kmsan-gce-386/kernel/mm/slub.c:2906 [inline]
  slab_alloc syzkaller/managers/upstream-kmsan-gce-386/kernel/mm/slub.c:2915 
[inline]
  kmem_cache_alloc_trace+0x893/0x1000 
syzkaller/managers/upstream-kmsan-gce-386/kernel/mm/slub.c:2932
  kmalloc 
syzkaller/managers/upstream-kmsan-gce-386/kernel/./include/linux/slab.h:552 
[inline]
  bpf_iter_reg_target+0x81/0x3f0 
syzkaller/managers/upstream-kmsan-gce-386/kernel/kernel/bpf/bpf_iter.c:276
  bpf_sk_storage_map_iter_init+0x6a/0x85 
syzkaller/managers/upstream-kmsan-gce-386/kernel/net/core/bpf_sk_storage.c:870
  do_one_initcall+0x362/0x8d0 
syzkaller/managers/upstream-kmsan-gce-386/kernel/init/main.c:1220
  do_initcall_level+0x1e7/0x35a 
syzkaller/managers/upstream-kmsan-gce-386/kernel/init/main.c:1293
  do_initcalls+0x127/0x1cb 
syzkaller/managers/upstream-kmsan-gce-386/kernel/init/main.c:1309
  do_basic_setup+0x33/0x36 
syzkaller/managers/upstream-kmsan-gce-386/kernel/init/main.c:1329
  kernel_init_freeable+0x238/0x38b 
syzkaller/managers/upstream-kmsan-gce-386/kernel/init/main.c:1529
  kernel_init+0x1f/0x840 
syzkaller/managers/upstream-kmsan-gce-386/kernel/init/main.c:1418
  ret_from_fork+0x1f/0x30 
syzkaller/managers/upstream-kmsan-gce-386/kernel/arch/x86/entry/entry_64.S:296
=


---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ  for more information about syzbot.
syzbot engineers can be reached at 

Re: ERROR: INT DW_ATE_unsigned_1 Error emitting BTF type

2021-02-06 Thread Yonghong Song




On 2/6/21 11:44 AM, Sedat Dilek wrote:

On Sat, Feb 6, 2021 at 8:33 PM Yonghong Song  wrote:




On 2/6/21 11:28 AM, Sedat Dilek wrote:

On Sat, Feb 6, 2021 at 8:22 PM Sedat Dilek  wrote:


On Sat, Feb 6, 2021 at 8:17 PM Yonghong Song  wrote:




On 2/6/21 10:10 AM, Sedat Dilek wrote:

On Sat, Feb 6, 2021 at 6:53 PM Yonghong Song  wrote:




On 2/6/21 8:24 AM, Mark Wielaard wrote:

Hi,

On Sat, Feb 06, 2021 at 12:26:44AM -0800, Yonghong Song wrote:

With the above vmlinux, the issue appears to be handling
DW_ATE_signed_1, DW_ATE_unsigned_{1,24,40}.

The following patch should fix the issue:


That doesn't really make sense to me. Why is the compiler emitting a
DW_TAG_base_type that needs to be interpreted according to the
DW_AT_name attribute?

If the issue is that the size of the base type cannot be expressed in
bytes then the DWARF spec provides the following option:

If the value of an object of the given type does not fully occupy
the storage described by a byte size attribute, the base type
entry may also have a DW_AT_bit_size and a DW_AT_data_bit_offset
attribute, both of whose values are integer constant values (see
Section 2.19 on page 55). The bit size attribute describes the
actual size in bits used to represent values of the given
type. The data bit offset attribute is the offset in bits from the
beginning of the containing storage to the beginning of the
value. Bits that are part of the offset are padding.  If this
attribute is omitted a default data bit offset of zero is assumed.

Would it be possible to use that encoding of those special types?  If


I agree with you. I do not like comparing names as well. Unfortunately,
there is not enough information in dwarf to find out the actual information.
The following is the dwarf dump with vmlinux (Sedat provided) for
DW_ATE_unsigned_1.

0x000e97e9:   DW_TAG_base_type
DW_AT_name  ("DW_ATE_unsigned_1")
DW_AT_encoding  (DW_ATE_unsigned)
DW_AT_byte_size (0x00)

There is no DW_AT_bit_size and DW_AT_bit_offset for base type.
AFAIK, these two attributes typically appear in struct/union members
together with DW_AT_byte_size.

Maybe compilers (clang in this case) can emit DW_AT_bit_size = 1
and DW_AT_bit_offset = 0/7 (depending on big/little endian) and
this case, we just test and get DW_AT_bit_size and it should work.

But I think BTF does not need this (DW_ATE_unsigned_1) for now.
I checked dwarf dump and it is mostly used for some arith operation
encoded in dump (in this case, e.g., shift by 1 bit)

0x15cf:   DW_TAG_base_type
DW_AT_name  ("DW_ATE_unsigned_1")
DW_AT_encoding  (DW_ATE_unsigned)
DW_AT_byte_size (0x00)

0x00010ed9: DW_TAG_formal_parameter
  DW_AT_location(DW_OP_lit0, DW_OP_not,
DW_OP_convert (0x15cf) "DW_ATE_unsigned_1", DW_OP_convert
(0x15d4) "DW_ATE_unsigned_8", DW_OP_stack_value)
  DW_AT_abstract_origin (0x00013984 "branch")

Look at clang frontend, only the following types are encoded with
unsigned dwarf type.

  case BuiltinType::UShort:
  case BuiltinType::UInt:
  case BuiltinType::UInt128:
  case BuiltinType::ULong:
  case BuiltinType::WChar_U:
  case BuiltinType::ULongLong:
Encoding = llvm::dwarf::DW_ATE_unsigned;
break;



not, can we try to come up with some extension that doesn't require
consumers to match magic names?



You want me to upload mlx5_core.ko?


I just sent out a patch. You are cc'ed. I also attached in this email.
Yes, it would be great if you can upload mlx5_core.ko so I can
double check with this DW_ATE_unsigned_160 which is really unusual.



Yupp, just built a new pahole :-).
Re-building linux-kernel...

Will upload mlx5_core.ko - need zstd-ed it before.



Hmm, I guess you want a mlx5_core.ko with your patch applied-to-pahole-1.20 :-)?


this should work too. I want to check dwarf data. My patch won't impact
dwarf generation.



Usual Dropbox-Link:

https://www.dropbox.com/sh/kvyh8ps7na0r1h5/AABfyNfDZ2bESse_bo4h05fFa?dl=0

See "for-yhs" directory:

1. mlx5-module_yhs-v1 ("[PATCH dwarves] btf_encoder: sanitize
non-regular int base type")
2. mlx5-module_yhs-dileks-v4 (with the last diff-v4 I tried successfully)


Thanks, with llvm-dwarfdump, I can see

0x00d65616:   DW_TAG_base_type
DW_AT_name  ("DW_ATE_unsigned_160")
DW_AT_encoding  (DW_ATE_unsigned)
DW_AT_byte_size (0x14)

0x00d88e81: DW_TAG_variable
  DW_AT_location(indexed (0xad) loclist = 
0x0005df42:
 [0x00088c8e, 0x00088c97): 
DW_OP_breg9 R9+0, DW_OP_convert (0x00d65616) "DW_ATE_unsigned_160&qu

Re: ERROR: INT DW_ATE_unsigned_1 Error emitting BTF type

2021-02-06 Thread Yonghong Song




On 2/6/21 11:28 AM, Sedat Dilek wrote:

On Sat, Feb 6, 2021 at 8:22 PM Sedat Dilek  wrote:


On Sat, Feb 6, 2021 at 8:17 PM Yonghong Song  wrote:




On 2/6/21 10:10 AM, Sedat Dilek wrote:

On Sat, Feb 6, 2021 at 6:53 PM Yonghong Song  wrote:




On 2/6/21 8:24 AM, Mark Wielaard wrote:

Hi,

On Sat, Feb 06, 2021 at 12:26:44AM -0800, Yonghong Song wrote:

With the above vmlinux, the issue appears to be handling
DW_ATE_signed_1, DW_ATE_unsigned_{1,24,40}.

The following patch should fix the issue:


That doesn't really make sense to me. Why is the compiler emitting a
DW_TAG_base_type that needs to be interpreted according to the
DW_AT_name attribute?

If the issue is that the size of the base type cannot be expressed in
bytes then the DWARF spec provides the following option:

   If the value of an object of the given type does not fully occupy
   the storage described by a byte size attribute, the base type
   entry may also have a DW_AT_bit_size and a DW_AT_data_bit_offset
   attribute, both of whose values are integer constant values (see
   Section 2.19 on page 55). The bit size attribute describes the
   actual size in bits used to represent values of the given
   type. The data bit offset attribute is the offset in bits from the
   beginning of the containing storage to the beginning of the
   value. Bits that are part of the offset are padding.  If this
   attribute is omitted a default data bit offset of zero is assumed.

Would it be possible to use that encoding of those special types?  If


I agree with you. I do not like comparing names as well. Unfortunately,
there is not enough information in dwarf to find out the actual information.
The following is the dwarf dump with vmlinux (Sedat provided) for
DW_ATE_unsigned_1.

0x000e97e9:   DW_TAG_base_type
   DW_AT_name  ("DW_ATE_unsigned_1")
   DW_AT_encoding  (DW_ATE_unsigned)
   DW_AT_byte_size (0x00)

There is no DW_AT_bit_size and DW_AT_bit_offset for base type.
AFAIK, these two attributes typically appear in struct/union members
together with DW_AT_byte_size.

Maybe compilers (clang in this case) can emit DW_AT_bit_size = 1
and DW_AT_bit_offset = 0/7 (depending on big/little endian) and
this case, we just test and get DW_AT_bit_size and it should work.

But I think BTF does not need this (DW_ATE_unsigned_1) for now.
I checked dwarf dump and it is mostly used for some arith operation
encoded in dump (in this case, e.g., shift by 1 bit)

0x15cf:   DW_TAG_base_type
   DW_AT_name  ("DW_ATE_unsigned_1")
   DW_AT_encoding  (DW_ATE_unsigned)
   DW_AT_byte_size (0x00)

0x00010ed9: DW_TAG_formal_parameter
 DW_AT_location(DW_OP_lit0, DW_OP_not,
DW_OP_convert (0x15cf) "DW_ATE_unsigned_1", DW_OP_convert
(0x15d4) "DW_ATE_unsigned_8", DW_OP_stack_value)
 DW_AT_abstract_origin (0x00013984 "branch")

Look at clang frontend, only the following types are encoded with
unsigned dwarf type.

 case BuiltinType::UShort:
 case BuiltinType::UInt:
 case BuiltinType::UInt128:
 case BuiltinType::ULong:
 case BuiltinType::WChar_U:
 case BuiltinType::ULongLong:
   Encoding = llvm::dwarf::DW_ATE_unsigned;
   break;



not, can we try to come up with some extension that doesn't require
consumers to match magic names?



You want me to upload mlx5_core.ko?


I just sent out a patch. You are cc'ed. I also attached in this email.
Yes, it would be great if you can upload mlx5_core.ko so I can
double check with this DW_ATE_unsigned_160 which is really unusual.



Yupp, just built a new pahole :-).
Re-building linux-kernel...

Will upload mlx5_core.ko - need zstd-ed it before.



Hmm, I guess you want a mlx5_core.ko with your patch applied-to-pahole-1.20 :-)?


this should work too. I want to check dwarf data. My patch won't impact 
dwarf generation.





- Sedat -



When looking with llvm-dwarf for DW_ATE_unsigned_160:

0x00d65616:   DW_TAG_base_type
 DW_AT_name  ("DW_ATE_unsigned_160")
 DW_AT_encoding  (DW_ATE_unsigned)
 DW_AT_byte_size (0x14)

If you need further information, please let me know.

Thanks.

- Sedat -



Re: ERROR: INT DW_ATE_unsigned_1 Error emitting BTF type

2021-02-06 Thread Yonghong Song



On 2/6/21 10:10 AM, Sedat Dilek wrote:

On Sat, Feb 6, 2021 at 6:53 PM Yonghong Song  wrote:




On 2/6/21 8:24 AM, Mark Wielaard wrote:

Hi,

On Sat, Feb 06, 2021 at 12:26:44AM -0800, Yonghong Song wrote:

With the above vmlinux, the issue appears to be handling
DW_ATE_signed_1, DW_ATE_unsigned_{1,24,40}.

The following patch should fix the issue:


That doesn't really make sense to me. Why is the compiler emitting a
DW_TAG_base_type that needs to be interpreted according to the
DW_AT_name attribute?

If the issue is that the size of the base type cannot be expressed in
bytes then the DWARF spec provides the following option:

  If the value of an object of the given type does not fully occupy
  the storage described by a byte size attribute, the base type
  entry may also have a DW_AT_bit_size and a DW_AT_data_bit_offset
  attribute, both of whose values are integer constant values (see
  Section 2.19 on page 55). The bit size attribute describes the
  actual size in bits used to represent values of the given
  type. The data bit offset attribute is the offset in bits from the
  beginning of the containing storage to the beginning of the
  value. Bits that are part of the offset are padding.  If this
  attribute is omitted a default data bit offset of zero is assumed.

Would it be possible to use that encoding of those special types?  If


I agree with you. I do not like comparing names as well. Unfortunately,
there is not enough information in dwarf to find out actual information.
The following is the dwarf dump with vmlinux (Sedat provided) for
DW_ATE_unsigned_1.

0x000e97e9:   DW_TAG_base_type
  DW_AT_name  ("DW_ATE_unsigned_1")
  DW_AT_encoding  (DW_ATE_unsigned)
  DW_AT_byte_size (0x00)

There is no DW_AT_bit_size and DW_AT_bit_offset for base type.
AFAIK, these two attributes typically appear in struct/union members
together with DW_AT_byte_size.

Maybe compilers (clang in this case) can emit DW_AT_bit_size = 1
and DW_AT_bit_offset = 0/7 (depending on big/little endian) and
this case, we just test and get DW_AT_bit_size and it should work.

But I think BTF does not need this (DW_ATE_unsigned_1) for now.
I checked dwarf dump and it is mostly used for some arith operation
encoded in dump (in this case, e.g., shift by 1 bit)

0x15cf:   DW_TAG_base_type
  DW_AT_name  ("DW_ATE_unsigned_1")
  DW_AT_encoding  (DW_ATE_unsigned)
  DW_AT_byte_size (0x00)

0x00010ed9: DW_TAG_formal_parameter
DW_AT_location(DW_OP_lit0, DW_OP_not,
DW_OP_convert (0x15cf) "DW_ATE_unsigned_1", DW_OP_convert
(0x15d4) "DW_ATE_unsigned_8", DW_OP_stack_value)
DW_AT_abstract_origin (0x00013984 "branch")

Look at clang frontend, only the following types are encoded with
unsigned dwarf type.

case BuiltinType::UShort:
case BuiltinType::UInt:
case BuiltinType::UInt128:
case BuiltinType::ULong:
case BuiltinType::WChar_U:
case BuiltinType::ULongLong:
  Encoding = llvm::dwarf::DW_ATE_unsigned;
  break;



not, can we try to come up with some extension that doesn't require
consumers to match magic names?



You want me to upload mlx5_core.ko?


I just sent out a patch. You are cc'ed. I also attached in this email.
Yes, it would be great if you can upload mlx5_core.ko so I can
double check with this DW_ATE_unsigned_160 which is really unusual.



When looking with llvm-dwarf for DW_ATE_unsigned_160:

0x00d65616:   DW_TAG_base_type
DW_AT_name  ("DW_ATE_unsigned_160")
DW_AT_encoding  (DW_ATE_unsigned)
DW_AT_byte_size (0x14)

If you need further information, please let me know.

Thanks.

- Sedat -

From 239c797090abbdc5253d0ff1e9e657c5006fbbee Mon Sep 17 00:00:00 2001
From: Yonghong Song 
Date: Sat, 6 Feb 2021 10:21:45 -0800
Subject: [PATCH dwarves] btf_encoder: sanitize non-regular int base type

clang with dwarf5 may generate non-regular int base type,
i.e., not a signed/unsigned char/short/int/longlong/__int128.
Such base types are often used to describe
how an actual parameter or variable is generated. For example,

0x15cf:   DW_TAG_base_type
DW_AT_name  ("DW_ATE_unsigned_1")
DW_AT_encoding  (DW_ATE_unsigned)
DW_AT_byte_size (0x00)

0x00010ed9: DW_TAG_formal_parameter
  DW_AT_location(DW_OP_lit0,
 DW_OP_not,
 DW_OP_convert (0x15cf) 
"DW_ATE_unsigned_1",
 DW_OP_convert (0x15d4) 
"DW_ATE_unsigned_8",
 DW_OP_stack_value)
  DW_AT_abstract_origin (0x00013984 "branch")


Re: ERROR: INT DW_ATE_unsigned_1 Error emitting BTF type

2021-02-06 Thread Yonghong Song




On 2/6/21 8:24 AM, Mark Wielaard wrote:

Hi,

On Sat, Feb 06, 2021 at 12:26:44AM -0800, Yonghong Song wrote:

With the above vmlinux, the issue appears to be handling
DW_ATE_signed_1, DW_ATE_unsigned_{1,24,40}.

The following patch should fix the issue:


That doesn't really make sense to me. Why is the compiler emitting a
DW_TAG_base_type that needs to be interpreted according to the
DW_AT_name attribute?

If the issue is that the size of the base type cannot be expressed in
bytes then the DWARF spec provides the following option:

 If the value of an object of the given type does not fully occupy
 the storage described by a byte size attribute, the base type
 entry may also have a DW_AT_bit_size and a DW_AT_data_bit_offset
 attribute, both of whose values are integer constant values (see
 Section 2.19 on page 55). The bit size attribute describes the
 actual size in bits used to represent values of the given
 type. The data bit offset attribute is the offset in bits from the
 beginning of the containing storage to the beginning of the
 value. Bits that are part of the offset are padding.  If this
 attribute is omitted a default data bit offset of zero is assumed.

Would it be possible to use that encoding of those special types?  If


I agree with you. I do not like comparing names as well. Unfortunately, 
there is not enough information in dwarf to find out actual information.

The following is the dwarf dump with vmlinux (Sedat provided) for
DW_ATE_unsigned_1.

0x000e97e9:   DW_TAG_base_type
DW_AT_name  ("DW_ATE_unsigned_1")
DW_AT_encoding  (DW_ATE_unsigned)
DW_AT_byte_size (0x00)

There is no DW_AT_bit_size and DW_AT_bit_offset for base type.
AFAIK, these two attributes typically appear in struct/union members
together with DW_AT_byte_size.

Maybe compilers (clang in this case) can emit DW_AT_bit_size = 1
and DW_AT_bit_offset = 0/7 (depending on big/little endian) and
this case, we just test and get DW_AT_bit_size and it should work.

But I think BTF does not need this (DW_ATE_unsigned_1) for now.
I checked dwarf dump and it is mostly used for some arith operation
encoded in dump (in this case, e.g., shift by 1 bit)

0x15cf:   DW_TAG_base_type
DW_AT_name  ("DW_ATE_unsigned_1")
DW_AT_encoding  (DW_ATE_unsigned)
DW_AT_byte_size (0x00)

0x00010ed9: DW_TAG_formal_parameter
  DW_AT_location(DW_OP_lit0, DW_OP_not, 
DW_OP_convert (0x15cf) "DW_ATE_unsigned_1", DW_OP_convert 
(0x15d4) "DW_ATE_unsigned_8", DW_OP_stack_value)

  DW_AT_abstract_origin (0x00013984 "branch")

Look at clang frontend, only the following types are encoded with 
unsigned dwarf type.


  case BuiltinType::UShort:
  case BuiltinType::UInt:
  case BuiltinType::UInt128:
  case BuiltinType::ULong:
  case BuiltinType::WChar_U:
  case BuiltinType::ULongLong:
Encoding = llvm::dwarf::DW_ATE_unsigned;
break;



not, can we try to come up with some extension that doesn't require
consumers to match magic names?

Thanks,

Mark



Re: ERROR: INT DW_ATE_unsigned_1 Error emitting BTF type

2021-02-06 Thread Yonghong Song




On 2/5/21 10:52 PM, Sedat Dilek wrote:

On Sat, Feb 6, 2021 at 7:26 AM Sedat Dilek  wrote:


On Sat, Feb 6, 2021 at 6:53 AM Sedat Dilek  wrote:


On Sat, Feb 6, 2021 at 6:44 AM Sedat Dilek  wrote:


On Sat, Feb 6, 2021 at 4:34 AM Sedat Dilek  wrote:


On Fri, Feb 5, 2021 at 10:54 PM Yonghong Song  wrote:




On 2/5/21 12:31 PM, Sedat Dilek wrote:

On Fri, Feb 5, 2021 at 9:03 PM Yonghong Song  wrote:




On 2/5/21 11:24 AM, Arnaldo Carvalho de Melo wrote:

Em Fri, Feb 05, 2021 at 11:10:08AM -0800, Yonghong Song escreveu:

On 2/5/21 11:06 AM, Sedat Dilek wrote:

On Fri, Feb 5, 2021 at 7:53 PM Sedat Dilek  wrote:
Grepping through linux.git/tools I guess some BTF tools/libs need to
know what BTF_INT_UNSIGNED is?



BTF_INT_UNSIGNED needs kernel support. Maybe to teach pahole to
ignore this for now until kernel infrastructure is ready.


Yeah, I thought about doing that.


Not sure whether this information will be useful or not
for BTF. This needs to be discussed separately.


Maybe search for the rationale for its introduction in DWARF.


In LLVM, we have:
 uint8_t BTFEncoding;
 switch (Encoding) {
 case dwarf::DW_ATE_boolean:
   BTFEncoding = BTF::INT_BOOL;
   break;
 case dwarf::DW_ATE_signed:
 case dwarf::DW_ATE_signed_char:
   BTFEncoding = BTF::INT_SIGNED;
   break;
 case dwarf::DW_ATE_unsigned:
 case dwarf::DW_ATE_unsigned_char:
   BTFEncoding = 0;
   break;

I think DW_ATE_unsigned can be ignored in pahole since
the default encoding = 0. A simple comment is enough.



Yonghong Song, do you have a patch/diff for me?


Looking at error message from log:

   LLVM_OBJCOPY=/opt/binutils/bin/objcopy /opt/pahole/bin/pahole -J
.tmp_vmlinux.btf
[115] INT DW_ATE_unsigned_1 Error emitting BTF type
Encountered error while encoding BTF.

Not exactly what is the root cause. Maybe bt->bit_size is not
encoded correctly. Could you put vmlinux (in the above it is
.tmp_vmlinux.btf) somewhere, I or somebody else can investigate
and provide a proper fix.



[ TO: Masahiro ]

Thanks for taking care Yonghong - hope this is your first name, if not
I am sorry.
In case of mixing my first and last name you will make me female -
Dilek is a Turkish female first name :-).
So, in some cultures you need to be careful.

Anyway... back to business and facts.

Out of frustration I killed my last build via `make distclean`.
The whole day I tested diverse combination of GCC-10 and LLVM-12
together with BTF Kconfigs, selfmade pahole, etc.

I will do ne run with some little changes:

#1: Pass LLVM_IAS=1 to make (means use Clang's Integrated ASsembler -
as per Nick this leads to the same error - should be unrelated)
#2: I did: DEBUG_INFO_COMPRESSED y -> n

#2 I did in case you need vmlinux and I have to upload - I will
compress the resulting vmlinux with ZSTD.
You need vmlinux or .tmp_vmlinux.btf file?
Nick was not allowed from his company to download from a Dropbox link.
So, as an alternative I can offer GoogleDrive...
...or bomb into your INBOX :-).

Now, why I CCed Masahiro:

In case of ERRORs when running `scripts/link-vmlinux.sh` above files
will be removed.

Last, I found a hack to bypass this - means to keep these files (I
need to check old emails).

Masahiro, you see a possibility to have a way to keep these files in
case of ERRORs without doing hackery?

 From a previous post in this thread:

+ info BTF .btf.vmlinux.bin.o
+ [  != silent_ ]
+ printf   %-7s %s\n BTF .btf.vmlinux.bin.o
  BTF .btf.vmlinux.bin.o
+ LLVM_OBJCOPY=llvm-objcopy /opt/pahole/bin/pahole -J .tmp_vmlinux.btf
[2] INT long unsigned int Error emitting BTF type
Encountered error while encoding BTF.
+ llvm-objcopy --only-section=.BTF --set-section-flags
.BTF=alloc,readonly --strip-all .tmp_vmlinux.btf .btf.vmlinux.bin.o
...
+ info BTFIDS vmlinux
+ [  != silent_ ]
+ printf   %-7s %s\n BTFIDS vmlinux
  BTFIDS  vmlinux
+ ./tools/bpf/resolve_btfids/resolve_btfids vmlinux
FAILED: load BTF from vmlinux: Invalid argument
+ on_exit
+ [ 255 -ne 0 ]
+ cleanup
+ rm -f .btf.vmlinux.bin.o
+ rm -f .tmp_System.map
+ rm -f .tmp_vmlinux.btf .tmp_vmlinux.kallsyms1
.tmp_vmlinux.kallsyms1.S .tmp_vmlinux.kallsyms1.o
.tmp_vmlinux.kallsyms2 .tmp_vmlinux.kallsyms2.S .tmp_vmlinux.kallsyms
2.o
+ rm -f System.map
+ rm -f vmlinux
+ rm -f vmlinux.o
make[3]: *** [Makefile:1166: vmlinux] Error 255

^^^ Look here.



With this diff:

$ git diff scripts/link-vmlinux.sh
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index eef40fa9485d..40f1b6aae553 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -330,7 +330,7 @@ vmlinux_link vmlinux "${kallsymso}" ${btf_vmlinux_bin_o}
# fill in BTF IDs
if [ -n "${CONFIG_DEBUG_INFO_BTF}" -a -n "${CONFIG_BPF}" ]; then
info BTFIDS vmlinux
-   ${RESOLVE_BTFIDS} vmlinux
+   ##${RESOLVE_BTFIDS} vmlinux
fi

if [ -n "${CONFIG_BUILDTIME_TABLE_SORT}" ]; then

This files are kept - not removed:

$ LC_ALL=C ll .*btf* vmlinux

Re: ERROR: INT DW_ATE_unsigned_1 Error emitting BTF type

2021-02-05 Thread Yonghong Song




On 2/5/21 12:31 PM, Sedat Dilek wrote:

On Fri, Feb 5, 2021 at 9:03 PM Yonghong Song  wrote:




On 2/5/21 11:24 AM, Arnaldo Carvalho de Melo wrote:

Em Fri, Feb 05, 2021 at 11:10:08AM -0800, Yonghong Song escreveu:

On 2/5/21 11:06 AM, Sedat Dilek wrote:

On Fri, Feb 5, 2021 at 7:53 PM Sedat Dilek  wrote:
Grepping through linux.git/tools I guess some BTF tools/libs need to
know what BTF_INT_UNSIGNED is?



BTF_INT_UNSIGNED needs kernel support. Maybe to teach pahole to
ignore this for now until kernel infrastructure is ready.


Yeah, I thought about doing that.


Not sure whether this information will be useful or not
for BTF. This needs to be discussed separately.


Maybe search for the rationale for its introduction in DWARF.


In LLVM, we have:
uint8_t BTFEncoding;
switch (Encoding) {
case dwarf::DW_ATE_boolean:
  BTFEncoding = BTF::INT_BOOL;
  break;
case dwarf::DW_ATE_signed:
case dwarf::DW_ATE_signed_char:
  BTFEncoding = BTF::INT_SIGNED;
  break;
case dwarf::DW_ATE_unsigned:
case dwarf::DW_ATE_unsigned_char:
  BTFEncoding = 0;
  break;

I think DW_ATE_unsigned can be ignored in pahole since
the default encoding = 0. A simple comment is enough.



Yonghong Song, do you have a patch/diff for me?


Looking at error message from log:

 LLVM_OBJCOPY=/opt/binutils/bin/objcopy /opt/pahole/bin/pahole -J
.tmp_vmlinux.btf
[115] INT DW_ATE_unsigned_1 Error emitting BTF type
Encountered error while encoding BTF.

Not exactly what is the root cause. Maybe bt->bit_size is not
encoded correctly. Could you put vmlinux (in the above it is
.tmp_vmlinux.btf) somewhere, I or somebody else can investigate
and provide a proper fix.


Thanks.

- Sedat -



Re: ERROR: INT DW_ATE_unsigned_1 Error emitting BTF type

2021-02-05 Thread Yonghong Song




On 2/5/21 11:24 AM, Arnaldo Carvalho de Melo wrote:

Em Fri, Feb 05, 2021 at 11:10:08AM -0800, Yonghong Song escreveu:

On 2/5/21 11:06 AM, Sedat Dilek wrote:

On Fri, Feb 5, 2021 at 7:53 PM Sedat Dilek  wrote:
Grepping through linux.git/tools I guess some BTF tools/libs need to
know what BTF_INT_UNSIGNED is?
  

BTF_INT_UNSIGNED needs kernel support. Maybe to teach pahole to
ignore this for now until kernel infrastructure is ready.


Yeah, I thought about doing that.


Not sure whether this information will be useful or not
for BTF. This needs to be discussed separately.


Maybe search for the rationale for its introduction in DWARF.


In LLVM, we have:
  uint8_t BTFEncoding;
  switch (Encoding) {
  case dwarf::DW_ATE_boolean:
BTFEncoding = BTF::INT_BOOL;
break;
  case dwarf::DW_ATE_signed:
  case dwarf::DW_ATE_signed_char:
BTFEncoding = BTF::INT_SIGNED;
break;
  case dwarf::DW_ATE_unsigned:
  case dwarf::DW_ATE_unsigned_char:
BTFEncoding = 0;
break;

I think DW_ATE_unsigned can be ignored in pahole since
the default encoding = 0. A simple comment is enough.



- ARnaldo



Re: ERROR: INT DW_ATE_unsigned_1 Error emitting BTF type

2021-02-05 Thread Yonghong Song




On 2/5/21 11:15 AM, Sedat Dilek wrote:

On Fri, Feb 5, 2021 at 8:10 PM Yonghong Song  wrote:




On 2/5/21 11:06 AM, Sedat Dilek wrote:

On Fri, Feb 5, 2021 at 7:53 PM Sedat Dilek  wrote:


On Fri, Feb 5, 2021 at 6:48 PM Sedat Dilek  wrote:


On Fri, Feb 5, 2021 at 4:28 PM Arnaldo Carvalho de Melo
 wrote:


Em Fri, Feb 05, 2021 at 04:23:59PM +0100, Sedat Dilek escreveu:

On Fri, Feb 5, 2021 at 3:41 PM Sedat Dilek  wrote:


On Fri, Feb 5, 2021 at 3:37 PM Sedat Dilek  wrote:


Hi,

when building with pahole v1.20 and binutils v2.35.2 plus Clang
v12.0.0-rc1 and DWARF-v5 I see:
...
+ info BTF .btf.vmlinux.bin.o
+ [  != silent_ ]
+ printf   %-7s %s\n BTF .btf.vmlinux.bin.o
   BTF .btf.vmlinux.bin.o
+ LLVM_OBJCOPY=/opt/binutils/bin/objcopy /opt/pahole/bin/pahole -J
.tmp_vmlinux.btf
[115] INT DW_ATE_unsigned_1 Error emitting BTF type
Encountered error while encoding BTF.


Grepping the pahole sources:

$ git grep DW_ATE
dwarf_loader.c: bt->is_bool = encoding == DW_ATE_boolean;
dwarf_loader.c: bt->is_signed = encoding == DW_ATE_signed;

Missing DW_ATE_unsigned encoding?



Checked the LLVM sources:

clang/lib/CodeGen/CGDebugInfo.cpp:Encoding =
llvm::dwarf::DW_ATE_unsigned_char;
clang/lib/CodeGen/CGDebugInfo.cpp:Encoding = llvm::dwarf::DW_ATE_unsigned;
clang/lib/CodeGen/CGDebugInfo.cpp:Encoding =
llvm::dwarf::DW_ATE_unsigned_fixed;
clang/lib/CodeGen/CGDebugInfo.cpp:
? llvm::dwarf::DW_ATE_unsigned
...
lld/test/wasm/debuginfo.test:CHECK-NEXT:DW_AT_encoding
   (DW_ATE_unsigned)

So, I will switch from GNU ld.bfd v2.35.2 to LLD-12.


Thanks for the research, probably your conclusion is correct, can you go
the next step and add that part and check if the end result is the
expected one?



Still building...

Can you give me a hand on what has to be changed in dwarves/pahole?

I guess switching from ld.bfd to ld.lld will show the same ERROR.



This builds successfully - untested:

$ git diff
diff --git a/btf_loader.c b/btf_loader.c
index ec286f413f36..a39edd3362db 100644
--- a/btf_loader.c
+++ b/btf_loader.c
@@ -107,6 +107,7 @@ static struct base_type *base_type__new(strings_t
name, uint32_t attrs,
 bt->bit_size = size;
 bt->is_signed = attrs & BTF_INT_SIGNED;
 bt->is_bool = attrs & BTF_INT_BOOL;
+   bt->is_unsigned = attrs & BTF_INT_UNSIGNED;
 bt->name_has_encoding = false;
 bt->float_type = float_type;
 }
diff --git a/ctf.h b/ctf.h
index 25b79892bde3..9e47c3c74677 100644
--- a/ctf.h
+++ b/ctf.h
@@ -100,6 +100,7 @@ struct ctf_full_type {
#define CTF_TYPE_INT_CHAR  0x2
#define CTF_TYPE_INT_BOOL  0x4
#define CTF_TYPE_INT_VARARGS   0x8
+#define CTF_TYPE_INT_UNSIGNED  0x16

#define CTF_TYPE_FP_ATTRS(VAL) ((VAL) >> 24)
#define CTF_TYPE_FP_OFFSET(VAL)(((VAL) >> 16) & 0xff)
diff --git a/dwarf_loader.c b/dwarf_loader.c
index b73d7867e1e6..79d40f183c24 100644
--- a/dwarf_loader.c
+++ b/dwarf_loader.c
@@ -473,6 +473,7 @@ static struct base_type *base_type__new(Dwarf_Die
*die, struct cu *cu)
 bt->is_bool = encoding == DW_ATE_boolean;
 bt->is_signed = encoding == DW_ATE_signed;
 bt->is_varargs = false;
+   bt->is_unsigned = encoding == DW_ATE_unsigned;
 bt->name_has_encoding = true;
 }

diff --git a/dwarves.h b/dwarves.h
index 98caf1abc54d..edf32d2e6f80 100644
--- a/dwarves.h
+++ b/dwarves.h
@@ -1261,6 +1261,7 @@ struct base_type {
 uint8_t is_signed:1;
 uint8_t is_bool:1;
 uint8_t is_varargs:1;
+   uint8_t is_unsigned:1;
 uint8_t float_type:4;
};

diff --git a/lib/bpf b/lib/bpf
--- a/lib/bpf
+++ b/lib/bpf
@@ -1 +1 @@
-Subproject commit 5af3d86b5a2c5fecdc3ab83822d083edd32b4396
+Subproject commit 5af3d86b5a2c5fecdc3ab83822d083edd32b4396-dirty
diff --git a/libbtf.c b/libbtf.c
index 9f7628304495..a0661a7bbed9 100644
--- a/libbtf.c
+++ b/libbtf.c
@@ -247,6 +247,8 @@ static const char *
btf_elf__int_encoding_str(uint8_t encoding)
 return "CHAR";
 else if (encoding == BTF_INT_BOOL)
 return "BOOL";
+   else if (encoding == BTF_INT_UNSIGNED)
+   return "UNSIGNED";
 else
 return "UNKN";
}
@@ -379,6 +381,8 @@ int32_t btf_elf__add_base_type(struct btf_elf
*btfe, const struct base_type *bt,
 encoding = BTF_INT_SIGNED;
 } else if (bt->is_bool) {
 encoding = BTF_INT_BOOL;
+   } else if (bt->is_unsigned) {
+   encoding = BTF_INT_UNSIGNED;
 } else if (bt->float_type) {
 fprintf(stderr, "float_type is not supported\n");
 return -1;

Additionally - I cannot see it with `git diff`:

[ lib/bpf/include/uapi/linux/

Re: ERROR: INT DW_ATE_unsigned_1 Error emitting BTF type

2021-02-05 Thread Yonghong Song




On 2/5/21 11:06 AM, Sedat Dilek wrote:

On Fri, Feb 5, 2021 at 7:53 PM Sedat Dilek  wrote:


On Fri, Feb 5, 2021 at 6:48 PM Sedat Dilek  wrote:


On Fri, Feb 5, 2021 at 4:28 PM Arnaldo Carvalho de Melo
 wrote:


Em Fri, Feb 05, 2021 at 04:23:59PM +0100, Sedat Dilek escreveu:

On Fri, Feb 5, 2021 at 3:41 PM Sedat Dilek  wrote:


On Fri, Feb 5, 2021 at 3:37 PM Sedat Dilek  wrote:


Hi,

when building with pahole v1.20 and binutils v2.35.2 plus Clang
v12.0.0-rc1 and DWARF-v5 I see:
...
+ info BTF .btf.vmlinux.bin.o
+ [  != silent_ ]
+ printf   %-7s %s\n BTF .btf.vmlinux.bin.o
  BTF .btf.vmlinux.bin.o
+ LLVM_OBJCOPY=/opt/binutils/bin/objcopy /opt/pahole/bin/pahole -J
.tmp_vmlinux.btf
[115] INT DW_ATE_unsigned_1 Error emitting BTF type
Encountered error while encoding BTF.


Grepping the pahole sources:

$ git grep DW_ATE
dwarf_loader.c: bt->is_bool = encoding == DW_ATE_boolean;
dwarf_loader.c: bt->is_signed = encoding == DW_ATE_signed;

Missing DW_ATE_unsigned encoding?



Checked the LLVM sources:

clang/lib/CodeGen/CGDebugInfo.cpp:Encoding =
llvm::dwarf::DW_ATE_unsigned_char;
clang/lib/CodeGen/CGDebugInfo.cpp:Encoding = llvm::dwarf::DW_ATE_unsigned;
clang/lib/CodeGen/CGDebugInfo.cpp:Encoding =
llvm::dwarf::DW_ATE_unsigned_fixed;
clang/lib/CodeGen/CGDebugInfo.cpp:
   ? llvm::dwarf::DW_ATE_unsigned
...
lld/test/wasm/debuginfo.test:CHECK-NEXT:DW_AT_encoding
  (DW_ATE_unsigned)

So, I will switch from GNU ld.bfd v2.35.2 to LLD-12.


Thanks for the research, probably your conclusion is correct, can you go
the next step and add that part and check if the end result is the
expected one?



Still building...

Can you give me a hand on what has to be changed in dwarves/pahole?

I guess switching from ld.bfd to ld.lld will show the same ERROR.



This builds successfully - untested:

$ git diff
diff --git a/btf_loader.c b/btf_loader.c
index ec286f413f36..a39edd3362db 100644
--- a/btf_loader.c
+++ b/btf_loader.c
@@ -107,6 +107,7 @@ static struct base_type *base_type__new(strings_t
name, uint32_t attrs,
bt->bit_size = size;
bt->is_signed = attrs & BTF_INT_SIGNED;
bt->is_bool = attrs & BTF_INT_BOOL;
+   bt->is_unsigned = attrs & BTF_INT_UNSIGNED;
bt->name_has_encoding = false;
bt->float_type = float_type;
}
diff --git a/ctf.h b/ctf.h
index 25b79892bde3..9e47c3c74677 100644
--- a/ctf.h
+++ b/ctf.h
@@ -100,6 +100,7 @@ struct ctf_full_type {
#define CTF_TYPE_INT_CHAR  0x2
#define CTF_TYPE_INT_BOOL  0x4
#define CTF_TYPE_INT_VARARGS   0x8
+#define CTF_TYPE_INT_UNSIGNED  0x16

#define CTF_TYPE_FP_ATTRS(VAL) ((VAL) >> 24)
#define CTF_TYPE_FP_OFFSET(VAL)(((VAL) >> 16) & 0xff)
diff --git a/dwarf_loader.c b/dwarf_loader.c
index b73d7867e1e6..79d40f183c24 100644
--- a/dwarf_loader.c
+++ b/dwarf_loader.c
@@ -473,6 +473,7 @@ static struct base_type *base_type__new(Dwarf_Die
*die, struct cu *cu)
bt->is_bool = encoding == DW_ATE_boolean;
bt->is_signed = encoding == DW_ATE_signed;
bt->is_varargs = false;
+   bt->is_unsigned = encoding == DW_ATE_unsigned;
bt->name_has_encoding = true;
}

diff --git a/dwarves.h b/dwarves.h
index 98caf1abc54d..edf32d2e6f80 100644
--- a/dwarves.h
+++ b/dwarves.h
@@ -1261,6 +1261,7 @@ struct base_type {
uint8_t is_signed:1;
uint8_t is_bool:1;
uint8_t is_varargs:1;
+   uint8_t is_unsigned:1;
uint8_t float_type:4;
};

diff --git a/lib/bpf b/lib/bpf
--- a/lib/bpf
+++ b/lib/bpf
@@ -1 +1 @@
-Subproject commit 5af3d86b5a2c5fecdc3ab83822d083edd32b4396
+Subproject commit 5af3d86b5a2c5fecdc3ab83822d083edd32b4396-dirty
diff --git a/libbtf.c b/libbtf.c
index 9f7628304495..a0661a7bbed9 100644
--- a/libbtf.c
+++ b/libbtf.c
@@ -247,6 +247,8 @@ static const char *
btf_elf__int_encoding_str(uint8_t encoding)
return "CHAR";
else if (encoding == BTF_INT_BOOL)
return "BOOL";
+   else if (encoding == BTF_INT_UNSIGNED)
+   return "UNSIGNED";
else
return "UNKN";
}
@@ -379,6 +381,8 @@ int32_t btf_elf__add_base_type(struct btf_elf
*btfe, const struct base_type *bt,
encoding = BTF_INT_SIGNED;
} else if (bt->is_bool) {
encoding = BTF_INT_BOOL;
+   } else if (bt->is_unsigned) {
+   encoding = BTF_INT_UNSIGNED;
} else if (bt->float_type) {
fprintf(stderr, "float_type is not supported\n");
return -1;

Additionally - I cannot see it with `git diff`:

[ lib/bpf/include/uapi/linux/btf.h ]

/* Attributes stored in the BTF_INT_ENCODING */
#define BTF_INT_SIGNED (1 << 0)
#define BTF_INT_CHAR (1 << 1)
#define BTF_INT_BOOL (1 << 2)
#define BTF_INT_UNSIGNED (1 << 3)

Comments?



Hmmm...

+ info BTF .btf.vmlinux.bin.o
+ [  != silent_ 

Re: [PATCH bpf-next v3] bpf: Propagate stack bounds to registers in atomics w/ BPF_FETCH

2021-02-03 Thread Yonghong Song




On 2/2/21 5:50 AM, Brendan Jackman wrote:

When BPF_FETCH is set, atomic instructions load a value from memory
into a register. The current verifier code first checks via
check_mem_access whether we can access the memory, and then checks
via check_reg_arg whether we can write into the register.

For loads, check_reg_arg has the side-effect of marking the
register's value as unkonwn, and check_mem_access has the side effect
of propagating bounds from memory to the register. This currently only
takes effect for stack memory.

Therefore with the current order, bounds information is thrown away,
but by simply reversing the order of check_reg_arg
vs. check_mem_access, we can instead propagate bounds smartly.

A simple test is added with an infinite loop that can only be proved
unreachable if this propagation is present. This is implemented both
with C and directly in test_verifier using assembly.

Suggested-by: John Fastabend 
Signed-off-by: Brendan Jackman 


Ack with a nit below.

Acked-by: Yonghong Song 


---

Difference from v2->v3 [1]:

  * Fixed missing ENABLE_ATOMICS_TESTS check.

Difference from v1->v2:

  * Reworked commit message to clarify this only affects stack memory
  * Added the Suggested-by
  * Added a C-based test.

[1]: 
https://lore.kernel.org/bpf/ca+i-1c2zwubgxwj8kaxbri9rbboyumavj_bbhg+2zf_su9b...@mail.gmail.com/T/#t

  kernel/bpf/verifier.c | 32 +++
  .../selftests/bpf/prog_tests/atomic_bounds.c  | 15 +
  .../selftests/bpf/progs/atomic_bounds.c   | 24 ++
  .../selftests/bpf/verifier/atomic_bounds.c| 27 
  4 files changed, 84 insertions(+), 14 deletions(-)
  create mode 100644 tools/testing/selftests/bpf/prog_tests/atomic_bounds.c
  create mode 100644 tools/testing/selftests/bpf/progs/atomic_bounds.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_bounds.c

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 972fc38eb62d..5e09632efddb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3665,9 +3665,26 @@ static int check_atomic(struct bpf_verifier_env *env, 
int insn_idx, struct bpf_i
return -EACCES;
}

+   if (insn->imm & BPF_FETCH) {
+   if (insn->imm == BPF_CMPXCHG)
+   load_reg = BPF_REG_0;
+   else
+   load_reg = insn->src_reg;
+
+   /* check and record load of old value */
+   err = check_reg_arg(env, load_reg, DST_OP);
+   if (err)
+   return err;
+   } else {
+   /* This instruction accesses a memory location but doesn't
+* actually load it into a register.
+*/
+   load_reg = -1;
+   }
+
/* check whether we can read the memory */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
-  BPF_SIZE(insn->code), BPF_READ, -1, true);
+  BPF_SIZE(insn->code), BPF_READ, load_reg, true);
if (err)
return err;

@@ -3677,19 +3694,6 @@ static int check_atomic(struct bpf_verifier_env *env, 
int insn_idx, struct bpf_i
if (err)
return err;

-   if (!(insn->imm & BPF_FETCH))
-   return 0;
-
-   if (insn->imm == BPF_CMPXCHG)
-   load_reg = BPF_REG_0;
-   else
-   load_reg = insn->src_reg;
-
-   /* check and record load of old value */
-   err = check_reg_arg(env, load_reg, DST_OP);
-   if (err)
-   return err;
-
return 0;
  }

diff --git a/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c 
b/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c
new file mode 100644
index ..addf127068e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+
+#include "atomic_bounds.skel.h"
+
+void test_atomic_bounds(void)
+{
+   struct atomic_bounds *skel;
+   __u32 duration = 0;
+
+   skel = atomic_bounds__open_and_load();
+   if (CHECK(!skel, "skel_load", "couldn't load program\n"))
+   return;


You are missing
atomic_bounds__destroy(skel);
here.


+}
diff --git a/tools/testing/selftests/bpf/progs/atomic_bounds.c 
b/tools/testing/selftests/bpf/progs/atomic_bounds.c
new file mode 100644
index ..e5fff7fc7f8f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/atomic_bounds.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+
+#ifdef ENABLE_ATOMICS_TESTS
+bool skip_tests __attribute((__section__(".data"))) = false;
+#else
+bool skip_tests = true;
+#endif
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(sub, int x)
+{
+#ifdef ENABLE_ATOMICS_TESTS
+   int a = 0;
+   

Re: [PATCH bpf-next 2/3] bpf: Add size arg to build_id_parse function

2021-01-26 Thread Yonghong Song




On 1/26/21 12:52 PM, Jiri Olsa wrote:

On Thu, Jan 14, 2021 at 07:47:20PM -0800, Alexei Starovoitov wrote:

On Thu, Jan 14, 2021 at 3:44 PM Yonghong Song  wrote:




On 1/14/21 2:02 PM, Jiri Olsa wrote:

On Thu, Jan 14, 2021 at 01:05:33PM -0800, Yonghong Song wrote:



On 1/14/21 12:01 PM, Jiri Olsa wrote:

On Thu, Jan 14, 2021 at 10:56:33AM -0800, Yonghong Song wrote:



On 1/14/21 5:40 AM, Jiri Olsa wrote:

It's possible to have other build id types (other than default SHA1).
Currently there's also ld support for MD5 build id.


Currently, bpf build_id based stackmap does not returns the size of
the build_id. Did you see an issue here? I guess user space can check
the length of non-zero bits of the build id to decide what kind of
type it is, right?


you can have zero bytes in the build id hash, so you need to get the size

I never saw MD5 being used in practise just SHA1, but we added the
size to be complete and make sure we'll fit with build id, because
there's only limited space in mmap2 event


I am asking to check whether we should extend uapi struct
bpf_stack_build_id to include build_id_size as well. I guess
we can delay this until a real use case.


right, we can try make some MD5 build id binaries and check if it
explodes with some bcc tools, but I don't expect that.. I'll try
to find some time for that


Thanks. We may have issues on bcc side. For build_id collected in
kernel, bcc always generates a length-20 string. But for user
binaries, the build_id string length is equal to actual size of
the build_id. They may not match (MD5 length is 16).
The fix is probably to append '0's (up to length 20) for user
binary build_id's.

I guess MD5 is very seldom used. I will wait if you can reproduce
the issue and then we might fix it.


Indeed.
Jiri, please check whether md5 is really an issue.
Sounds like we have to do something on the kernel side.
Hopefully zero padding will be enough.
I would prefer to avoid extending uapi struct to cover rare case.


build_id_parse is already doing the zero padding, so we are ok

I tried several bcc tools over perf bench with md5 buildid and
the results looked ok


Great. Thanks for confirmation!



jirka



Re: [PATCH bpf-next v2] samples/bpf: Set flag __SANE_USERSPACE_TYPES__ for MIPS to fix build warnings

2021-01-25 Thread Yonghong Song




On 1/24/21 9:05 PM, Tiezhu Yang wrote:

There exist many build warnings when make M=samples/bpf on the Loongson
platform, this issue is MIPS related, x86 compiles just fine.

Here are some warnings:

   CC  samples/bpf/ibumad_user.o
samples/bpf/ibumad_user.c: In function ‘dump_counts’:
samples/bpf/ibumad_user.c:46:24: warning: format ‘%llu’ expects argument of 
type ‘long long unsigned int’, but argument 3 has type ‘__u64’ {aka ‘long 
unsigned int’} [-Wformat=]
 printf("0x%02x : %llu\n", key, value);
  ~~~^  ~
  %lu
   CC  samples/bpf/offwaketime_user.o
samples/bpf/offwaketime_user.c: In function ‘print_ksym’:
samples/bpf/offwaketime_user.c:34:17: warning: format ‘%llx’ expects argument 
of type ‘long long unsigned int’, but argument 3 has type ‘__u64’ {aka ‘long 
unsigned int’} [-Wformat=]
printf("%s/%llx;", sym->name, addr);
   ~~~^   
   %lx
samples/bpf/offwaketime_user.c: In function ‘print_stack’:
samples/bpf/offwaketime_user.c:68:17: warning: format ‘%lld’ expects argument 
of type ‘long long int’, but argument 3 has type ‘__u64’ {aka ‘long unsigned 
int’} [-Wformat=]
   printf(";%s %lld\n", key->waker, count);
   ~~~^ ~
   %ld

MIPS needs __SANE_USERSPACE_TYPES__ before  to select
'int-ll64.h' in arch/mips/include/uapi/asm/types.h, then it can avoid
build warnings when printing __u64 with %llu, %llx or %lld.

The header tools/include/linux/types.h defines __SANE_USERSPACE_TYPES__,
it seems that we can include  in the source files which
have build warnings, but it has no effect due to actually it includes
usr/include/linux/types.h instead of tools/include/linux/types.h, the
problem is that "usr/include" is preferred first than "tools/include"
in samples/bpf/Makefile, that sounds like a ugly hack to -Itools/include
before -Iusr/include.

So define __SANE_USERSPACE_TYPES__ for MIPS in samples/bpf/Makefile
is proper, if add "TPROGS_CFLAGS += -D__SANE_USERSPACE_TYPES__" in
samples/bpf/Makefile, it appears the following error:

Auto-detecting system features:
...libelf: [ on  ]
...  zlib: [ on  ]
...   bpf: [ OFF ]

BPF API too old
make[3]: *** [Makefile:293: bpfdep] Error 1
make[2]: *** [Makefile:156: all] Error 2

With #ifndef __SANE_USERSPACE_TYPES__  in tools/include/linux/types.h,
the above error has gone and this ifndef change does not hurt other
compilations.

Signed-off-by: Tiezhu Yang 


Acked-by: Yonghong Song 


Re: [PATCH bpf-next v5 4/4] selftests/bpf: Add a selftest for the tracing bpf_get_socket_cookie

2021-01-23 Thread Yonghong Song




On 1/22/21 7:34 AM, Florent Revest wrote:

On Wed, Jan 20, 2021 at 8:06 PM Florent Revest  wrote:


On Wed, Jan 20, 2021 at 8:04 PM Alexei Starovoitov
 wrote:


On Wed, Jan 20, 2021 at 9:08 AM KP Singh  wrote:


On Tue, Jan 19, 2021 at 5:00 PM Florent Revest  wrote:


This builds up on the existing socket cookie test which checks whether
the bpf_get_socket_cookie helpers provide the same value in
cgroup/connect6 and sockops programs for a socket created by the
userspace part of the test.

Adding a tracing program to the existing objects requires a different
attachment strategy and different headers.

Signed-off-by: Florent Revest 


Acked-by: KP Singh 

(one minor note, doesn't really need fixing as a part of this though)


---
  .../selftests/bpf/prog_tests/socket_cookie.c  | 24 +++
  .../selftests/bpf/progs/socket_cookie_prog.c  | 41 ---
  2 files changed, 52 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/socket_cookie.c 
b/tools/testing/selftests/bpf/prog_tests/socket_cookie.c
index 53d0c44e7907..e5c5e2ea1deb 100644
--- a/tools/testing/selftests/bpf/prog_tests/socket_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/socket_cookie.c
@@ -15,8 +15,8 @@ struct socket_cookie {

  void test_socket_cookie(void)
  {
+   struct bpf_link *set_link, *update_sockops_link, *update_tracing_link;
 socklen_t addr_len = sizeof(struct sockaddr_in6);
-   struct bpf_link *set_link, *update_link;
 int server_fd, client_fd, cgroup_fd;
 struct socket_cookie_prog *skel;
 __u32 cookie_expected_value;
@@ -39,15 +39,21 @@ void test_socket_cookie(void)
   PTR_ERR(set_link)))
 goto close_cgroup_fd;

-   update_link = bpf_program__attach_cgroup(skel->progs.update_cookie,
-cgroup_fd);
-   if (CHECK(IS_ERR(update_link), "update-link-cg-attach", "err %ld\n",
- PTR_ERR(update_link)))
+   update_sockops_link = bpf_program__attach_cgroup(
+   skel->progs.update_cookie_sockops, cgroup_fd);
+   if (CHECK(IS_ERR(update_sockops_link), "update-sockops-link-cg-attach",
+ "err %ld\n", PTR_ERR(update_sockops_link)))
 goto free_set_link;

+   update_tracing_link = bpf_program__attach(
+   skel->progs.update_cookie_tracing);
+   if (CHECK(IS_ERR(update_tracing_link), "update-tracing-link-attach",
+ "err %ld\n", PTR_ERR(update_tracing_link)))
+   goto free_update_sockops_link;
+
 server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
 if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno))
-   goto free_update_link;
+   goto free_update_tracing_link;

 client_fd = connect_to_fd(server_fd, 0);
 if (CHECK(client_fd < 0, "connect_to_fd", "errno %d\n", errno))
@@ -71,8 +77,10 @@ void test_socket_cookie(void)
 close(client_fd);
  close_server_fd:
 close(server_fd);
-free_update_link:
-   bpf_link__destroy(update_link);
+free_update_tracing_link:
+   bpf_link__destroy(update_tracing_link);


I don't think this need to block submission unless there are other
issues but the
bpf_link__destroy can just be called in a single cleanup label because
it handles null or
erroneous inputs:

int bpf_link__destroy(struct bpf_link *link)
{
 int err = 0;

 if (IS_ERR_OR_NULL(link))
  return 0;
[...]


+1 to KP's point.

Also Florent, how did you test it?
This test fails in CI and in my manual run:
./test_progs -t cook
libbpf: load bpf program failed: Permission denied
libbpf: -- BEGIN DUMP LOG ---
libbpf:
; int update_cookie_sockops(struct bpf_sock_ops *ctx)
0: (bf) r6 = r1
; if (ctx->family != AF_INET6)
1: (61) r1 = *(u32 *)(r6 +20)
; if (ctx->family != AF_INET6)
2: (56) if w1 != 0xa goto pc+21
  R1_w=inv10 R6_w=ctx(id=0,off=0,imm=0) R10=fp0
; if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
3: (61) r1 = *(u32 *)(r6 +0)
; if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
4: (56) if w1 != 0x3 goto pc+19
  R1_w=inv3 R6_w=ctx(id=0,off=0,imm=0) R10=fp0
; if (!ctx->sk)
5: (79) r1 = *(u64 *)(r6 +184)
; if (!ctx->sk)
6: (15) if r1 == 0x0 goto pc+17
  R1_w=sock(id=0,ref_obj_id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) R10=fp0
; p = bpf_sk_storage_get(&socket_cookies, ctx->sk, 0, 0);
7: (79) r2 = *(u64 *)(r6 +184)
; p = bpf_sk_storage_get(&socket_cookies, ctx->sk, 0, 0);
8: (18) r1 = 0x888106e41400
10: (b7) r3 = 0
11: (b7) r4 = 0
12: (85) call bpf_sk_storage_get#107
R2 type=sock_or_null expected=sock_common, sock, tcp_sock, xdp_sock, ptr_
processed 12 insns (limit 100) max_states_per_insn 0 total_states
0 peak_states 0 mark_read 0

libbpf: -- END LOG --
libbpf: failed to load program 'update_cookie_sockops'
libbpf: failed to load object 'socket_cookie_prog'
libbpf: failed to load BPF skeleton 'socket_cookie_prog': -4007
test_socket_cookie:FAIL:socket_cookie_prog__open_and_load 

Re: KASAN: vmalloc-out-of-bounds Read in bpf_trace_run7

2021-01-23 Thread Yonghong Song



I can reproduce the issue with C reproducer. This is an old known issue 
though and the failure is due to memory allocation failure in 
tracepoint_probe_unregister().


[   40.807849][ T8287] Call Trace:
[   40.808201][ T8287]  dump_stack+0x77/0x97
[   40.808695][ T8287]  should_fail.cold.6+0x32/0x4c
[   40.809238][ T8287]  should_failslab+0x5/0x10
[   40.809709][ T8287]  slab_pre_alloc_hook.constprop.97+0xa0/0xd0
[   40.810365][ T8287]  ? tracepoint_probe_unregister+0xc7/0x2b0
[   40.810998][ T8287]  __kmalloc+0x64/0x210
[   40.811442][ T8287]  ? trace_raw_output_percpu_destroy_chunk+0x40/0x40
[   40.812158][ T8287]  tracepoint_probe_unregister+0xc7/0x2b0
[   40.812766][ T8287]  bpf_raw_tp_link_release+0x11/0x20
[   40.813328][ T8287]  bpf_link_free+0x20/0x40
[   40.813802][ T8287]  bpf_link_release+0xc/0x10
[   40.814242][ T8287]  __fput+0xa1/0x250
[   40.814606][ T8287]  task_work_run+0x68/0xb0
[   40.815030][ T8287]  exit_to_user_mode_prepare+0x22c/0x250

Steven Rostedt has the following pending patch
  https://lore.kernel.org/bpf/20201118093405.7a6d2...@gandalf.local.home/
trying to solve this exact problem.

On 1/20/21 11:14 PM, syzbot wrote:

syzbot has bisected this issue to:

commit 8b401f9ed2441ad9e219953927a842d24ed051fc
Author: Yonghong Song 
Date:   Thu May 23 21:47:45 2019 +

 bpf: implement bpf_send_signal() helper

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=123408e750
start commit:   7d68e382 bpf: Permit size-0 datasec
git tree:   bpf-next
final oops: https://syzkaller.appspot.com/x/report.txt?x=113408e750
console output: https://syzkaller.appspot.com/x/log.txt?x=163408e750
kernel config:  https://syzkaller.appspot.com/x/.config?x=e0c7843b8af99dff
dashboard link: https://syzkaller.appspot.com/bug?extid=fad5d91c7158ce568634
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=1224daa4d0
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=13dfabd0d0

Reported-by: syzbot+fad5d91c7158ce568...@syzkaller.appspotmail.com
Fixes: 8b401f9ed244 ("bpf: implement bpf_send_signal() helper")

For information about bisection process see: https://goo.gl/tpsmEJ#bisection



Re: [PATCH bpf-next v3] samples/bpf: Update build procedure for manually compiling LLVM and Clang

2021-01-21 Thread Yonghong Song




On 1/21/21 6:23 AM, Nathan Chancellor wrote:

On Thu, Jan 21, 2021 at 12:08:31AM -0800, Andrii Nakryiko wrote:

On Wed, Jan 20, 2021 at 9:36 PM Nathan Chancellor
 wrote:


On Thu, Jan 21, 2021 at 01:27:35PM +0800, Tiezhu Yang wrote:

The current LLVM and Clang build procedure in samples/bpf/README.rst is
out of date. See below that the links are not accessible any more.

$ git clone http://llvm.org/git/llvm.git
Cloning into 'llvm'...
fatal: unable to access 'http://llvm.org/git/llvm.git/ ': Maximum (20) 
redirects followed
$ git clone --depth 1 http://llvm.org/git/clang.git
Cloning into 'clang'...
fatal: unable to access 'http://llvm.org/git/clang.git/ ': Maximum (20) 
redirects followed

The LLVM community has adopted new ways to build the compiler. There are
different ways to build LLVM and Clang, the Clang Getting Started page [1]
has one way. As Yonghong said, it is better to copy the build procedure
in Documentation/bpf/bpf_devel_QA.rst to keep consistent.

I verified the procedure and it is proved to be feasible, so we should
update README.rst to reflect the reality. At the same time, update the
related comment in Makefile.

Additionally, as Fangrui said, the dir llvm-project/llvm/build/install is
not used, BUILD_SHARED_LIBS=OFF is the default option [2], so also change
Documentation/bpf/bpf_devel_QA.rst together.

[1] https://clang.llvm.org/get_started.html
[2] https://www.llvm.org/docs/CMake.html

Signed-off-by: Tiezhu Yang 
Acked-by: Yonghong Song 


Reviewed-by: Nathan Chancellor 

Small comment below.


---

v2: Update the commit message suggested by Yonghong,
 thank you very much.

v3: Remove the default option BUILD_SHARED_LIBS=OFF
 and just mkdir llvm-project/llvm/build suggested
 by Fangrui.

  Documentation/bpf/bpf_devel_QA.rst |  3 +--
  samples/bpf/Makefile   |  2 +-
  samples/bpf/README.rst | 16 +---
  3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/Documentation/bpf/bpf_devel_QA.rst 
b/Documentation/bpf/bpf_devel_QA.rst
index 5b613d2..18788bb 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -506,11 +506,10 @@ that set up, proceed with building the latest LLVM and 
clang version
  from the git repositories::

   $ git clone https://github.com/llvm/llvm-project.git
- $ mkdir -p llvm-project/llvm/build/install
+ $ mkdir -p llvm-project/llvm/build
   $ cd llvm-project/llvm/build
   $ cmake .. -G "Ninja" -DLLVM_TARGETS_TO_BUILD="BPF;X86" \
  -DLLVM_ENABLE_PROJECTS="clang"\
--DBUILD_SHARED_LIBS=OFF   \
  -DCMAKE_BUILD_TYPE=Release\
  -DLLVM_BUILD_RUNTIME=OFF
   $ ninja
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 26fc96c..d061446 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -208,7 +208,7 @@ TPROGLDLIBS_xdpsock   += -pthread -lcap
  TPROGLDLIBS_xsk_fwd  += -pthread

  # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on 
cmdline:
-#  make M=samples/bpf/ LLC=~/git/llvm/build/bin/llc 
CLANG=~/git/llvm/build/bin/clang
+# make M=samples/bpf LLC=~/git/llvm-project/llvm/build/bin/llc 
CLANG=~/git/llvm-project/llvm/build/bin/clang
  LLC ?= llc
  CLANG ?= clang
  OPT ?= opt
diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst
index dd34b2d..23006cb 100644
--- a/samples/bpf/README.rst
+++ b/samples/bpf/README.rst
@@ -65,17 +65,19 @@ To generate a smaller llc binary one can use::
  Quick sniplet for manually compiling LLVM and clang
  (build dependencies are cmake and gcc-c++)::


Technically, ninja is now a build dependency as well, it might be worth
mentioning that here (usually the package is ninja or ninja-build).


it's possible to generate Makefile by passing `-G "Unix Makefiles"`,
which would avoid dependency on ninja, no?


Yes, although I am fairly certain that building with ninja is quicker so
I would recommend keeping it. One small extra dependency never killed
anyone plus ninja is becoming more common nowadays :)


Agree. Let us use 'ninja' here, which is widely used in llvm community
for llvm-project build compared to other alternatives.




Regardless of whether that is addressed or not (because it is small),
feel free to carry forward my tag in any future revisions unless they
drastically change.


- $ git clone http://llvm.org/git/llvm.git
- $ cd llvm/tools
- $ git clone --depth 1 http://llvm.org/git/clang.git
- $ cd ..; mkdir build; cd build
- $ cmake .. -DLLVM_TARGETS_TO_BUILD="BPF;X86"
- $ make -j $(getconf _NPROCESSORS_ONLN)
+ $ git clone https://github.com/llvm/llvm-project.git
+ $ mkdir -p llvm-project/llvm/build
+ $ cd llvm-project/llvm/build
+ $ cmake .. -G "Ninja" -DLLVM_TARGETS_TO_BUILD="BPF;X86" \
+-DLLVM_ENABLE_PROJECTS="clang"\
+-DCMAKE_BUILD_TYPE=Release\

Re: [PATCH bpf-next v2] samples/bpf: Update README.rst and Makefile for manually compiling LLVM and clang

2021-01-19 Thread Yonghong Song




On 1/19/21 6:32 PM, Tiezhu Yang wrote:

On 01/20/2021 05:58 AM, Fangrui Song wrote:

On 2021-01-19, Tiezhu Yang wrote:

The current llvm/clang build procedure in samples/bpf/README.rst is
out of date. See below that the links are not accessible any more.

$ git clone 
http://llvm.org/git/llvm.git 
Cloning into 'llvm'...
fatal: unable to access 
'http://llvm.org/git/llvm.git/ 
': Maximum (20) redirects followed
$ git clone --depth 1 
http://llvm.org/git/clang.git 
Cloning into 'clang'...
fatal: unable to access 
'http://llvm.org/git/clang.git/ 
': Maximum (20) redirects followed


The llvm community has adopted new ways to build the compiler. There are
different ways to build llvm/clang, the Clang Getting Started page 
[1] has

one way. As Yonghong said, it is better to just copy the build procedure
in Documentation/bpf/bpf_devel_QA.rst to keep consistent.

I verified the procedure and it is proved to be feasible, so we should
update README.rst to reflect the reality. At the same time, update the
related comment in Makefile.

[1] 
https://clang.llvm.org/get_started.html 


Signed-off-by: Tiezhu Yang 
Acked-by: Yonghong Song 
---

v2: Update the commit message suggested by Yonghong,
   thank you very much.

samples/bpf/Makefile   |  2 +-
samples/bpf/README.rst | 17 ++---
2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 26fc96c..d061446 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -208,7 +208,7 @@ TPROGLDLIBS_xdpsock    += -pthread -lcap
TPROGLDLIBS_xsk_fwd    += -pthread

# Allows pointing LLC/CLANG to a LLVM backend with bpf support, 
redefine on cmdline:
-#  make M=samples/bpf/ LLC=~/git/llvm/build/bin/llc 
CLANG=~/git/llvm/build/bin/clang
+# make M=samples/bpf LLC=~/git/llvm-project/llvm/build/bin/llc 
CLANG=~/git/llvm-project/llvm/build/bin/clang

LLC ?= llc
CLANG ?= clang
OPT ?= opt
diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst
index dd34b2d..d1be438 100644
--- a/samples/bpf/README.rst
+++ b/samples/bpf/README.rst
@@ -65,17 +65,20 @@ To generate a smaller llc binary one can use::
Quick sniplet for manually compiling LLVM and clang
(build dependencies are cmake and gcc-c++)::

- $ git clone 
http://llvm.org/git/llvm.git 
- $ cd llvm/tools
- $ git clone --depth 1 
http://llvm.org/git/clang.git 
- $ cd ..; mkdir build; cd build

- $ cmake .. -DLLVM_TARGETS_TO_BUILD="BPF;X86"
- $ make -j $(getconf _NPROCESSORS_ONLN)
+ $ git clone https://github.com/llvm/llvm-project.git
+ $ mkdir -p llvm-project/llvm/build/install


llvm-project/llvm/build/install is not used.


Yes, just mkdir -p llvm-project/llvm/build is OK.




+ $ cd llvm-project/llvm/build
+ $ cmake .. -G "Ninja" -DLLVM_TARGETS_TO_BUILD="BPF;X86" \
+    -DLLVM_ENABLE_PROJECTS="clang"    \
+    -DBUILD_SHARED_LIBS=OFF   \


-DBUILD_SHARED_LIBS=OFF is the default. It can be omitted.


I search the related doc [1] [2], remove this option is OK for me.

BUILD_SHARED_LIBS:BOOL

     Flag indicating if each LLVM component (e.g. Support) is built as a 
shared library (ON) or as a static library (OFF). Its default value is OFF.


[1] 
https://www.llvm.org/docs/CMake.html 
[2] 
https://cmake.org/cmake/help/latest/variable/BUILD_SHARED_LIBS.html 




+ -DCMAKE_BUILD_TYPE=Release    \
+    -DLLVM_BUILD_RUNTIME=OFF


-DLLVM_BUILD_RUNTIME=OFF can be omitted if none of
compiler-rt/libc++/libc++abi is built.


I am not very sure about it because the default value of
LLVM_BUILD_RUNTIME is ON? [3]

option(LLVM_BUILD_RUNTIME
"Build the LLVM runtime libraries." ON)

[3] https://github.com/llvm/llvm-project/blob/main/llvm/CMakeLists.txt

If anyone has any more suggestions, please let me know.
I will send v3 after waiting for other feedback.

By the way, Documentation/bpf/bpf_devel_QA.rst maybe need a separate
patch to remove some cmake options?


Please submit updated this patch and Documentation/bpf/bpf_devel_QA.rst
together. This way, it is easy to cross check. Thanks.



Thanks,
Tiezhu


[...]


Re: [PATCH v3 bpf-next 2/2] selftests: bpf: Add a new test for bare tracepoints

2021-01-19 Thread Yonghong Song




On 1/19/21 4:22 AM, Qais Yousef wrote:

Reuse module_attach infrastructure to add a new bare tracepoint to check
we can attach to it as a raw tracepoint.

Signed-off-by: Qais Yousef 


Acked-by: Yonghong Song 


Re: [PATCH bpf] samples/bpf: Update README.rst and Makefile for manually compiling LLVM and clang

2021-01-18 Thread Yonghong Song




On 1/18/21 7:53 PM, Tiezhu Yang wrote:

In the current samples/bpf/README.rst, the url of llvm and clang git
may be out of date, they are unable to access:


Let us just rephrase the above more clearly, something like below.

The current clang/llvm build procedure in samples/bpf/README.rst is
out of date. See below that the links are not accessible any more.



$ git clone http://llvm.org/git/llvm.git
Cloning into 'llvm'...
fatal: unable to access 'http://llvm.org/git/llvm.git/ ': Maximum (20) 
redirects followed
$ git clone --depth 1 http://llvm.org/git/clang.git
Cloning into 'clang'...
fatal: unable to access 'http://llvm.org/git/clang.git/ ': Maximum (20) 
redirects followed



The llvm community has adopted new ways to build the compiler.
[followed by your descriptions below]


There are different ways to build llvm/clang, I find the Clang Getting
Started page [1] has one way, as Yonghong said, it is better to just
copy the build procedure in Documentation/bpf/bpf_devel_QA.rst to keep
consistent.

I verified the procedure and it is proved to be feasible, so we should
update README.rst to reflect the reality. At the same time, update the
related comment in Makefile.

[1] https://clang.llvm.org/get_started.html

Signed-off-by: Tiezhu Yang 


Ack with minor nits in the above. Also, this is a documentation update.
I think it is okay to target the patch to bpf-next instead of bpf.

Acked-by: Yonghong Song 


Re: [PATCH bpf 1/2] samples/bpf: Set flag __SANE_USERSPACE_TYPES__ for MIPS to fix build warnings

2021-01-18 Thread Yonghong Song




On 1/17/21 7:22 PM, Tiezhu Yang wrote:

On 01/14/2021 01:12 AM, Yonghong Song wrote:



On 1/13/21 2:57 AM, Tiezhu Yang wrote:

MIPS needs __SANE_USERSPACE_TYPES__ before  to select
'int-ll64.h' in arch/mips/include/uapi/asm/types.h and avoid compile
warnings when printing __u64 with %llu, %llx or %lld.


could you mention which command produces the following warning?


make M=samples/bpf





 printf("0x%02x : %llu\n", key, value);
  ~~~^  ~
  %lu
    printf("%s/%llx;", sym->name, addr);
   ~~~^   
   %lx
   printf(";%s %lld\n", key->waker, count);
   ~~~^ ~
   %ld

Signed-off-by: Tiezhu Yang 
---
  samples/bpf/Makefile    | 4 
  tools/include/linux/types.h | 3 +++
  2 files changed, 7 insertions(+)

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 26fc96c..27de306 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -183,6 +183,10 @@ BPF_EXTRA_CFLAGS := $(ARM_ARCH_SELECTOR)
  TPROGS_CFLAGS += $(ARM_ARCH_SELECTOR)
  endif
  +ifeq ($(ARCH), mips)
+TPROGS_CFLAGS += -D__SANE_USERSPACE_TYPES__
+endif
+


This change looks okay based on description in
arch/mips/include/uapi/asm/types.h

'''
/*
 * We don't use int-l64.h for the kernel anymore but still use it for
 * userspace to avoid code changes.
 *
 * However, some user programs (e.g. perf) may not want this. They can
 * flag __SANE_USERSPACE_TYPES__ to get int-ll64.h here.
 */
'''


  TPROGS_CFLAGS += -Wall -O2
  TPROGS_CFLAGS += -Wmissing-prototypes
  TPROGS_CFLAGS += -Wstrict-prototypes
diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h
index 154eb4e..e9c5a21 100644
--- a/tools/include/linux/types.h
+++ b/tools/include/linux/types.h
@@ -6,7 +6,10 @@
  #include 
  #include 
  +#ifndef __SANE_USERSPACE_TYPES__
  #define __SANE_USERSPACE_TYPES__    /* For PPC64, to get LL64 types */
+#endif


What problem this patch fixed?


If add "TPROGS_CFLAGS += -D__SANE_USERSPACE_TYPES__" in
samples/bpf/Makefile, it appears the following error:

Auto-detecting system features:
...    libelf: [ on  ]
...  zlib: [ on  ]
...   bpf: [ OFF ]

BPF API too old
make[3]: *** [Makefile:293: bpfdep] Error 1
make[2]: *** [Makefile:156: all] Error 2

With #ifndef __SANE_USERSPACE_TYPES__  in tools/include/linux/types.h,
the above error has gone.


If this header is used, you can just
change comment from "PPC64" to "PPC64/MIPS", right?


If include  in the source files which have compile warnings
when printing __u64 with %llu, %llx or %lld, it has no effect due to 
actually
it includes usr/include/linux/types.h instead of 
tools/include/linux/types.h,
this is because the include-directories in samples/bpf/Makefile are 
searched

in the order, -I./usr/include is in the front of -I./tools/include.

So I think define __SANE_USERSPACE_TYPES__ for MIPS in samples/bpf/Makefile
is proper, at the same time, add #ifndef __SANE_USERSPACE_TYPES__ in
tools/include/linux/types.h can avoid build error and have no side effect.

I will send v2 later with mention in the commit message that this is
mips related.


It would be good if you can add the above information to the commit
message so people will know what the root cause of the issue.

If I understand correctly, if we could have include path
"tools/include" earlier than "usr/include", we might not have this 
issue. The problem is that "usr/include" is preferred first (uapi)

than "tools/include" (including kernel dev headers).

I am wondering whether we could avoid changes in 
tools/include/linux/types.h, e.g., by undef __SANE_USERSPACE_TYPES__ 
right before include

path tools/include. But that sounds like an ugly hack and actually
the change in tools/include/linux/types.h does not hurt other
compilations.

So your current change looks good to me, but please have better
explanation of the problem and why for each change in the commit
message.



Thanks,
Tiezhu




+
  #include 
  #include 





Re: [PATCH bpf] samples/bpf: Update README.rst for manually compiling LLVM and clang

2021-01-18 Thread Yonghong Song




On 1/18/21 12:53 AM, Tiezhu Yang wrote:

In the current samples/bpf/README.rst, the url of llvm and clang git
may be out of date, they are unable to access:

$ git clone http://llvm.org/git/llvm.git
Cloning into 'llvm'...
fatal: unable to access 'http://llvm.org/git/llvm.git/ ': Maximum (20) 
redirects followed
$ git clone --depth 1 http://llvm.org/git/clang.git
Cloning into 'clang'...
fatal: unable to access 'http://llvm.org/git/clang.git/ ': Maximum (20) 
redirects followed

The Clang Getting Started page [1] might have more accurate information,
I verified the procedure and it is proved to be feasible, so we should
update it to reflect the reality.

[1] https://clang.llvm.org/get_started.html

Signed-off-by: Tiezhu Yang 
---
  samples/bpf/README.rst | 8 +++-
  1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst
index dd34b2d..f606c08 100644
--- a/samples/bpf/README.rst
+++ b/samples/bpf/README.rst
@@ -65,11 +65,9 @@ To generate a smaller llc binary one can use::
  Quick sniplet for manually compiling LLVM and clang
  (build dependencies are cmake and gcc-c++)::
  
- $ git clone http://llvm.org/git/llvm.git

- $ cd llvm/tools
- $ git clone --depth 1 http://llvm.org/git/clang.git
- $ cd ..; mkdir build; cd build
- $ cmake .. -DLLVM_TARGETS_TO_BUILD="BPF;X86"
+ $ git clone https://github.com/llvm/llvm-project.git
+ $ cd llvm-project; mkdir build; cd build
+ $ cmake -DLLVM_ENABLE_PROJECTS=clang -DLLVM_TARGETS_TO_BUILD="BPF;X86" -G "Unix 
Makefiles" ../llvm
   $ make -j $(getconf _NPROCESSORS_ONLN)


Thanks for the patch. Indeed llvm.org/git/llvm has been deprecated. We 
have recommended to use llvm-project at 
kernel/Documentation/bpf/bpf_devel_QA.rst.


https://github.com/torvalds/linux/blob/master/Documentation/bpf/bpf_devel_QA.rst#q-got-it-so-how-do-i-build-llvm-manually-anyway

Could you use the same scripts in the above link here?
There are different ways to build llvm/clang, I just want to be
consistent between bpf_devel_QA.rst and there.

I am also thinking whether we should just provide a link here to
bpf_devel_QA.rst. Looking at samples/bpf/README.rst, it all contains
direct commands for people to build/test, so copy-pasting the llvm
build scripts here should be fine.

  
  It is also possible to point make to the newly compiled 'llc' or




Re: [PATCH v2 bpf-next 2/2] selftests: bpf: Add a new test for bare tracepoints

2021-01-18 Thread Yonghong Song




On 1/18/21 4:18 AM, Qais Yousef wrote:

On 01/16/21 18:11, Yonghong Song wrote:



On 1/16/21 10:21 AM, Qais Yousef wrote:

Reuse module_attach infrastructure to add a new bare tracepoint to check
we can attach to it as a raw tracepoint.

Signed-off-by: Qais Yousef 
---
   .../bpf/bpf_testmod/bpf_testmod-events.h  |  6 +
   .../selftests/bpf/bpf_testmod/bpf_testmod.c   | 21 ++-
   .../selftests/bpf/bpf_testmod/bpf_testmod.h   |  6 +
   .../selftests/bpf/prog_tests/module_attach.c  | 27 +++
   .../selftests/bpf/progs/test_module_attach.c  | 10 +++
   5 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h 
b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
index b83ea448bc79..89c6d58e5dd6 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
@@ -28,6 +28,12 @@ TRACE_EVENT(bpf_testmod_test_read,
  __entry->pid, __entry->comm, __entry->off, __entry->len)
   );
+/* A bare tracepoint with no event associated with it */
+DECLARE_TRACE(bpf_testmod_test_write_bare,
+   TP_PROTO(struct task_struct *task, struct bpf_testmod_test_write_ctx 
*ctx),
+   TP_ARGS(task, ctx)
+);
+
   #endif /* _BPF_TESTMOD_EVENTS_H */
   #undef TRACE_INCLUDE_PATH
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c 
b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 2df19d73ca49..e900adad2276 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -28,9 +28,28 @@ bpf_testmod_test_read(struct file *file, struct kobject 
*kobj,
   EXPORT_SYMBOL(bpf_testmod_test_read);
   ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO);
+noinline ssize_t
+bpf_testmod_test_write(struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t len)
+{
+   struct bpf_testmod_test_write_ctx ctx = {
+   .buf = buf,
+   .off = off,
+   .len = len,
+   };
+
+   trace_bpf_testmod_test_write_bare(current, &ctx);
+
+   return -EIO; /* always fail */
+}
+EXPORT_SYMBOL(bpf_testmod_test_write);
+ALLOW_ERROR_INJECTION(bpf_testmod_test_write, ERRNO);
+
   static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {


Do we need to remove __ro_after_init?


I don't think so. The structure should still remain RO AFAIU.


okay.






-   .attr = { .name = "bpf_testmod", .mode = 0444, },
+   .attr = { .name = "bpf_testmod", .mode = 0666, },
.read = bpf_testmod_test_read,
+   .write = bpf_testmod_test_write,
   };
   static int bpf_testmod_init(void)
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h 
b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
index b81adfedb4f6..b3892dc40111 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
@@ -11,4 +11,10 @@ struct bpf_testmod_test_read_ctx {
size_t len;
   };
+struct bpf_testmod_test_write_ctx {
+   char *buf;
+   loff_t off;
+   size_t len;
+};
+
   #endif /* _BPF_TESTMOD_H */
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c 
b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index 50796b651f72..e4605c0b5af1 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -21,9 +21,34 @@ static int trigger_module_test_read(int read_sz)
return 0;
   }
+static int trigger_module_test_write(int write_sz)
+{
+   int fd, err;


Init err = 0?


I don't see what difference this makes.




+   char *buf = malloc(write_sz);
+
+   if (!buf)
+   return -ENOMEM;


Looks like we already non-negative value, so return ENOMEM?


We already set err=-errno. So shouldn't we return negative too?


Oh, yes, return -ENOMEM sounds right here.






+
+   memset(buf, 'a', write_sz);
+   buf[write_sz-1] = '\0';
+
+   fd = open("/sys/kernel/bpf_testmod", O_WRONLY);
+   err = -errno;
+   if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err))
+   goto out;


Change the above to
fd = open("/sys/kernel/bpf_testmod", O_WRONLY);
if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", errno)) {


Here it should be ... "failed: %d\n", -errno.


err = -errno;
goto out;
}


I kept the code consistent with the definition of trigger_module_test_read().


The original patch code:

+static int trigger_module_test_write(int write_sz)
+{
+   int fd, err;
+   char *buf = malloc(write_sz);
+
+   if (!buf)
+   return -EN

Re: [PATCH bpf-next v2 2/2] docs: bpf: Clarify -mcpu=v3 requirement for atomic ops

2021-01-18 Thread Yonghong Song




On 1/18/21 7:57 AM, Brendan Jackman wrote:

Alexei pointed out [1] that this wording is pretty confusing. Here's
an attempt to be more explicit and clear.

[1] 
https://lore.kernel.org/bpf/CAADnVQJVvwoZsE1K+6qRxzF7+6CvZNzygnoBW9tZNWJELk5c=q...@mail.gmail.com/T/#m07264fc18fdc43af02fc1320968afefcc73d96f4

Signed-off-by: Brendan Jackman 


Thanks for better description!

Acked-by: Yonghong Song 


Re: [PATCH v2 bpf-next 2/2] selftests: bpf: Add a new test for bare tracepoints

2021-01-16 Thread Yonghong Song




On 1/16/21 10:21 AM, Qais Yousef wrote:

Reuse module_attach infrastructure to add a new bare tracepoint to check
we can attach to it as a raw tracepoint.

Signed-off-by: Qais Yousef 
---
  .../bpf/bpf_testmod/bpf_testmod-events.h  |  6 +
  .../selftests/bpf/bpf_testmod/bpf_testmod.c   | 21 ++-
  .../selftests/bpf/bpf_testmod/bpf_testmod.h   |  6 +
  .../selftests/bpf/prog_tests/module_attach.c  | 27 +++
  .../selftests/bpf/progs/test_module_attach.c  | 10 +++
  5 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h 
b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
index b83ea448bc79..89c6d58e5dd6 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
@@ -28,6 +28,12 @@ TRACE_EVENT(bpf_testmod_test_read,
  __entry->pid, __entry->comm, __entry->off, __entry->len)
  );
  
+/* A bare tracepoint with no event associated with it */

+DECLARE_TRACE(bpf_testmod_test_write_bare,
+   TP_PROTO(struct task_struct *task, struct bpf_testmod_test_write_ctx 
*ctx),
+   TP_ARGS(task, ctx)
+);
+
  #endif /* _BPF_TESTMOD_EVENTS_H */
  
  #undef TRACE_INCLUDE_PATH

diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c 
b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 2df19d73ca49..e900adad2276 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -28,9 +28,28 @@ bpf_testmod_test_read(struct file *file, struct kobject 
*kobj,
  EXPORT_SYMBOL(bpf_testmod_test_read);
  ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO);
  
+noinline ssize_t

+bpf_testmod_test_write(struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t len)
+{
+   struct bpf_testmod_test_write_ctx ctx = {
+   .buf = buf,
+   .off = off,
+   .len = len,
+   };
+
+   trace_bpf_testmod_test_write_bare(current, &ctx);
+
+   return -EIO; /* always fail */
+}
+EXPORT_SYMBOL(bpf_testmod_test_write);
+ALLOW_ERROR_INJECTION(bpf_testmod_test_write, ERRNO);
+
  static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {


Do we need to remove __ro_after_init?


-   .attr = { .name = "bpf_testmod", .mode = 0444, },
+   .attr = { .name = "bpf_testmod", .mode = 0666, },
.read = bpf_testmod_test_read,
+   .write = bpf_testmod_test_write,
  };
  
  static int bpf_testmod_init(void)

diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h 
b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
index b81adfedb4f6..b3892dc40111 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
@@ -11,4 +11,10 @@ struct bpf_testmod_test_read_ctx {
size_t len;
  };
  
+struct bpf_testmod_test_write_ctx {

+   char *buf;
+   loff_t off;
+   size_t len;
+};
+
  #endif /* _BPF_TESTMOD_H */
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c 
b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index 50796b651f72..e4605c0b5af1 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -21,9 +21,34 @@ static int trigger_module_test_read(int read_sz)
return 0;
  }
  
+static int trigger_module_test_write(int write_sz)

+{
+   int fd, err;


Init err = 0?


+   char *buf = malloc(write_sz);
+
+   if (!buf)
+   return -ENOMEM;


Looks like we already return a non-negative value, so return ENOMEM?


+
+   memset(buf, 'a', write_sz);
+   buf[write_sz-1] = '\0';
+
+   fd = open("/sys/kernel/bpf_testmod", O_WRONLY);
+   err = -errno;
+   if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err))
+   goto out;


Change the above to
fd = open("/sys/kernel/bpf_testmod", O_WRONLY);
if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", errno)) {
err = -errno;
goto out;
}


+
+   write(fd, buf, write_sz);
+   close(fd);
+out:
+   free(buf);
+


No need for extra line here.


+   return 0;


return err.


+}
+
  void test_module_attach(void)
  {
const int READ_SZ = 456;
+   const int WRITE_SZ = 457;
struct test_module_attach* skel;
struct test_module_attach__bss *bss;
int err;
@@ -48,8 +73,10 @@ void test_module_attach(void)
  
  	/* trigger tracepoint */

ASSERT_OK(trigger_module_test_read(READ_SZ), "trigger_read");
+   ASSERT_OK(trigger_module_test_write(WRITE_SZ), "trigger_write");
  
  	ASSERT_EQ(bss->raw_tp_read_sz, READ_SZ, "raw_tp");

+   ASSERT_EQ(bss->raw_tp_bare_write_sz, WRITE_SZ, "raw_tp_bare");

Re: [PATCH v2 bpf-next 1/2] trace: bpf: Allow bpf to attach to bare tracepoints

2021-01-16 Thread Yonghong Song




On 1/16/21 10:21 AM, Qais Yousef wrote:

Some subsystems only have bare tracepoints (a tracepoint with no
associated trace event) to avoid the problem of trace events being an
ABI that can't be changed.

 From bpf presepective, bare tracepoints are what it calls
RAW_TRACEPOINT().

Since bpf assumed there's 1:1 mapping, it relied on hooking to
DEFINE_EVENT() macro to create bpf mapping of the tracepoints. Since
bare tracepoints use DECLARE_TRACE() to create the tracepoint, bpf had
no knowledge about their existence.

By teaching bpf_probe.h to parse DECLARE_TRACE() in a similar fashion to
DEFINE_EVENT(), bpf can find and attach to the new raw tracepoints.

Enabling that comes with the contract that changes to raw tracepoints
don't constitute a regression if they break existing bpf programs.
We need the ability to continue to morph and modify these raw
tracepoints without worrying about any ABI.

Update Documentation/bpf/bpf_design_QA.rst to document this contract.

Signed-off-by: Qais Yousef 


Acked-by: Yonghong Song 


Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs

2021-01-15 Thread Yonghong Song




On 1/15/21 5:12 PM, Song Liu wrote:




On Jan 15, 2021, at 4:55 PM, Yonghong Song  wrote:



On 1/15/21 3:34 PM, Song Liu wrote:

On Jan 12, 2021, at 8:53 AM, KP Singh  wrote:

On Tue, Jan 12, 2021 at 5:32 PM Yonghong Song  wrote:




On 1/11/21 3:45 PM, Song Liu wrote:




On Jan 11, 2021, at 1:58 PM, Martin Lau  wrote:

On Mon, Jan 11, 2021 at 10:35:43PM +0100, KP Singh wrote:

On Mon, Jan 11, 2021 at 7:57 PM Martin KaFai Lau  wrote:


On Fri, Jan 08, 2021 at 03:19:47PM -0800, Song Liu wrote:

[ ... ]


diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index dd5aedee99e73..9bd47ad2b26f1 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c


[...]


+#include 

#include 
#include 
@@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk)
  cgroup_free(tsk);
  task_numa_free(tsk, true);
  security_task_free(tsk);
+ bpf_task_storage_free(tsk);
  exit_creds(tsk);

If exit_creds() is traced by a bpf and this bpf is doing
bpf_task_storage_get(..., BPF_LOCAL_STORAGE_GET_F_CREATE),
new task storage will be created after bpf_task_storage_free().

I recalled there was an earlier discussion with KP and KP mentioned
BPF_LSM will not be called with a task that is going away.
It seems enabling bpf task storage in bpf tracing will break
this assumption and needs to be addressed?


For tracing programs, I think we will need an allow list where
task local storage can be used.

Instead of whitelist, can refcount_inc_not_zero(&task->usage) be used?


I think we can put refcount_inc_not_zero() in bpf_task_storage_get, like:

diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c
index f654b56907b69..93d01b0a010e6 100644
--- i/kernel/bpf/bpf_task_storage.c
+++ w/kernel/bpf/bpf_task_storage.c
@@ -216,6 +216,9 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, 
struct task_struct *,
  * by an RCU read-side critical section.
  */
 if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+   if (!refcount_inc_not_zero(&task->usage))
+   return -EBUSY;
+
 sdata = bpf_local_storage_update(
 task, (struct bpf_local_storage_map *)map, value,
 BPF_NOEXIST);

But where shall we add the refcount_dec()? IIUC, we cannot add it to
__put_task_struct().


Maybe put_task_struct()?


Yeah, something like, or if you find a more elegant alternative :)

--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -107,13 +107,20 @@ extern void __put_task_struct(struct task_struct *t);

static inline void put_task_struct(struct task_struct *t)
{
-   if (refcount_dec_and_test(&t->usage))
+
+   if (rcu_access_pointer(t->bpf_storage)) {
+   if (refcount_sub_and_test(2, &t->usage))
+   __put_task_struct(t);
+   } else if (refcount_dec_and_test(&t->usage))
__put_task_struct(t);
}

static inline void put_task_struct_many(struct task_struct *t, int nr)
{
-   if (refcount_sub_and_test(nr, &t->usage))
+   if (rcu_access_pointer(t->bpf_storage)) {
+   if (refcount_sub_and_test(nr + 1, &t->usage))
+   __put_task_struct(t);
+   } else if (refcount_sub_and_test(nr, &t->usage))
__put_task_struct(t);
}

It is not ideal to leak bpf_storage here. How about we only add the
following:
diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c
index f654b56907b69..2811b9fc47233 100644
--- i/kernel/bpf/bpf_task_storage.c
+++ w/kernel/bpf/bpf_task_storage.c
@@ -216,6 +216,10 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, 
struct task_struct *,
  * by an RCU read-side critical section.
  */
 if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+   /* the task_struct is being freed, fail over*/
+   if (!refcount_read(&task->usage))
+   return -EBUSY;


This may not work? Even we check here and task->usage is not 0, it could still 
become 0 immediately after the above refcount_read, right?


We call bpf_task_storage_get() with "task" that has valid BTF, so "task"
should not go away during the BPF program? Whatever mechanism that


Oh, right. this is true. Otherwise, we cannot use task ptr in the helper.


triggers the BPF program should either hold a reference to task (usage > 0)
or be the only one owning it (usage == 0, in __put_task_struct). Did I miss
anything?


Sorry. I think you are right. Not sure lsm requirement. There are two
more possible ways to check task is exiting which happens before 
__put_task_struct():

  . check task->exit_state
  . check task->flags & PF_EXITING (used in bpf_trace.c)

Not sure which condition is the correct one to check.



Thanks,
Song




+
 sdata = bpf_local_storage_update(
 task, (struct bpf_local_storage_map *)map

Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs

2021-01-15 Thread Yonghong Song




On 1/15/21 3:34 PM, Song Liu wrote:




On Jan 12, 2021, at 8:53 AM, KP Singh  wrote:

On Tue, Jan 12, 2021 at 5:32 PM Yonghong Song  wrote:




On 1/11/21 3:45 PM, Song Liu wrote:




On Jan 11, 2021, at 1:58 PM, Martin Lau  wrote:

On Mon, Jan 11, 2021 at 10:35:43PM +0100, KP Singh wrote:

On Mon, Jan 11, 2021 at 7:57 PM Martin KaFai Lau  wrote:


On Fri, Jan 08, 2021 at 03:19:47PM -0800, Song Liu wrote:

[ ... ]


diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index dd5aedee99e73..9bd47ad2b26f1 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c


[...]


+#include 

#include 
#include 
@@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk)
  cgroup_free(tsk);
  task_numa_free(tsk, true);
  security_task_free(tsk);
+ bpf_task_storage_free(tsk);
  exit_creds(tsk);

If exit_creds() is traced by a bpf and this bpf is doing
bpf_task_storage_get(..., BPF_LOCAL_STORAGE_GET_F_CREATE),
new task storage will be created after bpf_task_storage_free().

I recalled there was an earlier discussion with KP and KP mentioned
BPF_LSM will not be called with a task that is going away.
It seems enabling bpf task storage in bpf tracing will break
this assumption and needs to be addressed?


For tracing programs, I think we will need an allow list where
task local storage can be used.

Instead of whitelist, can refcount_inc_not_zero(&task->usage) be used?


I think we can put refcount_inc_not_zero() in bpf_task_storage_get, like:

diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c
index f654b56907b69..93d01b0a010e6 100644
--- i/kernel/bpf/bpf_task_storage.c
+++ w/kernel/bpf/bpf_task_storage.c
@@ -216,6 +216,9 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, 
struct task_struct *,
  * by an RCU read-side critical section.
  */
 if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+   if (!refcount_inc_not_zero(&task->usage))
+   return -EBUSY;
+
 sdata = bpf_local_storage_update(
 task, (struct bpf_local_storage_map *)map, value,
 BPF_NOEXIST);

But where shall we add the refcount_dec()? IIUC, we cannot add it to
__put_task_struct().


Maybe put_task_struct()?


Yeah, something like, or if you find a more elegant alternative :)

--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -107,13 +107,20 @@ extern void __put_task_struct(struct task_struct *t);

static inline void put_task_struct(struct task_struct *t)
{
-   if (refcount_dec_and_test(&t->usage))
+
+   if (rcu_access_pointer(t->bpf_storage)) {
+   if (refcount_sub_and_test(2, &t->usage))
+   __put_task_struct(t);
+   } else if (refcount_dec_and_test(&t->usage))
__put_task_struct(t);
}

static inline void put_task_struct_many(struct task_struct *t, int nr)
{
-   if (refcount_sub_and_test(nr, &t->usage))
+   if (rcu_access_pointer(t->bpf_storage)) {
+   if (refcount_sub_and_test(nr + 1, &t->usage))
+   __put_task_struct(t);
+   } else if (refcount_sub_and_test(nr, &t->usage))
__put_task_struct(t);
}


It is not ideal to leak bpf_storage here. How about we only add the
following:

diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c
index f654b56907b69..2811b9fc47233 100644
--- i/kernel/bpf/bpf_task_storage.c
+++ w/kernel/bpf/bpf_task_storage.c
@@ -216,6 +216,10 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, 
struct task_struct *,
  * by an RCU read-side critical section.
  */
 if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+   /* the task_struct is being freed, fail over*/
+   if (!refcount_read(&task->usage))
+   return -EBUSY;


This may not work? Even we check here and task->usage is not 0, it could 
still become 0 immediately after the above refcount_read, right?



+
 sdata = bpf_local_storage_update(
 task, (struct bpf_local_storage_map *)map, value,
 BPF_NOEXIST);




I may be missing something but shouldn't bpf_storage be an __rcu
member like we have for sk_bpf_storage?


Good catch! I will fix this in v2.

Thanks,
Song



Re: [PATCH v5 0/3] Kbuild: DWARF v5 support

2021-01-15 Thread Yonghong Song




On 1/15/21 3:34 PM, Nick Desaulniers wrote:

On Fri, Jan 15, 2021 at 3:24 PM Yonghong Song  wrote:




On 1/15/21 1:53 PM, Sedat Dilek wrote:

En plus, I encountered breakage with GCC v10.2.1 and LLVM=1 and
CONFIG_DEBUG_INFO_DWARF4.
So might be good to add a "depends on !DEBUG_INFO_BTF" in this combination.


Can you privately send me your configs that repro? Maybe I can isolate
it to a set of configs?



I suggested not to add !DEBUG_INFO_BTF to CONFIG_DEBUG_INFO_DWARF4.
It is not there before and adding this may suddenly break some users.

If certain combination of gcc/llvm does not work for
CONFIG_DEBUG_INFO_DWARF4 with pahole, this is a bug bpf community
should fix.


Is there a place I should report bugs?


You can send bug report to Arnaldo Carvalho de Melo , 
dwar...@vger.kernel.org and b...@vger.kernel.org.








I had some other small nits commented in the single patches.

As requested in your previous patch-series, feel free to add my:

Tested-by: Sedat Dilek 


Yeah, I'll keep it if v6 is just commit message changes.



Re: [PATCH v5 0/3] Kbuild: DWARF v5 support

2021-01-15 Thread Yonghong Song




On 1/15/21 1:53 PM, Sedat Dilek wrote:

On Fri, Jan 15, 2021 at 10:06 PM Nick Desaulniers
 wrote:


DWARF v5 is the latest standard of the DWARF debug info format.

DWARF5 wins significantly in terms of size when mixed with compression
(CONFIG_DEBUG_INFO_COMPRESSED).

Link: http://www.dwarfstd.org/doc/DWARF5.pdf

Patch 1 is a cleanup from Masahiro and isn't DWARF v5 specific.
Patch 2 is a cleanup that lays the ground work and isn't DWARF
v5 specific.
Patch 3 implements Kconfig and Kbuild support for DWARFv5.

Changes from v4:
* drop set -e from script as per Nathan.
* add dependency on !CONFIG_DEBUG_INFO_BTF for DWARF v5 as per Sedat.
* Move LLVM_IAS=1 complexity from patch 2 to patch 3 as per Arvind and
   Masahiro. Sorry it took me a few tries to understand the point (I
   might still not), but it looks much cleaner this way. Sorry Nathan, I
   did not carry forward your previous reviews as a result, but I would
   appreciate if you could look again.
* Add Nathan's reviewed by tag to patch 1.
* Reword commit message for patch 3 to mention LLVM_IAS=1 and -gdwarf-5
   binutils addition later, and BTF issue.
* I still happen to see a pahole related error spew for the combination
   of:
   * LLVM=1
   * LLVM_IAS=1
   * CONFIG_DEBUG_INFO_DWARF4
   * CONFIG_DEBUG_INFO_BTF
   Though they're non-fatal to the build. I'm not sure yet why removing
   any one of the above prevents the warning spew. Maybe we'll need a v6.



En plus, I encountered breakage with GCC v10.2.1 and LLVM=1 and
CONFIG_DEBUG_INFO_DWARF4.
So might be good to add a "depends on !DEBUG_INFO_BTF" in this combination.


I suggested not to add !DEBUG_INFO_BTF to CONFIG_DEBUG_INFO_DWARF4.
It is not there before and adding this may suddenly break some users.

If certain combination of gcc/llvm does not work for 
CONFIG_DEBUG_INFO_DWARF4 with pahole, this is a bug bpf community

should fix.



I had some other small nits commented in the single patches.

As requested in your previous patch-series, feel free to add my:

Tested-by: Sedat Dilek 

- Sedat -


Changes from v3:

Changes as per Arvind:
* only add -Wa,-gdwarf-5 for (LLVM=1|CC=clang)+LLVM_IAS=0 builds.
* add -gdwarf-5 to Kconfig shell script.
* only run Kconfig shell script for Clang.

Apologies to Sedat and Nathan; I appreciate previous testing/review, but
I did no carry forward your Tested-by and Reviewed-by tags, as the
patches have changed too much IMO.

Changes from v2:
* Drop two of the earlier patches that have been accepted already.
* Add measurements with GCC 10.2 to commit message.
* Update help text as per Arvind with help from Caroline.
* Improve case/wording between DWARF Versions as per Masahiro.

Changes from the RFC:
* split patch in 3 patch series, include Fangrui's patch, too.
* prefer `DWARF vX` format, as per Fangrui.
* use spaces between assignment in Makefile as per Masahiro.
* simplify setting dwarf-version-y as per Masahiro.
* indent `prompt` in Kconfig change as per Masahiro.
* remove explicit default in Kconfig as per Masahiro.
* add comments to test_dwarf5_support.sh.
* change echo in test_dwarf5_support.sh as per Masahiro.
* remove -u from test_dwarf5_support.sh as per Masahiro.
* add a -gdwarf-5 cc-option check to Kconfig as per Jakub.

*** BLURB HERE ***

Masahiro Yamada (1):
   Remove $(cc-option,-gdwarf-4) dependency from CONFIG_DEBUG_INFO_DWARF4

Nick Desaulniers (2):
   Kbuild: make DWARF version a choice
   Kbuild: implement support for DWARF v5

  Makefile  | 13 +++---
  include/asm-generic/vmlinux.lds.h |  6 -
  lib/Kconfig.debug | 42 +--
  scripts/test_dwarf5_support.sh|  8 ++
  4 files changed, 57 insertions(+), 12 deletions(-)
  create mode 100755 scripts/test_dwarf5_support.sh

--
2.30.0.284.gd98b1dd5eaa7-goog



Re: [PATCH bpf-next 2/3] bpf: Add size arg to build_id_parse function

2021-01-14 Thread Yonghong Song




On 1/14/21 2:02 PM, Jiri Olsa wrote:

On Thu, Jan 14, 2021 at 01:05:33PM -0800, Yonghong Song wrote:



On 1/14/21 12:01 PM, Jiri Olsa wrote:

On Thu, Jan 14, 2021 at 10:56:33AM -0800, Yonghong Song wrote:



On 1/14/21 5:40 AM, Jiri Olsa wrote:

It's possible to have other build id types (other than default SHA1).
Currently there's also ld support for MD5 build id.


Currently, bpf build_id based stackmap does not returns the size of
the build_id. Did you see an issue here? I guess user space can check
the length of non-zero bits of the build id to decide what kind of
type it is, right?


you can have zero bytes in the build id hash, so you need to get the size

I never saw MD5 being used in practise just SHA1, but we added the
size to be complete and make sure we'll fit with build id, because
there's only limited space in mmap2 event


I am asking to check whether we should extend uapi struct
bpf_stack_build_id to include build_id_size as well. I guess
we can delay this until a real use case.


right, we can try make some MD5 build id binaries and check if it
explodes with some bcc tools, but I don't expect that.. I'll try
to find some time for that


Thanks. We may have issues on bcc side. For build_id collected in 
kernel, bcc always generates a length-20 string. But for user

binaries, the build_id string length is equal to actual size of
the build_id. They may not match (MD5 length is 16).
The fix is probably to append '0's (up to length 20) for user
binary build_id's.

I guess MD5 is very seldom used. I will wait if you can reproduce
the issue and then we might fix it.



perf tool uses build ids in .debug cache as file links, and we had
few isues there

jirka



Re: [PATCH bpf-next 2/3] bpf: Add size arg to build_id_parse function

2021-01-14 Thread Yonghong Song




On 1/14/21 12:01 PM, Jiri Olsa wrote:

On Thu, Jan 14, 2021 at 10:56:33AM -0800, Yonghong Song wrote:



On 1/14/21 5:40 AM, Jiri Olsa wrote:

It's possible to have other build id types (other than default SHA1).
Currently there's also ld support for MD5 build id.


Currently, bpf build_id based stackmap does not returns the size of
the build_id. Did you see an issue here? I guess user space can check
the length of non-zero bits of the build id to decide what kind of
type it is, right?


you can have zero bytes in the build id hash, so you need to get the size

I never saw MD5 being used in practise just SHA1, but we added the
size to be complete and make sure we'll fit with build id, because
there's only limited space in mmap2 event


I am asking to check whether we should extend uapi struct
bpf_stack_build_id to include build_id_size as well. I guess
we can delay this until a real use case.




jirka





Adding size argument to build_id_parse function, that returns (if defined)
size of the parsed build id, so we can recognize the build id type.

Cc: Alexei Starovoitov 
Cc: Song Liu 
Signed-off-by: Jiri Olsa 
---
   include/linux/buildid.h |  3 ++-
   kernel/bpf/stackmap.c   |  2 +-
   lib/buildid.c   | 29 +
   3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/include/linux/buildid.h b/include/linux/buildid.h
index 08028a212589..40232f90db6e 100644
--- a/include/linux/buildid.h
+++ b/include/linux/buildid.h
@@ -6,6 +6,7 @@
   #define BUILD_ID_SIZE_MAX 20
-int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id);
+int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
+  __u32 *size);
   #endif
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 55d254a59f07..cabaf7db8efc 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -189,7 +189,7 @@ static void stack_map_get_build_id_offset(struct 
bpf_stack_build_id *id_offs,
for (i = 0; i < trace_nr; i++) {
vma = find_vma(current->mm, ips[i]);
-   if (!vma || build_id_parse(vma, id_offs[i].build_id)) {
+   if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
/* per entry fall back to ips */
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
id_offs[i].ip = ips[i];
diff --git a/lib/buildid.c b/lib/buildid.c
index 4a4f520c0e29..6156997c3895 100644
--- a/lib/buildid.c
+++ b/lib/buildid.c
@@ -12,6 +12,7 @@
*/
   static inline int parse_build_id(void *page_addr,
 unsigned char *build_id,
+__u32 *size,
 void *note_start,
 Elf32_Word note_size)
   {
@@ -38,6 +39,8 @@ static inline int parse_build_id(void *page_addr,
   nhdr->n_descsz);
memset(build_id + nhdr->n_descsz, 0,
   BUILD_ID_SIZE_MAX - nhdr->n_descsz);
+   if (size)
+   *size = nhdr->n_descsz;
return 0;
}
new_offs = note_offs + sizeof(Elf32_Nhdr) +
@@ -50,7 +53,8 @@ static inline int parse_build_id(void *page_addr,
   }

[...]





Re: [PATCH bpf-next 2/3] bpf: Add size arg to build_id_parse function

2021-01-14 Thread Yonghong Song




On 1/14/21 5:40 AM, Jiri Olsa wrote:

It's possible to have other build id types (other than default SHA1).
Currently there's also ld support for MD5 build id.


Currently, bpf build_id based stackmap does not returns the size of
the build_id. Did you see an issue here? I guess user space can check
the length of non-zero bits of the build id to decide what kind of
type it is, right?



Adding size argument to build_id_parse function, that returns (if defined)
size of the parsed build id, so we can recognize the build id type.

Cc: Alexei Starovoitov 
Cc: Song Liu 
Signed-off-by: Jiri Olsa 
---
  include/linux/buildid.h |  3 ++-
  kernel/bpf/stackmap.c   |  2 +-
  lib/buildid.c   | 29 +
  3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/include/linux/buildid.h b/include/linux/buildid.h
index 08028a212589..40232f90db6e 100644
--- a/include/linux/buildid.h
+++ b/include/linux/buildid.h
@@ -6,6 +6,7 @@
  
  #define BUILD_ID_SIZE_MAX 20
  
-int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id);

+int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
+  __u32 *size);
  
  #endif

diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 55d254a59f07..cabaf7db8efc 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -189,7 +189,7 @@ static void stack_map_get_build_id_offset(struct 
bpf_stack_build_id *id_offs,
  
  	for (i = 0; i < trace_nr; i++) {

vma = find_vma(current->mm, ips[i]);
-   if (!vma || build_id_parse(vma, id_offs[i].build_id)) {
+   if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
/* per entry fall back to ips */
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
id_offs[i].ip = ips[i];
diff --git a/lib/buildid.c b/lib/buildid.c
index 4a4f520c0e29..6156997c3895 100644
--- a/lib/buildid.c
+++ b/lib/buildid.c
@@ -12,6 +12,7 @@
   */
  static inline int parse_build_id(void *page_addr,
 unsigned char *build_id,
+__u32 *size,
 void *note_start,
 Elf32_Word note_size)
  {
@@ -38,6 +39,8 @@ static inline int parse_build_id(void *page_addr,
   nhdr->n_descsz);
memset(build_id + nhdr->n_descsz, 0,
   BUILD_ID_SIZE_MAX - nhdr->n_descsz);
+   if (size)
+   *size = nhdr->n_descsz;
return 0;
}
new_offs = note_offs + sizeof(Elf32_Nhdr) +
@@ -50,7 +53,8 @@ static inline int parse_build_id(void *page_addr,
  }
  

[...]


Re: [PATCH 2/2] tools/bpftool: Add -Wall when building BPF programs

2021-01-13 Thread Yonghong Song




On 1/13/21 2:36 PM, Ian Rogers wrote:

No additional warnings are generated by enabling this, but having it
enabled will help avoid regressions.

Signed-off-by: Ian Rogers 


Acked-by: Yonghong Song 


Re: [PATCH 1/2] bpf, libbpf: Avoid unused function warning on bpf_tail_call_static

2021-01-13 Thread Yonghong Song




On 1/13/21 2:36 PM, Ian Rogers wrote:

Add inline to __always_inline making it match the linux/compiler.h.
Adding this avoids an unused function warning on bpf_tail_call_static
when compining with -Wall.

Signed-off-by: Ian Rogers 


Acked-by: Yonghong Song 


Re: [PATCH 0/2] Fix build errors and warnings when make M=samples/bpf

2021-01-13 Thread Yonghong Song




On 1/13/21 2:57 AM, Tiezhu Yang wrote:

There exists many build errors and warnings when make M=samples/bpf,


Both fixes in this patch are related to MIPS; please do mention in the
commit message that this is MIPS related.

x86 (and arm64 I assume) compiles just fine.


this patch series fix some of them, I will submit some other patches
related with MIPS later.

Tiezhu Yang (2):
   samples/bpf: Set flag __SANE_USERSPACE_TYPES__ for MIPS to fix build
 warnings
   compiler.h: Include asm/rwonce.h under ARM64 and ALPHA to fix build
 errors

  include/linux/compiler.h| 6 ++
  samples/bpf/Makefile| 4 
  tools/include/linux/types.h | 3 +++
  3 files changed, 13 insertions(+)



Re: [PATCH 2/2] compiler.h: Include asm/rwonce.h under ARM64 and ALPHA to fix build errors

2021-01-13 Thread Yonghong Song




On 1/13/21 2:57 AM, Tiezhu Yang wrote:

When make M=samples/bpf on the Loongson 3A3000 platform which
belongs to MIPS arch, there exists many similar build errors
about 'asm/rwonce.h' file not found, so include it only under
CONFIG_ARM64 and CONFIG_ALPHA due to it exists only in arm64
and alpha arch.

   CLANG-bpf  samples/bpf/xdpsock_kern.o
In file included from samples/bpf/xdpsock_kern.c:2:
In file included from ./include/linux/bpf.h:9:
In file included from ./include/linux/workqueue.h:9:
In file included from ./include/linux/timer.h:5:
In file included from ./include/linux/list.h:9:
In file included from ./include/linux/kernel.h:10:
./include/linux/compiler.h:246:10: fatal error: 'asm/rwonce.h' file not found
  ^~
1 error generated.

$ find . -name rwonce.h
./include/asm-generic/rwonce.h
./arch/arm64/include/asm/rwonce.h
./arch/alpha/include/asm/rwonce.h

Signed-off-by: Tiezhu Yang 
---
  include/linux/compiler.h | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index b8fe0c2..bdbe759 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -243,6 +243,12 @@ static inline void *offset_to_ptr(const int *off)
   */
  #define prevent_tail_call_optimization()  mb()
  
+#ifdef CONFIG_ARM64

  #include <asm/rwonce.h>
+#endif
+
+#ifdef CONFIG_ALPHA
+#include <asm/rwonce.h>
+#endif


I do not think this fix is correct. x86 does not define its own
rwonce.h and still compiles fine.

As noted in the above, we have include/asm-generic/rwonce.h.
Once you do a proper build, you will have rwonce.h in arch
generated directory like

-bash-4.4$ find . -name rwonce.h
./include/asm-generic/rwonce.h
./arch/alpha/include/asm/rwonce.h
./arch/arm64/include/asm/rwonce.h
./arch/x86/include/generated/asm/rwonce.h

for mips, it should generated in 
arch/mips/include/generated/asm/rwonce.h. Please double check why this 
does not happen.


  
  #endif /* __LINUX_COMPILER_H */




Re: [PATCH bpf 1/2] samples/bpf: Set flag __SANE_USERSPACE_TYPES__ for MIPS to fix build warnings

2021-01-13 Thread Yonghong Song




On 1/13/21 2:57 AM, Tiezhu Yang wrote:

MIPS needs __SANE_USERSPACE_TYPES__ before  to select
'int-ll64.h' in arch/mips/include/uapi/asm/types.h and avoid compile
warnings when printing __u64 with %llu, %llx or %lld.


could you mention which command produces the following warning?



 printf("0x%02x : %llu\n", key, value);
  ~~~^  ~
  %lu
printf("%s/%llx;", sym->name, addr);
   ~~~^   
   %lx
   printf(";%s %lld\n", key->waker, count);
   ~~~^ ~
   %ld

Signed-off-by: Tiezhu Yang 
---
  samples/bpf/Makefile| 4 
  tools/include/linux/types.h | 3 +++
  2 files changed, 7 insertions(+)

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 26fc96c..27de306 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -183,6 +183,10 @@ BPF_EXTRA_CFLAGS := $(ARM_ARCH_SELECTOR)
  TPROGS_CFLAGS += $(ARM_ARCH_SELECTOR)
  endif
  
+ifeq ($(ARCH), mips)

+TPROGS_CFLAGS += -D__SANE_USERSPACE_TYPES__
+endif
+


This change looks okay based on description in
arch/mips/include/uapi/asm/types.h

'''
/*
 * We don't use int-l64.h for the kernel anymore but still use it for
 * userspace to avoid code changes.
 *
 * However, some user programs (e.g. perf) may not want this. They can
 * flag __SANE_USERSPACE_TYPES__ to get int-ll64.h here.
 */
'''


  TPROGS_CFLAGS += -Wall -O2
  TPROGS_CFLAGS += -Wmissing-prototypes
  TPROGS_CFLAGS += -Wstrict-prototypes
diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h
index 154eb4e..e9c5a21 100644
--- a/tools/include/linux/types.h
+++ b/tools/include/linux/types.h
@@ -6,7 +6,10 @@
  #include 
  #include 
  
+#ifndef __SANE_USERSPACE_TYPES__

  #define __SANE_USERSPACE_TYPES__  /* For PPC64, to get LL64 types */
+#endif


What problem this patch fixed? If this header is used, you can just
change comment from "PPC64" to "PPC64/MIPS", right?


+
  #include 
  #include 
  



Re: [PATCH bpf-next 1/2] trace: bpf: Allow bpf to attach to bare tracepoints

2021-01-13 Thread Yonghong Song




On 1/13/21 2:16 AM, Qais Yousef wrote:

On 01/12/21 12:19, Yonghong Song wrote:

I applied the patch to my local bpf-next repo, and got the following
compilation error:


[...]



I dumped preprecessor result but after macro expansion, the code
becomes really complex and I have not figured out why it failed.
Do you know what is the possible reason?


Yeah I did a last minute fix to address a checkpatch.pl error and my
verification of the change wasn't good enough obviously.

If you're keen to try out I can send you a patch with the fix. I should send v2
by the weekend too.


Thanks. I can wait and will check v2 once it is available.



Thanks for having a look.

Cheers

--
Qais Yousef



Re: [PATCH bpf v2 2/2] selftests/bpf: add verifier test for PTR_TO_MEM spill

2021-01-13 Thread Yonghong Song




On 1/12/21 9:38 PM, Gilad Reti wrote:

Add a test to check that the verifier is able to recognize spilling of
PTR_TO_MEM registers, by reserving a ringbuf buffer, forcing the spill
of a pointer holding the buffer address to the stack, filling it back
in from the stack and writing to the memory area pointed by it.

The patch was partially contributed by CyberArk Software, Inc.

Signed-off-by: Gilad Reti 


I didn't verify result_unpriv = ACCEPT part. I think it is correct
by checking code.

Acked-by: Yonghong Song 


Re: [PATCH bpf-next 1/2] trace: bpf: Allow bpf to attach to bare tracepoints

2021-01-12 Thread Yonghong Song




On 1/11/21 10:20 AM, Qais Yousef wrote:

Some subsystems only have bare tracepoints (a tracepoint with no
associated trace event) to avoid the problem of trace events being an
ABI that can't be changed.

 From bpf presepective, bare tracepoints are what it calls
RAW_TRACEPOINT().

Since bpf assumed there's 1:1 mapping, it relied on hooking to
DEFINE_EVENT() macro to create bpf mapping of the tracepoints. Since
bare tracepoints use DECLARE_TRACE() to create the tracepoint, bpf had
no knowledge about their existence.

By teaching bpf_probe.h to parse DECLARE_TRACE() in a similar fashion to
DEFINE_EVENT(), bpf can find and attach to the new raw tracepoints.

Enabling that comes with the contract that changes to raw tracepoints
don't constitute a regression if they break existing bpf programs.
We need the ability to continue to morph and modify these raw
tracepoints without worrying about any ABI.

Update Documentation/bpf/bpf_design_QA.rst to document this contract.

Signed-off-by: Qais Yousef 
---
  Documentation/bpf/bpf_design_QA.rst |  6 ++
  include/trace/bpf_probe.h   | 12 ++--
  2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/Documentation/bpf/bpf_design_QA.rst 
b/Documentation/bpf/bpf_design_QA.rst
index 2df7b067ab93..0e15f9b05c9d 100644
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@@ -208,6 +208,12 @@ data structures and compile with kernel internal headers. 
Both of these
  kernel internals are subject to change and can break with newer kernels
  such that the program needs to be adapted accordingly.
  
+Q: Are tracepoints part of the stable ABI?

+--
+A: NO. Tracepoints are tied to internal implementation details hence they are
+subject to change and can break with newer kernels. BPF programs need to change
+accordingly when this happens.
+
  Q: How much stack space a BPF program uses?
  ---
  A: Currently all program types are limited to 512 bytes of stack
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index cd74bffed5c6..cf1496b162b1 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -55,8 +55,7 @@
  /* tracepoints with more than 12 arguments will hit build error */
  #define CAST_TO_U64(...) CONCATENATE(__CAST, 
COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__)
  
-#undef DECLARE_EVENT_CLASS

-#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+#define __BPF_DECLARE_TRACE(call, proto, args) \
  static notrace void   \
  __bpf_trace_##call(void *__data, proto)   
\
  { \
@@ -64,6 +63,10 @@ __bpf_trace_##call(void *__data, proto)  
\
CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(prog, CAST_TO_U64(args));  
\
  }
  
+#undef DECLARE_EVENT_CLASS

+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+   __BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args))
+
  /*
   * This part is compiled out, it is only here as a build time check
   * to make sure that if the tracepoint handling changes, the
@@ -111,6 +114,11 @@ __DEFINE_EVENT(template, call, PARAMS(proto), 
PARAMS(args), size)
  #define DEFINE_EVENT_PRINT(template, name, proto, args, print)\
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
  
+#undef DECLARE_TRACE

+#define DECLARE_TRACE(call, proto, args)   \
+   (__BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) \
+__DEFINE_EVENT(call, call, PARAMS(proto), PARAMS(args), 0))


I applied the patch to my local bpf-next repo, and got the following
compilation error:

In file included from 
/data/users/yhs/work/net-next/include/trace/define_trace.h:104, 

 from 
/data/users/yhs/work/net-next/include/trace/events/sched.h:740, 

 from 
/data/users/yhs/work/net-next/kernel/sched/core.c:10: 

/data/users/yhs/work/net-next/include/trace/bpf_probe.h:59:1: error: 
expected identifier or ‘(’ before ‘static’
 static notrace void   \ 

 ^~ 

/data/users/yhs/work/net-next/include/trace/bpf_probe.h:119:3: note: in 
expansion of macro ‘__BPF_DECLARE_TRACE’
  (__BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args))  \ 

   ^~~ 

/data/users/yhs/work/net-next/include/trace/events/sched.h:693:1: note: 
in expansion of macro ‘DECLARE_TRACE’
 DECLARE_TRACE(pelt_cfs_tp, 

 ^ 

/data/users/yhs/work/net-next/include/trace/bpf_probe.h:59:1: error: 
expected identifier or ‘(’ before ‘static’
 static notrace void   \ 

 ^~ 

/data/users/yhs/work/net-next/include/trace/bpf_probe.h:119:3: note: in 
expansion of macro ‘__BPF_DECLARE_TRACE’
  (__BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args))  \ 

   ^~~ 


Re: [PATCH 2/2] selftests/bpf: add verifier test for PTR_TO_MEM spill

2021-01-12 Thread Yonghong Song




On 1/12/21 7:43 AM, Daniel Borkmann wrote:

On 1/12/21 4:35 PM, Gilad Reti wrote:

On Tue, Jan 12, 2021 at 4:56 PM KP Singh  wrote:
On Tue, Jan 12, 2021 at 10:16 AM Gilad Reti  
wrote:


Add test to check that the verifier is able to recognize spilling of
PTR_TO_MEM registers.


It would be nice to have some explanation of what the test does to
recognize the spilling of the PTR_TO_MEM registers in the commit
log as well.

Would it be possible to augment an existing test_progs
program like tools/testing/selftests/bpf/progs/test_ringbuf.c to test
this functionality?


How would you guarantee that LLVM generates the spill/fill, via inline asm?


You can make the following change to force the return value ("sample" 
here) of bpf_ringbuf_reserve() to spill on the stack.


diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c 
b/tools/testing/selftests/bpf/progs/test_ringbuf.c

index 8ba9959b036b..011521170856 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -40,7 +40,7 @@ SEC("tp/syscalls/sys_enter_getpgid")
 int test_ringbuf(void *ctx)
 {
int cur_pid = bpf_get_current_pid_tgid() >> 32;
-   struct sample *sample;
+   struct sample * volatile sample;
int zero = 0;

if (cur_pid != pid)

This change will cause verifier failure without Patch #1.




It may be possible, but from what I understood from Daniel's comment here

https://lore.kernel.org/bpf/17629073-4fab-a922-ecc3-25b019960...@iogearbox.net/ 



the test should be a part of the verifier tests (which is reasonable
to me since it is
a verifier bugfix)


Yeah, the test_verifier case as you have is definitely the most straight
forward way to add coverage in this case.


Re: [PATCH bpf-next] bpf: Fix a verifier message for alloc size helper arg

2021-01-12 Thread Yonghong Song




On 1/12/21 4:39 AM, Brendan Jackman wrote:

The error message here is misleading, the argument will be rejected
unless it is a known constant.

Signed-off-by: Brendan Jackman 


Okay, this is for bpf_ringbuf_reserve() helper where the size must be a 
constant.


Acked-by: Yonghong Song 


Re: [PATCH bpf-next] bpf: Clarify return value of probe str helpers

2021-01-12 Thread Yonghong Song




On 1/12/21 4:34 AM, Brendan Jackman wrote:

When the buffer is too small to contain the input string, these
helpers return the length of the buffer, not the length of the
original string. This tries to make the docs totally clear about
that, since "the length of the [copied ]string" could also refer to
the length of the input.

Signed-off-by: Brendan Jackman 


Acked-by: Yonghong Song 


Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs

2021-01-12 Thread Yonghong Song




On 1/11/21 3:45 PM, Song Liu wrote:




On Jan 11, 2021, at 1:58 PM, Martin Lau  wrote:

On Mon, Jan 11, 2021 at 10:35:43PM +0100, KP Singh wrote:

On Mon, Jan 11, 2021 at 7:57 PM Martin KaFai Lau  wrote:


On Fri, Jan 08, 2021 at 03:19:47PM -0800, Song Liu wrote:

[ ... ]


diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index dd5aedee99e73..9bd47ad2b26f1 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct 
bpf_local_storage_elem *selem)
{
  struct bpf_local_storage *local_storage;
  bool free_local_storage = false;
+ unsigned long flags;

  if (unlikely(!selem_linked_to_storage(selem)))
  /* selem has already been unlinked from sk */
  return;

  local_storage = rcu_dereference(selem->local_storage);
- raw_spin_lock_bh(&local_storage->lock);
+ raw_spin_lock_irqsave(&local_storage->lock, flags);

It will be useful to have a few words in commit message on this change
for future reference purpose.

Please also remove the in_irq() check from bpf_sk_storage.c
to avoid confusion in the future.  It probably should
be in a separate patch.

[ ... ]


diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 4ef1959a78f27..f654b56907b69 100644
diff --git a/kernel/fork.c b/kernel/fork.c
index 7425b3224891d..3d65c8ebfd594 100644

[ ... ]


--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -96,6 +96,7 @@
#include 
#include 
#include 
+#include <linux/bpf.h>

#include 
#include 
@@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk)
  cgroup_free(tsk);
  task_numa_free(tsk, true);
  security_task_free(tsk);
+ bpf_task_storage_free(tsk);
  exit_creds(tsk);

If exit_creds() is traced by a bpf and this bpf is doing
bpf_task_storage_get(..., BPF_LOCAL_STORAGE_GET_F_CREATE),
new task storage will be created after bpf_task_storage_free().

I recalled there was an earlier discussion with KP and KP mentioned
BPF_LSM will not be called with a task that is going away.
It seems enabling bpf task storage in bpf tracing will break
this assumption and needs to be addressed?


For tracing programs, I think we will need an allow list where
task local storage can be used.

Instead of whitelist, can refcount_inc_not_zero(&task->usage) be used?


I think we can put refcount_inc_not_zero() in bpf_task_storage_get, like:

diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c
index f654b56907b69..93d01b0a010e6 100644
--- i/kernel/bpf/bpf_task_storage.c
+++ w/kernel/bpf/bpf_task_storage.c
@@ -216,6 +216,9 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, 
struct task_struct *,
  * by an RCU read-side critical section.
  */
 if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+   if (!refcount_inc_not_zero(&task->usage))
+   return -EBUSY;
+
 sdata = bpf_local_storage_update(
 task, (struct bpf_local_storage_map *)map, value,
 BPF_NOEXIST);

But where shall we add the refcount_dec()? IIUC, we cannot add it to
__put_task_struct().


Maybe put_task_struct()?


Thanks,
Song



Re: [PATCH bpf-next 4/4] bpf: runqslower: use task local storage

2021-01-11 Thread Yonghong Song




On 1/11/21 11:14 PM, Andrii Nakryiko wrote:

On Mon, Jan 11, 2021 at 7:24 PM Yonghong Song  wrote:




On 1/11/21 2:54 PM, Song Liu wrote:




On Jan 11, 2021, at 9:49 AM, Yonghong Song  wrote:



On 1/8/21 3:19 PM, Song Liu wrote:

Replace hashtab with task local storage in runqslower. This improves the
performance of these BPF programs. The following table summarizes average
runtime of these programs, in nanoseconds:
task-local   hash-prealloc   hash-no-prealloc
handle__sched_wakeup 125 340   3124
handle__sched_wakeup_new28121510   2998
handle__sched_switch 151 208991
Note that, task local storage gives better performance than hashtab for
handle__sched_wakeup and handle__sched_switch. On the other hand, for
handle__sched_wakeup_new, task local storage is slower than hashtab with
prealloc. This is because handle__sched_wakeup_new accesses the data for
the first time, so it has to allocate the data for task local storage.
Once the initial allocation is done, subsequent accesses, as those in
handle__sched_wakeup, are much faster with task local storage. If we
disable hashtab prealloc, task local storage is much faster for all 3
functions.
Signed-off-by: Song Liu 
---
   tools/bpf/runqslower/runqslower.bpf.c | 26 +++---
   1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/tools/bpf/runqslower/runqslower.bpf.c 
b/tools/bpf/runqslower/runqslower.bpf.c
index 1f18a409f0443..c4de4179a0a17 100644
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ b/tools/bpf/runqslower/runqslower.bpf.c
@@ -11,9 +11,9 @@ const volatile __u64 min_us = 0;
   const volatile pid_t targ_pid = 0;
 struct {
-   __uint(type, BPF_MAP_TYPE_HASH);
-   __uint(max_entries, 10240);
-   __type(key, u32);
+   __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+   __uint(map_flags, BPF_F_NO_PREALLOC);
+   __type(key, int);
 __type(value, u64);
   } start SEC(".maps");
   @@ -25,15 +25,19 @@ struct {
 /* record enqueue timestamp */
   __always_inline
-static int trace_enqueue(u32 tgid, u32 pid)
+static int trace_enqueue(struct task_struct *t)
   {
-   u64 ts;
+   u32 pid = t->pid;
+   u64 ts, *ptr;
 if (!pid || (targ_pid && targ_pid != pid))
 return 0;
 ts = bpf_ktime_get_ns();
-   bpf_map_update_elem(&start, &pid, &ts, 0);
+   ptr = bpf_task_storage_get(&start, t, 0,
+  BPF_LOCAL_STORAGE_GET_F_CREATE);
+   if (ptr)
+   *ptr = ts;
 return 0;
   }
   @@ -43,7 +47,7 @@ int handle__sched_wakeup(u64 *ctx)
 /* TP_PROTO(struct task_struct *p) */
 struct task_struct *p = (void *)ctx[0];
   - return trace_enqueue(p->tgid, p->pid);
+   return trace_enqueue(p);
   }
 SEC("tp_btf/sched_wakeup_new")
@@ -52,7 +56,7 @@ int handle__sched_wakeup_new(u64 *ctx)
 /* TP_PROTO(struct task_struct *p) */
 struct task_struct *p = (void *)ctx[0];
   - return trace_enqueue(p->tgid, p->pid);
+   return trace_enqueue(p);
   }
 SEC("tp_btf/sched_switch")
@@ -70,12 +74,12 @@ int handle__sched_switch(u64 *ctx)
 /* ivcsw: treat like an enqueue event and store timestamp */
 if (prev->state == TASK_RUNNING)
-   trace_enqueue(prev->tgid, prev->pid);
+   trace_enqueue(prev);
 pid = next->pid;
 /* fetch timestamp and calculate delta */
-   tsp = bpf_map_lookup_elem(&start, &pid);
+   tsp = bpf_task_storage_get(&start, next, 0, 0);
 if (!tsp)
 return 0;   /* missed enqueue */


Previously, hash table may overflow so we may have missed enqueue.
Here with task local storage, is it possible to add additional pid
filtering in the beginning of handle__sched_switch such that
missed enqueue here can be treated as an error?


IIUC, hashtab overflow is not the only reason of missed enqueue. If the
wakeup (which calls trace_enqueue) happens before runqslower starts, we
may still get missed enqueue in sched_switch, no?


the wakeup won't happen before runqslower starts since runqslower needs
to start to do attachment first and then trace_enqueue() can run.


I think Song is right. Given wakeup and sched_switch need to be
matched, depending at which exact time we attach BPF programs, we can
end up missing wakeup, but not missing sched_switch, no? So it's not
an error.


The current approach works fine. What I suggested is to
tighten sched_switch only for target_pid. wakeup (doing queuing) will
be more relaxed than sched_switch to ensure task local storage creation
is always there for target_pid regardless of attachment timing.
I think it should work, but we have to experiment to see actual
results...





For the current implementation trace_enqueue() will happen for any non-0
pid before setting test_progs tgid, and will happen for any non-0 and
test_progs tgid if it is set, so this should be okay if we do filtering
in handle_

Re: [PATCH bpf-next 4/4] bpf: runqslower: use task local storage

2021-01-11 Thread Yonghong Song




On 1/11/21 2:54 PM, Song Liu wrote:




On Jan 11, 2021, at 9:49 AM, Yonghong Song  wrote:



On 1/8/21 3:19 PM, Song Liu wrote:

Replace hashtab with task local storage in runqslower. This improves the
performance of these BPF programs. The following table summarizes average
runtime of these programs, in nanoseconds:
   task-local   hash-prealloc   hash-no-prealloc
handle__sched_wakeup 125 340   3124
handle__sched_wakeup_new28121510   2998
handle__sched_switch 151 208991
Note that, task local storage gives better performance than hashtab for
handle__sched_wakeup and handle__sched_switch. On the other hand, for
handle__sched_wakeup_new, task local storage is slower than hashtab with
prealloc. This is because handle__sched_wakeup_new accesses the data for
the first time, so it has to allocate the data for task local storage.
Once the initial allocation is done, subsequent accesses, as those in
handle__sched_wakeup, are much faster with task local storage. If we
disable hashtab prealloc, task local storage is much faster for all 3
functions.
Signed-off-by: Song Liu 
---
  tools/bpf/runqslower/runqslower.bpf.c | 26 +++---
  1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/tools/bpf/runqslower/runqslower.bpf.c 
b/tools/bpf/runqslower/runqslower.bpf.c
index 1f18a409f0443..c4de4179a0a17 100644
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ b/tools/bpf/runqslower/runqslower.bpf.c
@@ -11,9 +11,9 @@ const volatile __u64 min_us = 0;
  const volatile pid_t targ_pid = 0;
struct {
-   __uint(type, BPF_MAP_TYPE_HASH);
-   __uint(max_entries, 10240);
-   __type(key, u32);
+   __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+   __uint(map_flags, BPF_F_NO_PREALLOC);
+   __type(key, int);
__type(value, u64);
  } start SEC(".maps");
  @@ -25,15 +25,19 @@ struct {
/* record enqueue timestamp */
  __always_inline
-static int trace_enqueue(u32 tgid, u32 pid)
+static int trace_enqueue(struct task_struct *t)
  {
-   u64 ts;
+   u32 pid = t->pid;
+   u64 ts, *ptr;
if (!pid || (targ_pid && targ_pid != pid))
return 0;
ts = bpf_ktime_get_ns();
-   bpf_map_update_elem(&start, &pid, &ts, 0);
+   ptr = bpf_task_storage_get(&start, t, 0,
+  BPF_LOCAL_STORAGE_GET_F_CREATE);
+   if (ptr)
+   *ptr = ts;
return 0;
  }
  @@ -43,7 +47,7 @@ int handle__sched_wakeup(u64 *ctx)
/* TP_PROTO(struct task_struct *p) */
struct task_struct *p = (void *)ctx[0];
  - return trace_enqueue(p->tgid, p->pid);
+   return trace_enqueue(p);
  }
SEC("tp_btf/sched_wakeup_new")
@@ -52,7 +56,7 @@ int handle__sched_wakeup_new(u64 *ctx)
/* TP_PROTO(struct task_struct *p) */
struct task_struct *p = (void *)ctx[0];
  - return trace_enqueue(p->tgid, p->pid);
+   return trace_enqueue(p);
  }
SEC("tp_btf/sched_switch")
@@ -70,12 +74,12 @@ int handle__sched_switch(u64 *ctx)
/* ivcsw: treat like an enqueue event and store timestamp */
if (prev->state == TASK_RUNNING)
-   trace_enqueue(prev->tgid, prev->pid);
+   trace_enqueue(prev);
pid = next->pid;
/* fetch timestamp and calculate delta */
-   tsp = bpf_map_lookup_elem(&start, &pid);
+   tsp = bpf_task_storage_get(&start, next, 0, 0);
if (!tsp)
return 0;   /* missed enqueue */


Previously, hash table may overflow so we may have missed enqueue.
Here with task local storage, is it possible to add additional pid
filtering in the beginning of handle__sched_switch such that
missed enqueue here can be treated as an error?


IIUC, hashtab overflow is not the only reason of missed enqueue. If the
wakeup (which calls trace_enqueue) happens before runqslower starts, we
may still get missed enqueue in sched_switch, no?


the wakeup won't happen before runqslower starts since runqslower needs
to start to do attachment first and then trace_enqueue() can run.

For the current implementation trace_enqueue() will happen for any non-0 
pid before setting test_progs tgid, and will happen for any non-0 and 
test_progs tgid if it is set, so this should be okay if we do filtering

in handle__sched_switch. Maybe you can do an experiment to prove whether
my point is correct or not.



Thanks,
Song



Re: [PATCH bpf-next 4/4] bpf: runqslower: use task local storage

2021-01-11 Thread Yonghong Song




On 1/8/21 3:19 PM, Song Liu wrote:

Replace hashtab with task local storage in runqslower. This improves the
performance of these BPF programs. The following table summarizes average
runtime of these programs, in nanoseconds:

   task-local   hash-prealloc   hash-no-prealloc
handle__sched_wakeup 125 340   3124
handle__sched_wakeup_new28121510   2998
handle__sched_switch 151 208991

Note that, task local storage gives better performance than hashtab for
handle__sched_wakeup and handle__sched_switch. On the other hand, for
handle__sched_wakeup_new, task local storage is slower than hashtab with
prealloc. This is because handle__sched_wakeup_new accesses the data for
the first time, so it has to allocate the data for task local storage.
Once the initial allocation is done, subsequent accesses, as those in
handle__sched_wakeup, are much faster with task local storage. If we
disable hashtab prealloc, task local storage is much faster for all 3
functions.

Signed-off-by: Song Liu 
---
  tools/bpf/runqslower/runqslower.bpf.c | 26 +++---
  1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/tools/bpf/runqslower/runqslower.bpf.c 
b/tools/bpf/runqslower/runqslower.bpf.c
index 1f18a409f0443..c4de4179a0a17 100644
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ b/tools/bpf/runqslower/runqslower.bpf.c
@@ -11,9 +11,9 @@ const volatile __u64 min_us = 0;
  const volatile pid_t targ_pid = 0;
  
  struct {

-   __uint(type, BPF_MAP_TYPE_HASH);
-   __uint(max_entries, 10240);
-   __type(key, u32);
+   __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+   __uint(map_flags, BPF_F_NO_PREALLOC);
+   __type(key, int);
__type(value, u64);
  } start SEC(".maps");
  
@@ -25,15 +25,19 @@ struct {
  
  /* record enqueue timestamp */

  __always_inline
-static int trace_enqueue(u32 tgid, u32 pid)
+static int trace_enqueue(struct task_struct *t)
  {
-   u64 ts;
+   u32 pid = t->pid;
+   u64 ts, *ptr;
  
  	if (!pid || (targ_pid && targ_pid != pid))

return 0;
  
  	ts = bpf_ktime_get_ns();

-   bpf_map_update_elem(&start, &pid, &ts, 0);
+   ptr = bpf_task_storage_get(&start, t, 0,
+  BPF_LOCAL_STORAGE_GET_F_CREATE);
+   if (ptr)
+   *ptr = ts;
return 0;
  }
  
@@ -43,7 +47,7 @@ int handle__sched_wakeup(u64 *ctx)

/* TP_PROTO(struct task_struct *p) */
struct task_struct *p = (void *)ctx[0];
  
-	return trace_enqueue(p->tgid, p->pid);

+   return trace_enqueue(p);
  }
  
  SEC("tp_btf/sched_wakeup_new")

@@ -52,7 +56,7 @@ int handle__sched_wakeup_new(u64 *ctx)
/* TP_PROTO(struct task_struct *p) */
struct task_struct *p = (void *)ctx[0];
  
-	return trace_enqueue(p->tgid, p->pid);

+   return trace_enqueue(p);
  }
  
  SEC("tp_btf/sched_switch")

@@ -70,12 +74,12 @@ int handle__sched_switch(u64 *ctx)
  
  	/* ivcsw: treat like an enqueue event and store timestamp */

if (prev->state == TASK_RUNNING)
-   trace_enqueue(prev->tgid, prev->pid);
+   trace_enqueue(prev);
  
  	pid = next->pid;
  
  	/* fetch timestamp and calculate delta */

-   tsp = bpf_map_lookup_elem(&start, &pid);
+   tsp = bpf_task_storage_get(&start, next, 0, 0);
if (!tsp)
return 0;   /* missed enqueue */


Previously, hash table may overflow so we may have missed enqueue.
Here with task local storage, is it possible to add additional pid
filtering in the beginning of handle__sched_switch such that
missed enqueue here can be treated as an error?

  
@@ -91,7 +95,7 @@ int handle__sched_switch(u64 *ctx)

	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
	  &event, sizeof(event));
  
-	bpf_map_delete_elem(&start, &pid);

+   bpf_task_storage_delete(&start, next);
return 0;
  }
  



Re: [PATCH bpf-next 3/4] bpf: runqslower: prefer use local vmlinux

2021-01-11 Thread Yonghong Song




On 1/8/21 3:19 PM, Song Liu wrote:

Update the Makefile to prefer using ../../../vmlinux, which has latest
definitions for vmlinux.h

Signed-off-by: Song Liu 
---
  tools/bpf/runqslower/Makefile | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index 4d5ca54fcd4c8..306f1ce5a97b2 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -19,7 +19,8 @@ CFLAGS := -g -Wall
  
  # Try to detect best kernel BTF source

  KERNEL_REL := $(shell uname -r)
-VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL)
+VMLINUX_BTF_PATHS := ../../../vmlinux /sys/kernel/btf/vmlinux \
+   /boot/vmlinux-$(KERNEL_REL)


selftests/bpf Makefile has:

VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)\
 $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)\
 ../../../../vmlinux\
 /sys/kernel/btf/vmlinux\
 /boot/vmlinux-$(shell uname -r)

If you intend to add ../../../vmlinux, I think we should also
add $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux).


  VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword  \
  $(wildcard $(VMLINUX_BTF_PATHS))))
  



Re: [PATCH bpf-next 2/4] selftests/bpf: add non-BPF_LSM test for task local storage

2021-01-11 Thread Yonghong Song




On 1/8/21 3:19 PM, Song Liu wrote:

Task local storage is enabled for tracing programs. Add a test for it
without CONFIG_BPF_LSM.

Signed-off-by: Song Liu 
---
  .../bpf/prog_tests/test_task_local_storage.c  | 34 +
  .../selftests/bpf/progs/task_local_storage.c  | 37 +++
  2 files changed, 71 insertions(+)
  create mode 100644 
tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c
  create mode 100644 tools/testing/selftests/bpf/progs/task_local_storage.c

diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c 
b/tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c
new file mode 100644
index 0..7de7a154ebbe6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */


2020 -> 2021


+
+#include 
+#include 
+#include 
+#include "task_local_storage.skel.h"
+
+static unsigned int duration;
+
+void test_test_task_local_storage(void)
+{
+   struct task_local_storage *skel;
+   const int count = 10;
+   int i, err;
+
+   skel = task_local_storage__open_and_load();
+


Extra line is unnecessary here.


+   if (CHECK(!skel, "skel_open_and_load", "skeleton open and load 
failed\n"))
+   return;
+
+   err = task_local_storage__attach(skel);
+


ditto.


+   if (CHECK(err, "skel_attach", "skeleton attach failed\n"))
+   goto out;
+
+   for (i = 0; i < count; i++)
+   usleep(1000);


Does a smaller usleep value will work? If it is, recommend to have a 
smaller value here to reduce test_progs running time.



+   CHECK(skel->bss->value < count, "task_local_storage_value",
+ "task local value too small\n");
+
+out:
+   task_local_storage__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/task_local_storage.c 
b/tools/testing/selftests/bpf/progs/task_local_storage.c
new file mode 100644
index 0..807255c5c162d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_storage.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */


2020 -> 2021


+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct local_data {
+   __u64 val;
+};
+
+struct {
+   __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+   __uint(map_flags, BPF_F_NO_PREALLOC);
+   __type(key, int);
+   __type(value, struct local_data);
+} task_storage_map SEC(".maps");
+
+int value = 0;
+
+SEC("tp_btf/sched_switch")
+int BPF_PROG(on_switch, bool preempt, struct task_struct *prev,
+struct task_struct *next)
+{
+   struct local_data *storage;


If it possible that we do some filtering based on test_progs pid
so below bpf_task_storage_get is only called for test_progs process?
This is more targeted and can avoid counter contributions from
other unrelated processes and make test_task_local_storage.c result
comparison more meaningful.


+
+   storage = bpf_task_storage_get(&task_storage_map,
+  next, 0,
+  BPF_LOCAL_STORAGE_GET_F_CREATE);
+   if (storage) {
+   storage->val++;
+   value = storage->val;
+   }
+   return 0;
+}



Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs

2021-01-11 Thread Yonghong Song




On 1/8/21 3:19 PM, Song Liu wrote:

To access per-task data, BPF program typically creates a hash table with
pid as the key. This is not ideal because:
  1. The use need to estimate requires size of the hash table, with may be
 inaccurate;
  2. Big hash tables are slow;
  3. To clean up the data properly during task terminations, the user need
 to write code.

Task local storage overcomes these issues and becomes a better option for
these per-task data. Task local storage is only available to BPF_LSM. Now
enable it for tracing programs.

Reported-by: kernel test robot 


The whole patch is not reported by kernel test robot. I think we should
drop this.


Signed-off-by: Song Liu 
---
  include/linux/bpf.h|  7 +++
  include/linux/bpf_lsm.h| 22 --
  include/linux/bpf_types.h  |  2 +-
  include/linux/sched.h  |  5 +
  kernel/bpf/Makefile|  3 +--
  kernel/bpf/bpf_local_storage.c | 28 +---
  kernel/bpf/bpf_lsm.c   |  4 
  kernel/bpf/bpf_task_storage.c  | 26 ++
  kernel/fork.c  |  5 +
  kernel/trace/bpf_trace.c   |  4 
  10 files changed, 46 insertions(+), 60 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 07cb5d15e7439..cf16548f28f7b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1480,6 +1480,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id);
  struct bpf_link *bpf_link_by_id(u32 id);
  
  const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);

+void bpf_task_storage_free(struct task_struct *task);
  #else /* !CONFIG_BPF_SYSCALL */
  static inline struct bpf_prog *bpf_prog_get(u32 ufd)
  {
@@ -1665,6 +1666,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
  {
return NULL;
  }
+
+static inline void bpf_task_storage_free(struct task_struct *task)
+{
+}
  #endif /* CONFIG_BPF_SYSCALL */

[...]


Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs

2021-01-11 Thread Yonghong Song




On 1/11/21 2:17 AM, KP Singh wrote:

On Mon, Jan 11, 2021 at 7:27 AM Yonghong Song  wrote:




On 1/8/21 3:19 PM, Song Liu wrote:

To access per-task data, BPF program typically creates a hash table with
pid as the key. This is not ideal because:
   1. The use need to estimate requires size of the hash table, with may be
  inaccurate;
   2. Big hash tables are slow;
   3. To clean up the data properly during task terminations, the user need
  to write code.

Task local storage overcomes these issues and becomes a better option for
these per-task data. Task local storage is only available to BPF_LSM. Now
enable it for tracing programs.

Reported-by: kernel test robot 
Signed-off-by: Song Liu 
---


[...]


   struct cfs_rq;
   struct fs_struct;
@@ -1348,6 +1349,10 @@ struct task_struct {
   /* Used by LSM modules for access restriction: */
   void*security;
   #endif
+#ifdef CONFIG_BPF_SYSCALL
+ /* Used by BPF task local storage */
+ struct bpf_local_storage*bpf_storage;
+#endif


I remembered there is a discussion where KP initially wanted to put
bpf_local_storage in task_struct, but later on changed to
use in lsm as his use case mostly for lsm. Did anybody
remember the details of the discussion? Just want to be
sure what is the concern people has with putting bpf_local_storage
in task_struct and whether the use case presented by
Song will justify it.



If I recall correctly, the discussion was about inode local storage and
it was decided to use the security blob since the use-case was only LSM
programs. Since we now plan to use it in tracing,
detangling the dependency from CONFIG_BPF_LSM
sounds logical to me.


Sounds good. Thanks for explanation.






   #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
   unsigned long   lowest_stack;
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index d1249340fd6ba..ca995fdfa45e7 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -8,9 +8,8 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) 
$(cflags-nogcse-yy)

   obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o 
bpf_iter.o map_iter.o task_iter.o prog_iter.o
   obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o 
bpf_lru_list.o lpm_trie.o map_in_map.o
-obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
+obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o 
bpf_task_storage.o
   obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
-obj-${CONFIG_BPF_LSM}  += bpf_task_storage.o
   obj-$(CONFIG_BPF_SYSCALL) += disasm.o
   obj-$(CONFIG_BPF_JIT) += trampoline.o
   obj-$(CONFIG_BPF_SYSCALL) += btf.o

[...]


Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs

2021-01-10 Thread Yonghong Song




On 1/8/21 3:19 PM, Song Liu wrote:

To access per-task data, BPF program typically creates a hash table with
pid as the key. This is not ideal because:
  1. The user needs to estimate the required size of the hash table, which
 may be inaccurate;
  2. Big hash tables are slow;
  3. To clean up the data properly during task terminations, the user needs
 to write code.

Task local storage overcomes these issues and becomes a better option for
these per-task data. Task local storage is only available to BPF_LSM. Now
enable it for tracing programs.

Reported-by: kernel test robot 
Signed-off-by: Song Liu 
---
  include/linux/bpf.h|  7 +++
  include/linux/bpf_lsm.h| 22 --
  include/linux/bpf_types.h  |  2 +-
  include/linux/sched.h  |  5 +
  kernel/bpf/Makefile|  3 +--
  kernel/bpf/bpf_local_storage.c | 28 +---
  kernel/bpf/bpf_lsm.c   |  4 
  kernel/bpf/bpf_task_storage.c  | 26 ++
  kernel/fork.c  |  5 +
  kernel/trace/bpf_trace.c   |  4 
  10 files changed, 46 insertions(+), 60 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 07cb5d15e7439..cf16548f28f7b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1480,6 +1480,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id);
  struct bpf_link *bpf_link_by_id(u32 id);
  
  const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);

+void bpf_task_storage_free(struct task_struct *task);
  #else /* !CONFIG_BPF_SYSCALL */
  static inline struct bpf_prog *bpf_prog_get(u32 ufd)
  {
@@ -1665,6 +1666,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
  {
return NULL;
  }
+
+static inline void bpf_task_storage_free(struct task_struct *task)
+{
+}
  #endif /* CONFIG_BPF_SYSCALL */
  
  static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,

@@ -1860,6 +1865,8 @@ extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
  extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
  extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
  extern const struct bpf_func_proto bpf_sock_from_file_proto;
+extern const struct bpf_func_proto bpf_task_storage_get_proto;
+extern const struct bpf_func_proto bpf_task_storage_delete_proto;
  
  const struct bpf_func_proto *bpf_tracing_func_proto(

enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 0d1c33ace3987..479c101546ad1 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -38,21 +38,9 @@ static inline struct bpf_storage_blob *bpf_inode(
return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
  }
  
-static inline struct bpf_storage_blob *bpf_task(

-   const struct task_struct *task)
-{
-   if (unlikely(!task->security))
-   return NULL;
-
-   return task->security + bpf_lsm_blob_sizes.lbs_task;
-}
-
  extern const struct bpf_func_proto bpf_inode_storage_get_proto;
  extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
-extern const struct bpf_func_proto bpf_task_storage_get_proto;
-extern const struct bpf_func_proto bpf_task_storage_delete_proto;
  void bpf_inode_storage_free(struct inode *inode);
-void bpf_task_storage_free(struct task_struct *task);
  
  #else /* !CONFIG_BPF_LSM */
  
@@ -73,20 +61,10 @@ static inline struct bpf_storage_blob *bpf_inode(

return NULL;
  }
  
-static inline struct bpf_storage_blob *bpf_task(

-   const struct task_struct *task)
-{
-   return NULL;
-}
-
  static inline void bpf_inode_storage_free(struct inode *inode)
  {
  }
  
-static inline void bpf_task_storage_free(struct task_struct *task)

-{
-}
-
  #endif /* CONFIG_BPF_LSM */
  
  #endif /* _LINUX_BPF_LSM_H */

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 99f7fd657d87a..b9edee336d804 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -109,8 +109,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
  #endif
  #ifdef CONFIG_BPF_LSM
  BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
  #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
  BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
  #if defined(CONFIG_XDP_SOCKETS)
  BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 51d535b69bd6f..4a173defa2010 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -42,6 +42,7 @@ struct audit_context;
  struct backing_dev_info;
  struct bio_list;
  struct blk_plug;
+struct bpf_local_storage;
  struct capture_control;
  struct cfs_rq;
  struct fs_struct;
@@ -1348,6 +1349,10 @@ struct task_struct {
/* Used by LSM modules for access restriction: */
void*security;
  #endif
+#ifdef CONFIG_BPF_SYSCALL
+   /* 

Re: [PATCH] tools/bpf: Remove unnecessary parameter in bpf_object__probe_loading

2021-01-08 Thread Yonghong Song




On 1/7/21 6:08 PM, 彭浩(Richard) wrote:

struct bpf_object *obj is not used in bpf_object__probe_loading, so we
can remove it.

Signed-off-by: Peng Hao 


Acked-by: Yonghong Song 


Re: [PATCH v2] btf: support ints larger than 128 bits

2020-12-30 Thread Yonghong Song




On 12/19/20 8:36 AM, Sean Young wrote:

clang supports arbitrary length ints using the _ExtInt extension. This
can be useful to hold very large values, e.g. 256 bit or 512 bit types.

Larger types (e.g. 1024 bits) are possible but I am unaware of a use
case for these.

This requires the _ExtInt extension enabled in clang, which is under
review.

Link: https://clang.llvm.org/docs/LanguageExtensions.html#extended-integer-types
Link: https://reviews.llvm.org/D93103

Signed-off-by: Sean Young 
---
changes since v2:
  - added tests as suggested by Yonghong Song
  - added kernel pretty-printer

  Documentation/bpf/btf.rst |   4 +-
  include/uapi/linux/btf.h  |   2 +-
  kernel/bpf/btf.c  |  54 +-
  tools/bpf/bpftool/btf_dumper.c|  40 ++
  tools/include/uapi/linux/btf.h|   2 +-
  tools/lib/bpf/btf.c   |   2 +-
  tools/testing/selftests/bpf/Makefile  |   3 +-
  tools/testing/selftests/bpf/prog_tests/btf.c  |   3 +-
  .../selftests/bpf/progs/test_btf_extint.c |  50 ++
  tools/testing/selftests/bpf/test_extint.py| 535 ++


For easier review, maybe you can break this patch into a patch series 
like below?

  patch 1 (kernel related changes and doc)
  kernel/bpf/btf.c, include/uapi/linux/btf.h,
  tools/include/uapi/linux/btf.h
  Documentation/bpf/btf.rst
  patch 2 (libbpf support)
  tools/lib/bpf/btf.c
  patch 3 (bpftool support)
  tools/bpf/bpftool/btf_dumper.c
  patch 4 (testing)
  rest files


  10 files changed, 679 insertions(+), 16 deletions(-)
  create mode 100644 tools/testing/selftests/bpf/progs/test_btf_extint.c
  create mode 100755 tools/testing/selftests/bpf/test_extint.py

diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 44dc789de2b4..784f1743dbc7 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -132,7 +132,7 @@ The following sections detail encoding of each kind.
  
#define BTF_INT_ENCODING(VAL)   (((VAL) & 0x0f00) >> 24)

#define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff) >> 16)
-  #define BTF_INT_BITS(VAL)   ((VAL)  & 0x00ff)
+  #define BTF_INT_BITS(VAL)   ((VAL)  & 0x03ff)
  
  The ``BTF_INT_ENCODING`` has the following attributes::
  
@@ -147,7 +147,7 @@ pretty print. At most one encoding can be specified for the int type.

  The ``BTF_INT_BITS()`` specifies the number of actual bits held by this int
  type. For example, a 4-bit bitfield encodes ``BTF_INT_BITS()`` equals to 4.
  The ``btf_type.size * 8`` must be equal to or greater than ``BTF_INT_BITS()``
-for the type. The maximum value of ``BTF_INT_BITS()`` is 128.
+for the type. The maximum value of ``BTF_INT_BITS()`` is 512.
  
  The ``BTF_INT_OFFSET()`` specifies the starting bit offset to calculate values

  for this int. For example, a bitfield struct member has:
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 5a667107ad2c..1696fd02b302 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -84,7 +84,7 @@ struct btf_type {
   */
  #define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f00) >> 24)
  #define BTF_INT_OFFSET(VAL)   (((VAL) & 0x00ff) >> 16)
-#define BTF_INT_BITS(VAL)  ((VAL)  & 0x00ff)
+#define BTF_INT_BITS(VAL)  ((VAL)  & 0x03ff)
  
  /* Attributes stored in the BTF_INT_ENCODING */

  #define BTF_INT_SIGNED(1 << 0)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 8d6bdb4f4d61..44bc17207e9b 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -166,7 +166,8 @@
   *
   */
  
-#define BITS_PER_U128 (sizeof(u64) * BITS_PER_BYTE * 2)

+#define BITS_PER_U128 128
+#define BITS_PER_U512 512
  #define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
  #define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
  #define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
@@ -1907,9 +1908,9 @@ static int btf_int_check_member(struct btf_verifier_env 
*env,
nr_copy_bits = BTF_INT_BITS(int_data) +
BITS_PER_BYTE_MASKED(struct_bits_off);
  
-	if (nr_copy_bits > BITS_PER_U128) {

+   if (nr_copy_bits > BITS_PER_U512) {
btf_verifier_log_member(env, struct_type, member,
-   "nr_copy_bits exceeds 128");
+   "nr_copy_bits exceeds 512");
return -EINVAL;
}
  
@@ -1963,9 +1964,9 @@ static int btf_int_check_kflag_member(struct btf_verifier_env *env,
  
  	bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);

nr_copy_bits = nr_bits + BITS_PER_BYTE_MASKED(struct_bits_off);
-   if (nr_copy_bits > BITS_PER_U128) {
+   if (nr_copy_bits > BITS_PER_U512) {
btf_verifier_log_member(env, struct_type, member,
-   "nr_copy_bits excee

Re: [PATCH] bpf: fix: address of local auto-variable assigned to a function parameter.

2020-12-23 Thread Yonghong Song




On 12/23/20 11:01 PM, YANG LI wrote:

Assigning local variable txq to the outputting parameter xdp->txq is not
safe, txq will be released after the end of the function call.
Then the result of using xdp is unpredictable.

Fix this error by defining the struct xdp_txq_info in function
dev_map_run_prog() as a static type.

Signed-off-by: YANG LI 
Reported-by: Abaci 
---
  kernel/bpf/devmap.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index f6e9c68..af6f004 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -454,7 +454,7 @@ static struct xdp_buff *dev_map_run_prog(struct net_device 
*dev,
 struct xdp_buff *xdp,
 struct bpf_prog *xdp_prog)
  {
-   struct xdp_txq_info txq = { .dev = dev };
+   static struct xdp_txq_info txq = { .dev = dev };
u32 act;
  
  	xdp_set_data_meta_invalid(xdp);


Exposing txq outside the routine with a 'static' definition is not
good practice. Maybe just reset xdp->txq = NULL right before the
function returns?

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index f6e9c68afdd4..50f5c20a33a3 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -475,6 +475,7 @@ static struct xdp_buff *dev_map_run_prog(struct 
net_device *dev,

}

xdp_return_buff(xdp);
+   xdp->txq = NULL;
return NULL;
 }

-bash-4.4$


Re: [PATCH bpf-next 1/2] bpf: Add a bpf_kallsyms_lookup helper

2020-12-17 Thread Yonghong Song




On 12/17/20 7:20 PM, Alexei Starovoitov wrote:

On Thu, Dec 17, 2020 at 09:26:09AM -0800, Yonghong Song wrote:



On 12/17/20 7:31 AM, Florent Revest wrote:

On Mon, Dec 14, 2020 at 7:47 AM Yonghong Song  wrote:

On 12/11/20 6:40 AM, Florent Revest wrote:

On Wed, Dec 2, 2020 at 10:18 PM Alexei Starovoitov
 wrote:

I still think that adopting printk/vsnprintf for this instead of
reinventing the wheel
is more flexible and easier to maintain long term.
Almost the same layout can be done with vsnprintf
with exception of \0 char.
More meaningful names, etc.
See Documentation/core-api/printk-formats.rst


I agree this would be nice. I finally got a bit of time to experiment
with this and I noticed a few things:

First of all, because helpers only have 5 arguments, if we use two for
the output buffer and its size and two for the format string and its
size, we are only left with one argument for a modifier. This is still
enough for our usecase (where we'd only use "%ps" for example) but it
does not strictly-speaking allow for the same layout that Andrii
proposed.


See helper bpf_seq_printf. It packs all arguments for format string and
puts them into an array. bpf_seq_printf will unpack them as it parsed
through the format string. So it should be doable to have more than
"%ps" in format string.


This could be a nice trick, thank you for the suggestion Yonghong :)

My understanding is that this would also require two extra args (one
for the array of arguments and one for the size of this array) so it
would still not fit the 5 arguments limit I described in my previous
email.
eg: this would not be possible:
long bpf_snprintf(const char *out, u32 out_size,
const char *fmt, u32 fmt_size,
   const void *data, u32 data_len)


Right. bpf allows only up to 5 parameters.


Would you then suggest that we also put the format string and its
length in the first and second cells of this array and have something
along the line of:
long bpf_snprintf(const char *out, u32 out_size,
const void *args, u32 args_len) ?
This seems like a fairly opaque signature to me and harder to verify.


One way is to define an explicit type for args, something like
struct bpf_fmt_str_data {
   char *fmt;
   u64 fmt_len;
   u64 data[];
};


that feels a bit convoluted.

The reason I feel unease with the helper as was originally proposed
and with Andrii's proposal is all the extra strlen and strcpy that
needs to be done. In the helper we have to call kallsyms_lookup()
which is an ok interface for what it was designed to do,
but it's awkward to use to construct new string ("%s [%s]", sym, modname)
or to send two strings into a ring buffer.
Andrii's zero separator idea will simplify bpf prog, but user space
would need to do strlen anyway if it needs to pretty print.
If we take pain on converting addr to sym+modname let's figure out
how to make it easy for the bpf prog to do and easy for user space to consume.
That's why I proposed snprintf.

As far as 6 arg issue:
long bpf_snprintf(const char *out, u32 out_size,
   const char *fmt, u32 fmt_size,
   const void *data, u32 data_len);
Yeah. It won't work as-is, but fmt_size is unnecessary nowadays.
The verifier understands read-only data.
Hence the helper can be:
long bpf_snprintf(const char *out, u32 out_size,
   const char *fmt,
   const void *data, u32 data_len);
The 3rd arg cannot be ARG_PTR_TO_MEM.
Instead we can introduce ARG_PTR_TO_CONST_STR in the verifier.


This should work except if fmt string is on the stack. Maybe this is
an okay tradeoff.


See check_mem_access() where it's doing bpf_map_direct_read().
That 'fmt' string will be accessed through the same bpf_map_direct_read().
The verifier would need to check that it's NUL-terminated valid string.
It should probably do % specifier checks at the same time.
At the end bpf_snprintf() will have 5 args and when wrapped with
BPF_SNPRINTF() macro it will accept arbitrary number of arguments to print.
It also will be generally useful to do all other kinds of pretty printing.



Re: [PATCH] btf: support ints larger than 128 bits

2020-12-17 Thread Yonghong Song




On 12/17/20 7:01 AM, Sean Young wrote:

clang supports arbitrary length ints using the _ExtInt extension. This
can be useful to hold very large values, e.g. 256 bit or 512 bit types.

Larger types (e.g. 1024 bits) are possible but I am unaware of a use
case for these.

This requires the _ExtInt extension to enabled for BPF in clang, which
is under review.

Link: https://clang.llvm.org/docs/LanguageExtensions.html#extended-integer-types
Link: https://reviews.llvm.org/D93103

Signed-off-by: Sean Young 
---
  Documentation/bpf/btf.rst  |  4 ++--
  include/uapi/linux/btf.h   |  2 +-
  tools/bpf/bpftool/btf_dumper.c | 39 ++
  tools/include/uapi/linux/btf.h |  2 +-
  4 files changed, 43 insertions(+), 4 deletions(-)


Thanks for the patch. But the change is not enough, and there are no tests
in the patch set.


For example, in kernel/bpf/btf.c, we use BITS_PER_U128 to guard various
places where the number of integer bits must be <= 128 bits, which is
what we support now. In function btf_type_int_is_regular(), an int with
more than 128 bits is considered not regular. The extint types like
256/512 bits should also be regular ints.


extint permits non-power-of-2 widths (e.g., 192 bits); supporting them
may not be necessary, and this is not your use case. What do you think?

tools/lib/bpf/btf.c btf__add_int() function also has the following check,

/* byte_sz must be power of 2 */
if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16)
return -EINVAL;

So Extint 256 bits will fail here.

Please do add some selftests tools/testing/selftests/bpf
directories:
   - to ensure btf with newly supported int types loaded successfully
 in kernel
   - to ensure bpftool map [pretty] print working fine with new types
   - to ensure kernel map pretty print works fine
 (tests at tools/testing/selftests/bpf/prog_tests/btf.c)
   - to ensure btf manipulation APIs works with new types.



diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 44dc789de2b4..784f1743dbc7 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -132,7 +132,7 @@ The following sections detail encoding of each kind.
  
#define BTF_INT_ENCODING(VAL)   (((VAL) & 0x0f00) >> 24)

#define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff) >> 16)
-  #define BTF_INT_BITS(VAL)   ((VAL)  & 0x00ff)
+  #define BTF_INT_BITS(VAL)   ((VAL)  & 0x03ff)
  
  The ``BTF_INT_ENCODING`` has the following attributes::
  
@@ -147,7 +147,7 @@ pretty print. At most one encoding can be specified for the int type.

  The ``BTF_INT_BITS()`` specifies the number of actual bits held by this int
  type. For example, a 4-bit bitfield encodes ``BTF_INT_BITS()`` equals to 4.
  The ``btf_type.size * 8`` must be equal to or greater than ``BTF_INT_BITS()``
-for the type. The maximum value of ``BTF_INT_BITS()`` is 128.
+for the type. The maximum value of ``BTF_INT_BITS()`` is 512.
  
  The ``BTF_INT_OFFSET()`` specifies the starting bit offset to calculate values

  for this int. For example, a bitfield struct member has:
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 5a667107ad2c..1696fd02b302 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -84,7 +84,7 @@ struct btf_type {
   */
  #define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f00) >> 24)
  #define BTF_INT_OFFSET(VAL)   (((VAL) & 0x00ff) >> 16)
-#define BTF_INT_BITS(VAL)  ((VAL)  & 0x00ff)
+#define BTF_INT_BITS(VAL)  ((VAL)  & 0x03ff)
  
  /* Attributes stored in the BTF_INT_ENCODING */

  #define BTF_INT_SIGNED(1 << 0)
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 0e9310727281..45ed45ea9962 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -271,6 +271,40 @@ static void btf_int128_print(json_writer_t *jw, const void 
*data,
}
  }
  
+static void btf_bigint_print(json_writer_t *jw, const void *data, int nr_bits,

+bool is_plain_text)
+{
+   char buf[nr_bits / 4 + 1];
+   bool first = true;
+   int i;
+
+#ifdef __BIG_ENDIAN_BITFIELD
+   for (i = 0; i < nr_bits / 64; i++) {
+#else
+   for (i = nr_bits / 64 - 1; i >= 0; i++) {
+#endif
+   __u64 v = ((__u64 *)data)[i];
+
+   if (first) {
+   if (!v)
+   continue;
+
+   snprintf(buf, sizeof(buf), "%llx", v);
+
+   first = false;
+   } else {
+   size_t off = strlen(buf);
+
+   snprintf(buf + off, sizeof(buf) - off, "%016llx", v);
+   }
+   }
+
+   if (is_plain_text)
+   jsonw_printf(jw, "0x%s", buf);
+   else
+   jsonw_printf(jw, "\"0x%s\"", buf);
+}
+
  static void btf_int128_shift(__u64 *print_num, __u16 left_shift_bits,
 __u16 right_shift_bits)
  {
@@ 

Re: [PATCH bpf-next 1/2] bpf: Add a bpf_kallsyms_lookup helper

2020-12-17 Thread Yonghong Song




On 12/17/20 7:31 AM, Florent Revest wrote:

On Mon, Dec 14, 2020 at 7:47 AM Yonghong Song  wrote:

On 12/11/20 6:40 AM, Florent Revest wrote:

On Wed, Dec 2, 2020 at 10:18 PM Alexei Starovoitov
 wrote:

I still think that adopting printk/vsnprintf for this instead of
reinventing the wheel
is more flexible and easier to maintain long term.
Almost the same layout can be done with vsnprintf
with exception of \0 char.
More meaningful names, etc.
See Documentation/core-api/printk-formats.rst


I agree this would be nice. I finally got a bit of time to experiment
with this and I noticed a few things:

First of all, because helpers only have 5 arguments, if we use two for
the output buffer and its size and two for the format string and its
size, we are only left with one argument for a modifier. This is still
enough for our usecase (where we'd only use "%ps" for example) but it
does not strictly-speaking allow for the same layout that Andrii
proposed.


See helper bpf_seq_printf. It packs all arguments for format string and
puts them into an array. bpf_seq_printf will unpack them as it parsed
through the format string. So it should be doable to have more than
"%ps" in format string.


This could be a nice trick, thank you for the suggestion Yonghong :)

My understanding is that this would also require two extra args (one
for the array of arguments and one for the size of this array) so it
would still not fit the 5 arguments limit I described in my previous
email.
eg: this would not be possible:
long bpf_snprintf(const char *out, u32 out_size,
   const char *fmt, u32 fmt_size,
  const void *data, u32 data_len)


Right. bpf allows only up to 5 parameters.


Would you then suggest that we also put the format string and its
length in the first and second cells of this array and have something
along the line of:
long bpf_snprintf(const char *out, u32 out_size,
   const void *args, u32 args_len) ?
This seems like a fairly opaque signature to me and harder to verify.


One way is to define an explicit type for args, something like
   struct bpf_fmt_str_data {
  char *fmt;
  u64 fmt_len;
  u64 data[];
   };

The bpf_snprintf signature can be
   long bpf_snprintf(const char *out, u32 out_size,
 const struct bpf_fmt_str_data *fmt_data,
 u32 fmt_data_len);

Internally you can have one argument type for "struct bpf_fmt_str_data",
like PTR_TO_FMT_DATA, as a verifier reg state. If bpf_snprintf is used,
when you try to verify PTR_TO_FMT_DATA, you can just verify
fmt_data->fmt and fmt_data->fmt_len, which satisfies mem constraints.

The rest of data can be passed to the helper as is.

Yes, still some verifier work. But may be useful for this and
future format string related helpers.


Re: [PATCH bpf-next v4 10/11] bpf: Add tests for new BPF atomic operations

2020-12-16 Thread Yonghong Song




On 12/16/20 3:51 AM, Brendan Jackman wrote:

On Wed, 16 Dec 2020 at 08:19, Yonghong Song  wrote:




On 12/15/20 3:12 AM, Brendan Jackman wrote:

On Tue, Dec 08, 2020 at 10:15:35AM -0800, Yonghong Song wrote:



On 12/8/20 8:59 AM, Brendan Jackman wrote:

On Tue, Dec 08, 2020 at 08:38:04AM -0800, Yonghong Song wrote:



On 12/8/20 4:41 AM, Brendan Jackman wrote:

On Mon, Dec 07, 2020 at 07:18:57PM -0800, Yonghong Song wrote:



On 12/7/20 8:07 AM, Brendan Jackman wrote:

The prog_test that's added depends on Clang/LLVM features added by
Yonghong in commit 286daafd6512 (was https://reviews.llvm.org/D72184 ).

Note the use of a define called ENABLE_ATOMICS_TESTS: this is used
to:

  - Avoid breaking the build for people on old versions of Clang
  - Avoid needing separate lists of test objects for no_alu32, where
atomics are not supported even if Clang has the feature.

The atomics_test.o BPF object is built unconditionally both for
test_progs and test_progs-no_alu32. For test_progs, if Clang supports
atomics, ENABLE_ATOMICS_TESTS is defined, so it includes the proper
test code. Otherwise, progs and global vars are defined anyway, as
stubs; this means that the skeleton user code still builds.

The atomics_test.o userspace object is built once and used for both
test_progs and test_progs-no_alu32. A variable called skip_tests is
defined in the BPF object's data section, which tells the userspace
object whether to skip the atomics test.

Signed-off-by: Brendan Jackman 


Ack with minor comments below.

Acked-by: Yonghong Song 


---
  tools/testing/selftests/bpf/Makefile  |  10 +
  .../selftests/bpf/prog_tests/atomics.c| 246 ++
  tools/testing/selftests/bpf/progs/atomics.c   | 154 +++
  .../selftests/bpf/verifier/atomic_and.c   |  77 ++
  .../selftests/bpf/verifier/atomic_cmpxchg.c   |  96 +++
  .../selftests/bpf/verifier/atomic_fetch_add.c | 106 
  .../selftests/bpf/verifier/atomic_or.c|  77 ++
  .../selftests/bpf/verifier/atomic_xchg.c  |  46 
  .../selftests/bpf/verifier/atomic_xor.c   |  77 ++
  9 files changed, 889 insertions(+)
  create mode 100644 tools/testing/selftests/bpf/prog_tests/atomics.c
  create mode 100644 tools/testing/selftests/bpf/progs/atomics.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_and.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_fetch_add.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_or.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xchg.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xor.c

diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index ac25ba5d0d6c..13bc1d736164 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -239,6 +239,12 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)  
\
  -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)   \
  -I$(abspath $(OUTPUT)/../usr/include)
+# BPF atomics support was added to Clang in llvm-project commit 286daafd6512
+# (release 12.0.0).
+BPF_ATOMICS_SUPPORTED = $(shell \
+   echo "int x = 0; int foo(void) { return __sync_val_compare_and_swap(, 1, 
2); }" \
+   | $(CLANG) -x cpp-output -S -target bpf -mcpu=v3 - -o /dev/null && echo 
1 || echo 0)


'-x c' here more intuitive?


+
  CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
-Wno-compare-distinct-pointer-types
@@ -399,11 +405,15 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read 
$(OUTPUT)/bpf_testmod.ko\
$(wildcard progs/btf_dump_test_case_*.c)
  TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
  TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
+ifeq ($(BPF_ATOMICS_SUPPORTED),1)
+  TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS
+endif
  TRUNNER_BPF_LDFLAGS := -mattr=+alu32
  $(eval $(call DEFINE_TEST_RUNNER,test_progs))
  # Define test_progs-no_alu32 test runner.
  TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
  TRUNNER_BPF_LDFLAGS :=
  $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c 
b/tools/testing/selftests/bpf/prog_tests/atomics.c
new file mode 100644
index ..c841a3abc2f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+
+#include "atomics.skel.h"
+
+static void test_add(struct atomics *skel)
+{
+   int err, prog_fd;
+   __u32 duration = 0, retval;
+   struct bpf_link *link;
+
+   link = bpf_program__attach(skel->progs.add);
+   if (

Re: [PATCH bpf-next v5 11/11] bpf: Document new atomic instructions

2020-12-16 Thread Yonghong Song




On 12/16/20 3:44 AM, Brendan Jackman wrote:

On Wed, 16 Dec 2020 at 08:08, Yonghong Song  wrote:




On 12/15/20 4:18 AM, Brendan Jackman wrote:

Document new atomic instructions.

Signed-off-by: Brendan Jackman 


Ack with minor comments below.

Acked-by: Yonghong Song 


---
   Documentation/networking/filter.rst | 26 ++
   1 file changed, 26 insertions(+)

diff --git a/Documentation/networking/filter.rst 
b/Documentation/networking/filter.rst
index 1583d59d806d..26d508a5e038 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -1053,6 +1053,32 @@ encoding.
  .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W  | BPF_STX: lock xadd *(u32 
*)(dst_reg + off16) += src_reg
  .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 
*)(dst_reg + off16) += src_reg

+The basic atomic operations supported (from architecture v4 onwards) are:


Remove "(from architecture v4 onwards)".


Oops, thanks.


+
+BPF_ADD
+BPF_AND
+BPF_OR
+BPF_XOR
+
+Each having equivalent semantics with the ``BPF_ADD`` example, that is: the
memory location addressed by ``dst_reg + off`` is atomically modified, with
+``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the
+immediate, then these operations also overwrite ``src_reg`` with the
+value that was in memory before it was modified.
+
+The more special operations are:
+
+BPF_XCHG
+
+This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg +
+off``.
+
+BPF_CMPXCHG
+
+This atomically compares the value addressed by ``dst_reg + off`` with
+``R0``. If they match it is replaced with ``src_reg``, The value that was there
+before is loaded back to ``R0``.
+
   Note that 1 and 2 byte atomic operations are not supported.


Adding something like below.

Except xadd for legacy reason, all other 4 byte atomic operations
require alu32 mode.
The alu32 mode can be enabled with clang flags "-Xclang -target-feature
-Xclang +alu32" or "-mcpu=v3". The cpu version 3 has alu32 mode on by
default.


Thanks, I've written it as:

Except ``BPF_ADD`` _without_ ``BPF_FETCH`` (for legacy reasons), all 4
byte atomic operations require alu32 mode. Clang enables this mode by
default in architecture v3 (``-mcpu=v3``). For older versions it can
be enabled with ``-Xclang -target-feature -Xclang +alu32``.


Sounds good. thanks!





   You may encounter BPF_XADD - this is a legacy name for BPF_ATOMIC, referring 
to



Re: [PATCH bpf-next v4 10/11] bpf: Add tests for new BPF atomic operations

2020-12-15 Thread Yonghong Song




On 12/15/20 3:12 AM, Brendan Jackman wrote:

On Tue, Dec 08, 2020 at 10:15:35AM -0800, Yonghong Song wrote:



On 12/8/20 8:59 AM, Brendan Jackman wrote:

On Tue, Dec 08, 2020 at 08:38:04AM -0800, Yonghong Song wrote:



On 12/8/20 4:41 AM, Brendan Jackman wrote:

On Mon, Dec 07, 2020 at 07:18:57PM -0800, Yonghong Song wrote:



On 12/7/20 8:07 AM, Brendan Jackman wrote:

The prog_test that's added depends on Clang/LLVM features added by
Yonghong in commit 286daafd6512 (was https://reviews.llvm.org/D72184).

Note the use of a define called ENABLE_ATOMICS_TESTS: this is used
to:

 - Avoid breaking the build for people on old versions of Clang
 - Avoid needing separate lists of test objects for no_alu32, where
   atomics are not supported even if Clang has the feature.

The atomics_test.o BPF object is built unconditionally both for
test_progs and test_progs-no_alu32. For test_progs, if Clang supports
atomics, ENABLE_ATOMICS_TESTS is defined, so it includes the proper
test code. Otherwise, progs and global vars are defined anyway, as
stubs; this means that the skeleton user code still builds.

The atomics_test.o userspace object is built once and used for both
test_progs and test_progs-no_alu32. A variable called skip_tests is
defined in the BPF object's data section, which tells the userspace
object whether to skip the atomics test.

Signed-off-by: Brendan Jackman 


Ack with minor comments below.

Acked-by: Yonghong Song 


---
 tools/testing/selftests/bpf/Makefile  |  10 +
 .../selftests/bpf/prog_tests/atomics.c| 246 ++
 tools/testing/selftests/bpf/progs/atomics.c   | 154 +++
 .../selftests/bpf/verifier/atomic_and.c   |  77 ++
 .../selftests/bpf/verifier/atomic_cmpxchg.c   |  96 +++
 .../selftests/bpf/verifier/atomic_fetch_add.c | 106 
 .../selftests/bpf/verifier/atomic_or.c|  77 ++
 .../selftests/bpf/verifier/atomic_xchg.c  |  46 
 .../selftests/bpf/verifier/atomic_xor.c   |  77 ++
 9 files changed, 889 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/atomics.c
 create mode 100644 tools/testing/selftests/bpf/progs/atomics.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_and.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_fetch_add.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_or.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xchg.c
 create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xor.c

diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index ac25ba5d0d6c..13bc1d736164 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -239,6 +239,12 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)  
\
 -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)   \
 -I$(abspath $(OUTPUT)/../usr/include)
+# BPF atomics support was added to Clang in llvm-project commit 286daafd6512
+# (release 12.0.0).
+BPF_ATOMICS_SUPPORTED = $(shell \
+   echo "int x = 0; int foo(void) { return __sync_val_compare_and_swap(&x, 1, 
2); }" \
+   | $(CLANG) -x cpp-output -S -target bpf -mcpu=v3 - -o /dev/null && echo 
1 || echo 0)


'-x c' here more intuitive?


+
 CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
   -Wno-compare-distinct-pointer-types
@@ -399,11 +405,15 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read 
$(OUTPUT)/bpf_testmod.ko\
   $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
 TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
+ifeq ($(BPF_ATOMICS_SUPPORTED),1)
+  TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS
+endif
 TRUNNER_BPF_LDFLAGS := -mattr=+alu32
 $(eval $(call DEFINE_TEST_RUNNER,test_progs))
 # Define test_progs-no_alu32 test runner.
 TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
 TRUNNER_BPF_LDFLAGS :=
 $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c 
b/tools/testing/selftests/bpf/prog_tests/atomics.c
new file mode 100644
index ..c841a3abc2f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+
+#include "atomics.skel.h"
+
+static void test_add(struct atomics *skel)
+{
+   int err, prog_fd;
+   __u32 duration = 0, retval;
+   struct bpf_link *link;
+
+   link = bpf_program__attach(skel->progs.add);
+   if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link)))
+   return;
+
+   prog

Re: [PATCH bpf-next v5 11/11] bpf: Document new atomic instructions

2020-12-15 Thread Yonghong Song




On 12/15/20 4:18 AM, Brendan Jackman wrote:

Document new atomic instructions.

Signed-off-by: Brendan Jackman 


Ack with minor comments below.

Acked-by: Yonghong Song 


---
  Documentation/networking/filter.rst | 26 ++
  1 file changed, 26 insertions(+)

diff --git a/Documentation/networking/filter.rst 
b/Documentation/networking/filter.rst
index 1583d59d806d..26d508a5e038 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -1053,6 +1053,32 @@ encoding.
 .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W  | BPF_STX: lock xadd *(u32 
*)(dst_reg + off16) += src_reg
 .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 
*)(dst_reg + off16) += src_reg
  
+The basic atomic operations supported (from architecture v4 onwards) are:


Remove "(from architecture v4 onwards)".


+
+BPF_ADD
+BPF_AND
+BPF_OR
+BPF_XOR
+
+Each having equivalent semantics with the ``BPF_ADD`` example, that is: the
+memory location addressed by ``dst_reg + off`` is atomically modified, with
+``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the
+immediate, then these operations also overwrite ``src_reg`` with the
+value that was in memory before it was modified.
+
+The more special operations are:
+
+BPF_XCHG
+
+This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg +
+off``.
+
+BPF_CMPXCHG
+
+This atomically compares the value addressed by ``dst_reg + off`` with
+``R0``. If they match it is replaced with ``src_reg``. The value that was there
+before is loaded back to ``R0``.
+
  Note that 1 and 2 byte atomic operations are not supported.


Adding something like below.

Except xadd for legacy reason, all other 4 byte atomic operations 
require alu32 mode.
The alu32 mode can be enabled with clang flags "-Xclang -target-feature 
-Xclang +alu32" or "-mcpu=v3". The cpu version 3 has alu32 mode on by 
default.


  
  You may encounter BPF_XADD - this is a legacy name for BPF_ATOMIC, referring to




Re: [PATCH bpf-next v5 09/11] bpf: Add bitwise atomic instructions

2020-12-15 Thread Yonghong Song




On 12/15/20 4:18 AM, Brendan Jackman wrote:

This adds instructions for

atomic[64]_[fetch_]and
atomic[64]_[fetch_]or
atomic[64]_[fetch_]xor

All these operations are isomorphic enough to implement with the same
verifier, interpreter, and x86 JIT code, hence being a single commit.

The main interesting thing here is that x86 doesn't directly support
the fetch_ version of these operations, so we need to generate a CMPXCHG
loop in the JIT. This requires the use of two temporary registers,
IIUC it's safe to use BPF_REG_AX and x86's AUX_REG for this purpose.

Signed-off-by: Brendan Jackman 


Acked-by: Yonghong Song 


Re: [PATCH bpf-next v5 07/11] bpf: Add instructions for atomic_[cmp]xchg

2020-12-15 Thread Yonghong Song




On 12/15/20 4:18 AM, Brendan Jackman wrote:

This adds two atomic opcodes, both of which include the BPF_FETCH
flag. XCHG without the BPF_FETCH flag would naturally encode
atomic_set. This is not supported because it would be of limited
value to userspace (it doesn't imply any barriers). CMPXCHG without
BPF_FETCH would be an atomic compare-and-write. We don't have such
an operation in the kernel so it isn't provided to BPF either.

There are two significant design decisions made for the CMPXCHG
instruction:

  - To solve the issue that this operation fundamentally has 3
operands, but we only have two register fields. Therefore the
operand we compare against (the kernel's API calls it 'old') is
hard-coded to be R0. x86 has similar design (and A64 doesn't
have this problem).

A potential alternative might be to encode the other operand's
register number in the immediate field.

  - The kernel's atomic_cmpxchg returns the old value, while the C11
userspace APIs return a boolean indicating the comparison
result. Which should BPF do? A64 returns the old value. x86 returns
the old value in the hard-coded register (and also sets a
flag). That means return-old-value is easier to JIT, so that's
what we use.

Signed-off-by: Brendan Jackman 


Ack with a minor comment below.

Acked-by: Yonghong Song 


---
  arch/x86/net/bpf_jit_comp.c|  8 
  include/linux/filter.h |  2 ++
  include/uapi/linux/bpf.h   |  4 +++-
  kernel/bpf/core.c  | 20 
  kernel/bpf/disasm.c| 15 +++
  kernel/bpf/verifier.c  | 19 +--
  tools/include/linux/filter.h   |  2 ++
  tools/include/uapi/linux/bpf.h |  4 +++-
  8 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index eea7d8b0bb12..308241187582 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -815,6 +815,14 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* src_reg = atomic_fetch_add(dst_reg + off, src_reg); */
EMIT2(0x0F, 0xC1);
break;
+   case BPF_XCHG:
+   /* src_reg = atomic_xchg(dst_reg + off, src_reg); */
+   EMIT1(0x87);
+   break;
+   case BPF_CMPXCHG:
+   /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
+   EMIT2(0x0F, 0xB1);
+   break;
default:
pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op);
return -EFAULT;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index c3e87a63e0b8..16e0ba5e8937 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -265,6 +265,8 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
   *
   *   BPF_ADD  *(uint *) (dst_reg + off16) += src_reg
   *   BPF_ADD | BPF_FETCH  src_reg = atomic_fetch_add(dst_reg + off16, 
src_reg);
+ *   BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg)
+ *   BPF_CMPXCHG  r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg)
   */
  
  #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF)			\

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 760ae333a5ed..538b95472c8f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -45,7 +45,9 @@
  #define BPF_EXIT  0x90/* function return */
  
  /* atomic op type fields (stored in immediate) */

-#define BPF_FETCH  0x01/* fetch previous value into src reg */
+#define BPF_XCHG   (0xe0 | BPF_FETCH)  /* atomic exchange */
+#define BPF_CMPXCHG(0xf0 | BPF_FETCH)  /* atomic compare-and-write */
+#define BPF_FETCH  0x01/* not an opcode on its own, used to build 
others */


Although the above code works fine, I would suggest to put
BPF_FETCH definition before BPF_XCHG and BPF_CMPXCHG, which
makes more sense intuitively.

  
  /* Register numbers */

  enum {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c

[...]   \

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 760ae333a5ed..538b95472c8f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -45,7 +45,9 @@
  #define BPF_EXIT  0x90/* function return */
  
  /* atomic op type fields (stored in immediate) */

-#define BPF_FETCH  0x01/* fetch previous value into src reg */
+#define BPF_XCHG   (0xe0 | BPF_FETCH)  /* atomic exchange */
+#define BPF_CMPXCHG(0xf0 | BPF_FETCH)  /* atomic compare-and-write */
+#define BPF_FETCH  0x01/* not an opcode on its own, used to build 
others */


same here.

  
  /* Register numbers */

  enum {



Re: [PATCH bpf-next 1/2] bpf: Add a bpf_kallsyms_lookup helper

2020-12-13 Thread Yonghong Song




On 12/11/20 6:40 AM, Florent Revest wrote:

On Wed, Dec 2, 2020 at 10:18 PM Alexei Starovoitov
 wrote:

I still think that adopting printk/vsnprintf for this instead of
reinventing the wheel
is more flexible and easier to maintain long term.
Almost the same layout can be done with vsnprintf
with exception of \0 char.
More meaningful names, etc.
See Documentation/core-api/printk-formats.rst


I agree this would be nice. I finally got a bit of time to experiment
with this and I noticed a few things:

First of all, because helpers only have 5 arguments, if we use two for
the output buffer and its size and two for the format string and its
size, we are only left with one argument for a modifier. This is still
enough for our usecase (where we'd only use "%ps" for example) but it
does not strictly-speaking allow for the same layout that Andrii
proposed.


See helper bpf_seq_printf. It packs all arguments for format string and
puts them into an array. bpf_seq_printf will unpack them as it parsed
through the format string. So it should be doable to have more than
"%ps" in format string.




If we force fmt to come from readonly map then bpf_trace_printk()-like
run-time check of fmt string can be moved into load time check
and performance won't suffer.


Regarding this bit, I have the impression that this would not be
possible, but maybe I'm missing something ? :)

The iteration that bpf_trace_printk does over the format string
argument is not only used for validation. It is also used to remember
what extra operations need to be done based on the modifier types. For
example, it remembers whether an arg should be interpreted as 32bits or
64bits. In the case of string printing, it also remembers whether it is
a kernel-space or user-space pointer so that bpf_trace_copy_string can
be called with the right arg. If we were to run the iteration over the format
string in the verifier, how would you recommend that we
"remember" the modifier type until the helper gets called ?



Re: [PATCH bpf-next v4 10/11] bpf: Add tests for new BPF atomic operations

2020-12-08 Thread Yonghong Song




On 12/8/20 8:59 AM, Brendan Jackman wrote:

On Tue, Dec 08, 2020 at 08:38:04AM -0800, Yonghong Song wrote:



On 12/8/20 4:41 AM, Brendan Jackman wrote:

On Mon, Dec 07, 2020 at 07:18:57PM -0800, Yonghong Song wrote:



On 12/7/20 8:07 AM, Brendan Jackman wrote:

The prog_test that's added depends on Clang/LLVM features added by
Yonghong in commit 286daafd6512 (was https://reviews.llvm.org/D72184   ).

Note the use of a define called ENABLE_ATOMICS_TESTS: this is used
to:

- Avoid breaking the build for people on old versions of Clang
- Avoid needing separate lists of test objects for no_alu32, where
  atomics are not supported even if Clang has the feature.

The atomics_test.o BPF object is built unconditionally both for
test_progs and test_progs-no_alu32. For test_progs, if Clang supports
atomics, ENABLE_ATOMICS_TESTS is defined, so it includes the proper
test code. Otherwise, progs and global vars are defined anyway, as
stubs; this means that the skeleton user code still builds.

The atomics_test.o userspace object is built once and used for both
test_progs and test_progs-no_alu32. A variable called skip_tests is
defined in the BPF object's data section, which tells the userspace
object whether to skip the atomics test.

Signed-off-by: Brendan Jackman 


Ack with minor comments below.

Acked-by: Yonghong Song 


---
tools/testing/selftests/bpf/Makefile  |  10 +
.../selftests/bpf/prog_tests/atomics.c| 246 ++
tools/testing/selftests/bpf/progs/atomics.c   | 154 +++
.../selftests/bpf/verifier/atomic_and.c   |  77 ++
.../selftests/bpf/verifier/atomic_cmpxchg.c   |  96 +++
.../selftests/bpf/verifier/atomic_fetch_add.c | 106 
.../selftests/bpf/verifier/atomic_or.c|  77 ++
.../selftests/bpf/verifier/atomic_xchg.c  |  46 
.../selftests/bpf/verifier/atomic_xor.c   |  77 ++
9 files changed, 889 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/atomics.c
create mode 100644 tools/testing/selftests/bpf/progs/atomics.c
create mode 100644 tools/testing/selftests/bpf/verifier/atomic_and.c
create mode 100644 tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
create mode 100644 tools/testing/selftests/bpf/verifier/atomic_fetch_add.c
create mode 100644 tools/testing/selftests/bpf/verifier/atomic_or.c
create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xchg.c
create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xor.c

diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index ac25ba5d0d6c..13bc1d736164 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -239,6 +239,12 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)  
\
 -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)   \
 -I$(abspath $(OUTPUT)/../usr/include)
+# BPF atomics support was added to Clang in llvm-project commit 286daafd6512
+# (release 12.0.0).
+BPF_ATOMICS_SUPPORTED = $(shell \
+   echo "int x = 0; int foo(void) { return __sync_val_compare_and_swap(&x, 1, 
2); }" \
+   | $(CLANG) -x cpp-output -S -target bpf -mcpu=v3 - -o /dev/null && echo 
1 || echo 0)


'-x c' here more intuitive?


+
CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
   -Wno-compare-distinct-pointer-types
@@ -399,11 +405,15 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read 
$(OUTPUT)/bpf_testmod.ko\
   $(wildcard progs/btf_dump_test_case_*.c)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
+ifeq ($(BPF_ATOMICS_SUPPORTED),1)
+  TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS
+endif
TRUNNER_BPF_LDFLAGS := -mattr=+alu32
$(eval $(call DEFINE_TEST_RUNNER,test_progs))
# Define test_progs-no_alu32 test runner.
TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
TRUNNER_BPF_LDFLAGS :=
$(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c 
b/tools/testing/selftests/bpf/prog_tests/atomics.c
new file mode 100644
index ..c841a3abc2f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+
+#include "atomics.skel.h"
+
+static void test_add(struct atomics *skel)
+{
+   int err, prog_fd;
+   __u32 duration = 0, retval;
+   struct bpf_link *link;
+
+   link = bpf_program__attach(skel->progs.add);
+   if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link)))
+   return;
+
+   prog_fd = bpf_program__fd(skel->progs.add);
+   err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+   NULL, NUL

Re: [PATCH bpf-next v4 10/11] bpf: Add tests for new BPF atomic operations

2020-12-08 Thread Yonghong Song




On 12/8/20 4:41 AM, Brendan Jackman wrote:

On Mon, Dec 07, 2020 at 07:18:57PM -0800, Yonghong Song wrote:



On 12/7/20 8:07 AM, Brendan Jackman wrote:

The prog_test that's added depends on Clang/LLVM features added by
Yonghong in commit 286daafd6512 (was https://reviews.llvm.org/D72184  ).

Note the use of a define called ENABLE_ATOMICS_TESTS: this is used
to:

   - Avoid breaking the build for people on old versions of Clang
   - Avoid needing separate lists of test objects for no_alu32, where
 atomics are not supported even if Clang has the feature.

The atomics_test.o BPF object is built unconditionally both for
test_progs and test_progs-no_alu32. For test_progs, if Clang supports
atomics, ENABLE_ATOMICS_TESTS is defined, so it includes the proper
test code. Otherwise, progs and global vars are defined anyway, as
stubs; this means that the skeleton user code still builds.

The atomics_test.o userspace object is built once and used for both
test_progs and test_progs-no_alu32. A variable called skip_tests is
defined in the BPF object's data section, which tells the userspace
object whether to skip the atomics test.

Signed-off-by: Brendan Jackman 


Ack with minor comments below.

Acked-by: Yonghong Song 


---
   tools/testing/selftests/bpf/Makefile  |  10 +
   .../selftests/bpf/prog_tests/atomics.c| 246 ++
   tools/testing/selftests/bpf/progs/atomics.c   | 154 +++
   .../selftests/bpf/verifier/atomic_and.c   |  77 ++
   .../selftests/bpf/verifier/atomic_cmpxchg.c   |  96 +++
   .../selftests/bpf/verifier/atomic_fetch_add.c | 106 
   .../selftests/bpf/verifier/atomic_or.c|  77 ++
   .../selftests/bpf/verifier/atomic_xchg.c  |  46 
   .../selftests/bpf/verifier/atomic_xor.c   |  77 ++
   9 files changed, 889 insertions(+)
   create mode 100644 tools/testing/selftests/bpf/prog_tests/atomics.c
   create mode 100644 tools/testing/selftests/bpf/progs/atomics.c
   create mode 100644 tools/testing/selftests/bpf/verifier/atomic_and.c
   create mode 100644 tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
   create mode 100644 tools/testing/selftests/bpf/verifier/atomic_fetch_add.c
   create mode 100644 tools/testing/selftests/bpf/verifier/atomic_or.c
   create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xchg.c
   create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xor.c

diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index ac25ba5d0d6c..13bc1d736164 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -239,6 +239,12 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)  
\
 -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)   \
 -I$(abspath $(OUTPUT)/../usr/include)
+# BPF atomics support was added to Clang in llvm-project commit 286daafd6512
+# (release 12.0.0).
+BPF_ATOMICS_SUPPORTED = $(shell \
+   echo "int x = 0; int foo(void) { return __sync_val_compare_and_swap(&x, 1, 
2); }" \
+   | $(CLANG) -x cpp-output -S -target bpf -mcpu=v3 - -o /dev/null && echo 
1 || echo 0)


'-x c' here more intuitive?


+
   CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
   -Wno-compare-distinct-pointer-types
@@ -399,11 +405,15 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read 
$(OUTPUT)/bpf_testmod.ko\
   $(wildcard progs/btf_dump_test_case_*.c)
   TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
   TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
+ifeq ($(BPF_ATOMICS_SUPPORTED),1)
+  TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS
+endif
   TRUNNER_BPF_LDFLAGS := -mattr=+alu32
   $(eval $(call DEFINE_TEST_RUNNER,test_progs))
   # Define test_progs-no_alu32 test runner.
   TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
   TRUNNER_BPF_LDFLAGS :=
   $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c 
b/tools/testing/selftests/bpf/prog_tests/atomics.c
new file mode 100644
index ..c841a3abc2f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+
+#include "atomics.skel.h"
+
+static void test_add(struct atomics *skel)
+{
+   int err, prog_fd;
+   __u32 duration = 0, retval;
+   struct bpf_link *link;
+
+   link = bpf_program__attach(skel->progs.add);
+   if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link)))
+   return;
+
+   prog_fd = bpf_program__fd(skel->progs.add);
+   err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+   NULL, NULL, , );
+   if (CHECK(err || retval, "test_run add",
+ "err %d errno %d retval %d duration %d\n", err, errno, 
retva

Re: [PATCH bpf-next v4 11/11] bpf: Document new atomic instructions

2020-12-07 Thread Yonghong Song




On 12/7/20 8:07 AM, Brendan Jackman wrote:

Document new atomic instructions.

Signed-off-by: Brendan Jackman 


Ack with minor comments below.

Acked-by: Yonghong Song 


---
  Documentation/networking/filter.rst | 26 ++
  1 file changed, 26 insertions(+)

diff --git a/Documentation/networking/filter.rst 
b/Documentation/networking/filter.rst
index 1583d59d806d..26d508a5e038 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -1053,6 +1053,32 @@ encoding.
 .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W  | BPF_STX: lock xadd *(u32 
*)(dst_reg + off16) += src_reg
 .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 
*)(dst_reg + off16) += src_reg
  
+The basic atomic operations supported (from architecture v4 onwards) are:


No "v4" any more. Just say
  The basic atomic operations supported are:


+
+BPF_ADD
+BPF_AND
+BPF_OR
+BPF_XOR
+
+Each having equivalent semantics with the ``BPF_ADD`` example, that is: the
+memory location addressed by ``dst_reg + off`` is atomically modified, with
+``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the
+immediate, then these operations also overwrite ``src_reg`` with the
+value that was in memory before it was modified.


For 4-byte operations, except BPF_ADD, alu32 mode is required.
alu32 is implied with -mcpu=v3.


+
+The more special operations are:
+
+BPF_XCHG
+
+This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg +
+off``.
+
+BPF_CMPXCHG
+
+This atomically compares the value addressed by ``dst_reg + off`` with
+``R0``. If they match it is replaced with ``src_reg``. The value that was there
+before is loaded back to ``R0``.
+
  Note that 1 and 2 byte atomic operations are not supported.
  
  You may encounter BPF_XADD - this is a legacy name for BPF_ATOMIC, referring to




Re: [PATCH bpf-next v4 10/11] bpf: Add tests for new BPF atomic operations

2020-12-07 Thread Yonghong Song




On 12/7/20 8:07 AM, Brendan Jackman wrote:

The prog_test that's added depends on Clang/LLVM features added by
Yonghong in commit 286daafd6512 (was https://reviews.llvm.org/D72184 ).

Note the use of a define called ENABLE_ATOMICS_TESTS: this is used
to:

  - Avoid breaking the build for people on old versions of Clang
  - Avoid needing separate lists of test objects for no_alu32, where
atomics are not supported even if Clang has the feature.

The atomics_test.o BPF object is built unconditionally both for
test_progs and test_progs-no_alu32. For test_progs, if Clang supports
atomics, ENABLE_ATOMICS_TESTS is defined, so it includes the proper
test code. Otherwise, progs and global vars are defined anyway, as
stubs; this means that the skeleton user code still builds.

The atomics_test.o userspace object is built once and used for both
test_progs and test_progs-no_alu32. A variable called skip_tests is
defined in the BPF object's data section, which tells the userspace
object whether to skip the atomics test.

Signed-off-by: Brendan Jackman 


Ack with minor comments below.

Acked-by: Yonghong Song 


---
  tools/testing/selftests/bpf/Makefile  |  10 +
  .../selftests/bpf/prog_tests/atomics.c| 246 ++
  tools/testing/selftests/bpf/progs/atomics.c   | 154 +++
  .../selftests/bpf/verifier/atomic_and.c   |  77 ++
  .../selftests/bpf/verifier/atomic_cmpxchg.c   |  96 +++
  .../selftests/bpf/verifier/atomic_fetch_add.c | 106 
  .../selftests/bpf/verifier/atomic_or.c|  77 ++
  .../selftests/bpf/verifier/atomic_xchg.c  |  46 
  .../selftests/bpf/verifier/atomic_xor.c   |  77 ++
  9 files changed, 889 insertions(+)
  create mode 100644 tools/testing/selftests/bpf/prog_tests/atomics.c
  create mode 100644 tools/testing/selftests/bpf/progs/atomics.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_and.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_fetch_add.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_or.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xchg.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xor.c

diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index ac25ba5d0d6c..13bc1d736164 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -239,6 +239,12 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)  
\
 -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)   \
 -I$(abspath $(OUTPUT)/../usr/include)
  
+# BPF atomics support was added to Clang in llvm-project commit 286daafd6512

+# (release 12.0.0).
+BPF_ATOMICS_SUPPORTED = $(shell \
+   echo "int x = 0; int foo(void) { return __sync_val_compare_and_swap(&x, 1, 
2); }" \
+   | $(CLANG) -x cpp-output -S -target bpf -mcpu=v3 - -o /dev/null && echo 
1 || echo 0)


'-x c' here more intuitive?


+
  CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
   -Wno-compare-distinct-pointer-types
  
@@ -399,11 +405,15 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko	\

   $(wildcard progs/btf_dump_test_case_*.c)
  TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
  TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
+ifeq ($(BPF_ATOMICS_SUPPORTED),1)
+  TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS
+endif
  TRUNNER_BPF_LDFLAGS := -mattr=+alu32
  $(eval $(call DEFINE_TEST_RUNNER,test_progs))
  
  # Define test_progs-no_alu32 test runner.

  TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
  TRUNNER_BPF_LDFLAGS :=
  $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
  
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c

new file mode 100644
index ..c841a3abc2f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+
+#include "atomics.skel.h"
+
+static void test_add(struct atomics *skel)
+{
+   int err, prog_fd;
+   __u32 duration = 0, retval;
+   struct bpf_link *link;
+
+   link = bpf_program__attach(skel->progs.add);
+   if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link)))
+   return;
+
+   prog_fd = bpf_program__fd(skel->progs.add);
+   err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+   NULL, NULL, , );
+   if (CHECK(err || retval, "test_run add",
+ "err %d errno %d retval %d duration %d\n", err, errno, 
retval, duration))
+   goto cleanup;
+
+   ASSERT_EQ(skel->data->add64_value, 3, "add64_value");
+  

Re: [PATCH bpf-next v4 09/11] bpf: Add bitwise atomic instructions

2020-12-07 Thread Yonghong Song




On 12/7/20 8:07 AM, Brendan Jackman wrote:

This adds instructions for

atomic[64]_[fetch_]and
atomic[64]_[fetch_]or
atomic[64]_[fetch_]xor

All these operations are isomorphic enough to implement with the same
verifier, interpreter, and x86 JIT code, hence being a single commit.

The main interesting thing here is that x86 doesn't directly support
the fetch_ version of these operations, so we need to generate a CMPXCHG
loop in the JIT. This requires the use of two temporary registers,
IIUC it's safe to use BPF_REG_AX and x86's AUX_REG for this purpose.

Signed-off-by: Brendan Jackman 
---
  arch/x86/net/bpf_jit_comp.c  | 50 ++-
  include/linux/filter.h   | 66 
  kernel/bpf/core.c|  3 ++
  kernel/bpf/disasm.c  | 21 +---
  kernel/bpf/verifier.c|  6 
  tools/include/linux/filter.h | 66 
  6 files changed, 207 insertions(+), 5 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 308241187582..1d4d50199293 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -808,6 +808,10 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* emit opcode */
switch (atomic_op) {
case BPF_ADD:
+   case BPF_SUB:
+   case BPF_AND:
+   case BPF_OR:
+   case BPF_XOR:
/* lock *(u32/u64*)(dst_reg + off) = src_reg */
EMIT1(simple_alu_opcodes[atomic_op]);
break;

[...]

diff --git a/include/linux/filter.h b/include/linux/filter.h
index e1e1fc946a7c..e100c71555a4 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -264,7 +264,13 @@ static inline bool insn_is_zext(const struct bpf_insn 
*insn)
   * Atomic operations:
   *
   *   BPF_ADD  *(uint *) (dst_reg + off16) += src_reg
+ *   BPF_AND  *(uint *) (dst_reg + off16) &= src_reg
+ *   BPF_OR   *(uint *) (dst_reg + off16) |= src_reg
+ *   BPF_XOR  *(uint *) (dst_reg + off16) ^= src_reg
   *   BPF_ADD | BPF_FETCH  src_reg = atomic_fetch_add(dst_reg + off16, 
src_reg);
+ *   BPF_AND | BPF_FETCH  src_reg = atomic_fetch_and(dst_reg + off16, 
src_reg);
+ *   BPF_OR | BPF_FETCH   src_reg = atomic_fetch_or(dst_reg + off16, 
src_reg);
+ *   BPF_XOR | BPF_FETCH  src_reg = atomic_fetch_xor(dst_reg + off16, 
src_reg);
   *   BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg)
   *   BPF_CMPXCHG  r0 = atomic_cmpxchg(dst_reg + off16, r0, 
src_reg)
   */
@@ -295,6 +301,66 @@ static inline bool insn_is_zext(const struct bpf_insn 
*insn)
.off   = OFF,   \
.imm   = BPF_ADD })
  
+/* Atomic memory and, *(uint *)(dst_reg + off16) &= src_reg */

+
+#define BPF_ATOMIC_AND(SIZE, DST, SRC, OFF)\
+   ((struct bpf_insn) {\
+   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
+   .dst_reg = DST, \
+   .src_reg = SRC, \
+   .off   = OFF,   \
+   .imm   = BPF_AND })
+
+/* Atomic memory and with fetch, src_reg = atomic_fetch_and(dst_reg + off, 
src_reg); */
+
+#define BPF_ATOMIC_FETCH_AND(SIZE, DST, SRC, OFF)  \
+   ((struct bpf_insn) {\
+   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
+   .dst_reg = DST, \
+   .src_reg = SRC, \
+   .off   = OFF,   \
+   .imm   = BPF_AND | BPF_FETCH })
+
+/* Atomic memory or, *(uint *)(dst_reg + off16) |= src_reg */
+
+#define BPF_ATOMIC_OR(SIZE, DST, SRC, OFF) \
+   ((struct bpf_insn) {\
+   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
+   .dst_reg = DST, \
+   .src_reg = SRC, \
+   .off   = OFF,   \
+   .imm   = BPF_OR })
+
+/* Atomic memory or with fetch, src_reg = atomic_fetch_or(dst_reg + off, 
src_reg); */
+
+#define BPF_ATOMIC_FETCH_OR(SIZE, DST, SRC, OFF)   \
+   ((struct bpf_insn) {\
+   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
+   .dst_reg = DST, \
+   .src_reg = SRC, \
+   .off   = OFF,   \
+   .imm   = BPF_OR | BPF_FETCH })
+
+/* Atomic memory xor, *(uint *)(dst_reg + off16) ^= src_reg */
+
+#define BPF_ATOMIC_XOR(SIZE, DST, SRC, OFF)   

Re: [PATCH bpf-next v4 07/11] bpf: Add instructions for atomic_[cmp]xchg

2020-12-07 Thread Yonghong Song




On 12/7/20 8:07 AM, Brendan Jackman wrote:

This adds two atomic opcodes, both of which include the BPF_FETCH
flag. XCHG without the BPF_FETCH flag would naturally encode
atomic_set. This is not supported because it would be of limited
value to userspace (it doesn't imply any barriers). CMPXCHG without
BPF_FETCH would be an atomic compare-and-write. We don't have such
an operation in the kernel so it isn't provided to BPF either.

There are two significant design decisions made for the CMPXCHG
instruction:

  - To solve the issue that this operation fundamentally has 3
operands, but we only have two register fields. Therefore the
operand we compare against (the kernel's API calls it 'old') is
hard-coded to be R0. x86 has similar design (and A64 doesn't
have this problem).

A potential alternative might be to encode the other operand's
register number in the immediate field.

  - The kernel's atomic_cmpxchg returns the old value, while the C11
userspace APIs return a boolean indicating the comparison
result. Which should BPF do? A64 returns the old value. x86 returns
the old value in the hard-coded register (and also sets a
flag). That means return-old-value is easier to JIT.

Signed-off-by: Brendan Jackman 
---
  arch/x86/net/bpf_jit_comp.c|  8 
  include/linux/filter.h | 22 ++
  include/uapi/linux/bpf.h   |  4 +++-
  kernel/bpf/core.c  | 20 
  kernel/bpf/disasm.c| 15 +++
  kernel/bpf/verifier.c  | 19 +--
  tools/include/linux/filter.h   | 22 ++
  tools/include/uapi/linux/bpf.h |  4 +++-
  8 files changed, 110 insertions(+), 4 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index eea7d8b0bb12..308241187582 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -815,6 +815,14 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* src_reg = atomic_fetch_add(dst_reg + off, src_reg); */
EMIT2(0x0F, 0xC1);
break;
+   case BPF_XCHG:
+   /* src_reg = atomic_xchg(dst_reg + off, src_reg); */
+   EMIT1(0x87);
+   break;
+   case BPF_CMPXCHG:
+   /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
+   EMIT2(0x0F, 0xB1);
+   break;
default:
pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op);
return -EFAULT;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index b5258bca10d2..e1e1fc946a7c 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -265,6 +265,8 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
   *
   *   BPF_ADD  *(uint *) (dst_reg + off16) += src_reg
   *   BPF_ADD | BPF_FETCH  src_reg = atomic_fetch_add(dst_reg + off16, 
src_reg);
+ *   BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg)
+ *   BPF_CMPXCHG  r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg)
   */
  
  #define BPF_ATOMIC64(OP, DST, SRC, OFF)\

@@ -293,6 +295,26 @@ static inline bool insn_is_zext(const struct bpf_insn 
*insn)
.off   = OFF,   \
.imm   = BPF_ADD })
  
+/* Atomic exchange, src_reg = atomic_xchg(dst_reg + off, src_reg) */

+
+#define BPF_ATOMIC_XCHG(SIZE, DST, SRC, OFF)   \
+   ((struct bpf_insn) {\
+   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
+   .dst_reg = DST, \
+   .src_reg = SRC, \
+   .off   = OFF,   \
+   .imm   = BPF_XCHG  })
+
+/* Atomic compare-exchange, r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg) */
+
+#define BPF_ATOMIC_CMPXCHG(SIZE, DST, SRC, OFF)\
+   ((struct bpf_insn) {\
+   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
+   .dst_reg = DST, \
+   .src_reg = SRC, \
+   .off   = OFF,   \
+   .imm   = BPF_CMPXCHG })


Define BPF_ATOMIC_{XCHG, CMPXCHG} based on BPF_ATOMIC macro?


+
  /* Memory store, *(uint *) (dst_reg + off16) = imm32 */
  
  #define BPF_ST_MEM(SIZE, DST, OFF, IMM)\

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d5389119291e..b733af50a5b9 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -45,7 +45,9 @@
  #define BPF_EXIT  0x90/* function return */
  
  /* atomic op type fields (stored in immediate) */

-#define BPF_FETCH  0x01/* fetch previous value into src reg */
+#define BPF_XCHG   (0xe0 | BPF_FETCH)  /* 

Re: [PATCH bpf-next v4 06/11] bpf: Add BPF_FETCH field / create atomic_fetch_add instruction

2020-12-07 Thread Yonghong Song




On 12/7/20 8:07 AM, Brendan Jackman wrote:

The BPF_FETCH field can be set in bpf_insn.imm, for BPF_ATOMIC
instructions, in order to have the previous value of the
atomically-modified memory location loaded into the src register
after an atomic op is carried out.

Suggested-by: Yonghong Song 
Signed-off-by: Brendan Jackman 
---
  arch/x86/net/bpf_jit_comp.c|  4 
  include/linux/filter.h |  1 +
  include/uapi/linux/bpf.h   |  3 +++
  kernel/bpf/core.c  | 13 +
  kernel/bpf/disasm.c|  7 +++
  kernel/bpf/verifier.c  | 33 -
  tools/include/linux/filter.h   | 11 +++
  tools/include/uapi/linux/bpf.h |  3 +++
  8 files changed, 66 insertions(+), 9 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c

[...]


index f345f12c1ff8..4e0100ba52c2 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -173,6 +173,7 @@
   * Atomic operations:
   *
   *   BPF_ADD  *(uint *) (dst_reg + off16) += src_reg
+ *   BPF_ADD | BPF_FETCH  src_reg = atomic_fetch_add(dst_reg + off16, 
src_reg);
   */
  
  #define BPF_ATOMIC64(OP, DST, SRC, OFF)\

@@ -201,6 +202,16 @@
.off   = OFF,   \
.imm   = BPF_ADD })
  
+/* Atomic memory add with fetch, src_reg = atomic_fetch_add(dst_reg + off, src_reg); */

+
+#define BPF_ATOMIC_FETCH_ADD(SIZE, DST, SRC, OFF)  \
+   ((struct bpf_insn) {\
+   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
+   .dst_reg = DST, \
+   .src_reg = SRC, \
+   .off   = OFF,   \
+   .imm   = BPF_ADD | BPF_FETCH })


Not sure whether it is a good idea or not to fold this into BPF_ATOMIC 
macro. At least you can define BPF_ATOMIC macro and

#define BPF_ATOMIC_FETCH_ADD(SIZE, DST, SRC, OFF)   \
BPF_ATOMIC(SIZE, DST, SRC, OFF, BPF_ADD | BPF_FETCH)

to avoid too many code duplications?


+
  /* Memory store, *(uint *) (dst_reg + off16) = imm32 */
  
  #define BPF_ST_MEM(SIZE, DST, OFF, IMM)\

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 98161e2d389f..d5389119291e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -44,6 +44,9 @@
  #define BPF_CALL  0x80/* function call */
  #define BPF_EXIT  0x90/* function return */
  
+/* atomic op type fields (stored in immediate) */

+#define BPF_FETCH  0x01/* fetch previous value into src reg */
+
  /* Register numbers */
  enum {
BPF_REG_0 = 0,



Re: [PATCH bpf-next v4 05/11] bpf: Move BPF_STX reserved field check into BPF_STX verifier code

2020-12-07 Thread Yonghong Song




On 12/7/20 8:07 AM, Brendan Jackman wrote:

I can't find a reason why this code is in resolve_pseudo_ldimm64;
since I'll be modifying it in a subsequent commit, tidy it up.

Signed-off-by: Brendan Jackman 


Acked-by: Yonghong Song 


Re: [PATCH] bpf: propagate __user annotations properly

2020-12-07 Thread Yonghong Song




On 12/7/20 4:37 AM, Lukas Bulwahn wrote:

__htab_map_lookup_and_delete_batch() stores a user pointer in the local
variable ubatch and uses that in copy_{from,to}_user(), but ubatch misses a
__user annotation.

So, sparse warns in the various assignments and uses of ubatch:

   kernel/bpf/hashtab.c:1415:24: warning: incorrect type in initializer
 (different address spaces)
   kernel/bpf/hashtab.c:1415:24:expected void *ubatch
   kernel/bpf/hashtab.c:1415:24:got void [noderef] __user *

   kernel/bpf/hashtab.c:1444:46: warning: incorrect type in argument 2
 (different address spaces)
   kernel/bpf/hashtab.c:1444:46:expected void const [noderef] __user *from
   kernel/bpf/hashtab.c:1444:46:got void *ubatch

   kernel/bpf/hashtab.c:1608:16: warning: incorrect type in assignment
 (different address spaces)
   kernel/bpf/hashtab.c:1608:16:expected void *ubatch
   kernel/bpf/hashtab.c:1608:16:got void [noderef] __user *

   kernel/bpf/hashtab.c:1609:26: warning: incorrect type in argument 1
 (different address spaces)
   kernel/bpf/hashtab.c:1609:26:expected void [noderef] __user *to
   kernel/bpf/hashtab.c:1609:26:got void *ubatch

Add the __user annotation to repair this chain of propagating __user
annotations in __htab_map_lookup_and_delete_batch().


Add fix tag?

Fixes: 057996380a42 ("bpf: Add batch ops to all htab bpf map")



Signed-off-by: Lukas Bulwahn 


Thanks for the fix. LGTM. I guess either bpf or bpf-next tree is fine
since this is not a correctness issue.

Acked-by: Yonghong Song 


Re: [PATCH bpf-next v3 10/14] bpf: Add bitwise atomic instructions

2020-12-07 Thread Yonghong Song




On 12/7/20 3:28 AM, Brendan Jackman wrote:

On Fri, Dec 04, 2020 at 07:21:22AM -0800, Yonghong Song wrote:



On 12/4/20 1:36 AM, Brendan Jackman wrote:

On Thu, Dec 03, 2020 at 10:42:19PM -0800, Yonghong Song wrote:



On 12/3/20 8:02 AM, Brendan Jackman wrote:

This adds instructions for

atomic[64]_[fetch_]and
atomic[64]_[fetch_]or
atomic[64]_[fetch_]xor

All these operations are isomorphic enough to implement with the same
verifier, interpreter, and x86 JIT code, hence being a single commit.

The main interesting thing here is that x86 doesn't directly support
the fetch_ version these operations, so we need to generate a CMPXCHG
loop in the JIT. This requires the use of two temporary registers,
IIUC it's safe to use BPF_REG_AX and x86's AUX_REG for this purpose.

Change-Id: I340b10cecebea8cb8a52e3606010cde547a10ed4
Signed-off-by: Brendan Jackman 
---
arch/x86/net/bpf_jit_comp.c  | 50 +-
include/linux/filter.h   | 60 
kernel/bpf/core.c|  5 ++-
kernel/bpf/disasm.c  | 21 ++---
kernel/bpf/verifier.c|  6 
tools/include/linux/filter.h | 60 
6 files changed, 196 insertions(+), 6 deletions(-)


[...]

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 6186280715ed..698f82897b0d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -280,6 +280,66 @@ static inline bool insn_is_zext(const struct bpf_insn 
*insn)

[...]

+#define BPF_ATOMIC_FETCH_XOR(SIZE, DST, SRC, OFF)  \
+   ((struct bpf_insn) {\
+   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
+   .dst_reg = DST, \
+   .src_reg = SRC, \
+   .off   = OFF,   \
+   .imm   = BPF_XOR | BPF_FETCH })
+
/* Atomic exchange, src_reg = atomic_xchg((dst_reg + off), src_reg) */


Looks like BPF_ATOMIC_XOR/OR/AND/... all similar to each other.
The same is for BPF_ATOMIC_FETCH_XOR/OR/AND/...

I am wondering whether it makes sense to have two macros,
BPF_ATOMIC_BOP(BOP, SIZE, DST, SRC, OFF) and
BPF_ATOMIC_FETCH_BOP(BOP, SIZE, DST, SRC, OFF)
can have less number of macros?


Hmm yeah I think that's probably a good idea, it would be consistent
with the macros for non-atomic ALU ops.

I don't think 'BOP' would be very clear though, 'ALU' might be more
obvious.


BPF_ATOMIC_ALU and BPF_ATOMIC_FETCH_ALU indeed better.


On second thoughts I think it feels right (i.e. it would be roughly
consistent with the level of abstraction of the rest of this macro API)
to go further and just have two macros BPF_ATOMIC64 and BPF_ATOMIC32:

/*
 * Atomic ALU ops:
 *
 *   BPF_ADD  *(uint *) (dst_reg + off16) += src_reg
 *   BPF_AND  *(uint *) (dst_reg + off16) &= src_reg
 *   BPF_OR   *(uint *) (dst_reg + off16) |= src_reg
 *   BPF_XOR  *(uint *) (dst_reg + off16) ^= src_reg


"uint *" => "size_type *"?
and give an explanation that "size_type" is either "u32" or "u64"?


 *   BPF_ADD | BPF_FETCH  src_reg = atomic_fetch_add(dst_reg + 
off16, src_reg);
 *   BPF_AND | BPF_FETCH  src_reg = atomic_fetch_and(dst_reg + 
off16, src_reg);
 *   BPF_OR | BPF_FETCH   src_reg = atomic_fetch_or(dst_reg + 
off16, src_reg);
 *   BPF_XOR | BPF_FETCH  src_reg = atomic_fetch_xor(dst_reg + 
off16, src_reg);
 *   BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, 
src_reg)
 *   BPF_CMPXCHG  r0 = atomic_cmpxchg(dst_reg + off16, r0, 
src_reg)
 */

#define BPF_ATOMIC64(OP, DST, SRC, OFF) \
((struct bpf_insn) {\
.code  = BPF_STX | BPF_DW | BPF_ATOMIC, \
.dst_reg = DST, \
.src_reg = SRC, \
.off   = OFF,   \
.imm   = OP })

#define BPF_ATOMIC32(OP, DST, SRC, OFF) \
((struct bpf_insn) {\
.code  = BPF_STX | BPF_W | BPF_ATOMIC, \
.dst_reg = DST, \
.src_reg = SRC, \
.off   = OFF,   \
.imm   = OP })


You could have
  BPF_ATOMIC(OP, SIZE, DST, SRC, OFF)
where SIZE is BPF_DW or BPF_W.



The downside compared to what's currently in the patc

Re: [PATCH v2 bpf-next 0/3] bpf: support module BTF in BTF display helpers

2020-12-06 Thread Yonghong Song




On 12/5/20 4:43 PM, Alan Maguire wrote:


On Sat, 5 Dec 2020, Yonghong Song wrote:




__builtin_btf_type_id() is really only supported in llvm12
and 64bit return value support is pushed to llvm12 trunk
a while back. The builtin is introduced in llvm11 but has a
corner bug, so llvm12 is recommended. So if people use the builtin,
you can assume 64bit return value. libbpf support is required
here. So in my opinion, there is no need to do feature detection.

Andrii has a patch to support 64bit return value for
__builtin_btf_type_id() and I assume that one should
be landed before or together with your patch.

Just for your info. The following is an example you could
use to determine whether __builtin_btf_type_id()
supports btf object id at llvm level.

-bash-4.4$ cat t.c
int test(int arg) {
   return __builtin_btf_type_id(arg, 1);
}

Compile to generate assembly code with latest llvm12 trunk:
   clang -target bpf -O2 -S -g -mcpu=v3 t.c
In the asm code, you should see one line with
   r0 = 1 ll

Or you can generate obj code:
   clang -target bpf -O2 -c -g -mcpu=v3 t.c
and then you disassemble the obj file
   llvm-objdump -d --no-show-raw-insn --no-leading-addr t.o
You should see below in the output
   r0 = 1 ll

Use earlier version of llvm12 trunk, the builtin has
32bit return value, you will see
   r0 = 1
which is a 32bit imm to r0, while "r0 = 1 ll" is
64bit imm to r0.



Thanks for this Yonghong!  I'm thinking the way I'll tackle it
is to simply verify that the upper 32 bits specifying the
veth module object id are non-zero; if they are zero, we'll skip
the test (I think a skip probably makes sense as not everyone will
have llvm12). Does that seem reasonable?


This should work too and we do not need to add a note in
README.rst for this test then.



With the additional few minor changes on top of Andrii's patch,
the use of __builtin_btf_type_id() worked perfectly. Thanks!

Alan



Re: [PATCH v2 bpf-next 0/3] bpf: support module BTF in BTF display helpers

2020-12-05 Thread Yonghong Song




On 12/5/20 12:35 PM, Yonghong Song wrote:



On 12/4/20 10:48 AM, Alan Maguire wrote:

This series aims to add support to bpf_snprintf_btf() and
bpf_seq_printf_btf() allowing them to store string representations
of module-specific types, as well as the kernel-specific ones
they currently support.

Patch 1 removes the btf_module_mutex, as since we will need to
look up module BTF during BPF program execution, we don't want
to risk sleeping in the various contexts in which BPF can run.
The access patterns to the btf module list seem to conform to
classic list RCU usage so with a few minor tweaks this seems
workable.

Patch 2 replaces the unused flags field in struct btf_ptr with
an obj_id field,  allowing the specification of the id of a
BTF module.  If the value is 0, the core kernel vmlinux is
assumed to contain the type's BTF information.  Otherwise the
module with that id is used to identify the type.  If the
object-id based lookup fails, we again fall back to vmlinux
BTF.

Patch 3 is a selftest that uses veth (when built as a
module) and a kprobe to display both a module-specific
and kernel-specific type; both are arguments to veth_stats_rx().
Currently it looks up the module-specific type and object ids
using libbpf; in future, these lookups will likely be supported
directly in the BPF program via __builtin_btf_type_id(); but
I need to determine a good test to determine if that builtin
supports object ids.


__builtin_btf_type_id() is really only supported in llvm12
and 64bit return value support is pushed to llvm12 trunk
a while back. The builtin is introduced in llvm11 but has a
corner bug, so llvm12 is recommended. So if people use the builtin,
you can assume 64bit return value. libbpf support is required
here. So in my opinion, there is no need to do feature detection.


if people use llvm11 which may cause test to fail, we can add
an entry in selftest README file to warn people this specific
test needs llvm12.



Andrii has a patch to support 64bit return value for
__builtin_btf_type_id() and I assume that one should
be landed before or together with your patch.

Just for your info. The following is an example you could
use to determine whether __builtin_btf_type_id()
supports btf object id at llvm level.

-bash-4.4$ cat t.c
int test(int arg) {
   return __builtin_btf_type_id(arg, 1);
}

Compile to generate assembly code with latest llvm12 trunk:
   clang -target bpf -O2 -S -g -mcpu=v3 t.c
In the asm code, you should see one line with
   r0 = 1 ll

Or you can generate obj code:
   clang -target bpf -O2 -c -g -mcpu=v3 t.c
and then you disassemble the obj file
   llvm-objdump -d --no-show-raw-insn --no-leading-addr t.o
You should see below in the output
   r0 = 1 ll

Use earlier version of llvm12 trunk, the builtin has
32bit return value, you will see
   r0 = 1
which is a 32bit imm to r0, while "r0 = 1 ll" is
64bit imm to r0.



Changes since RFC

- add patch to remove module mutex
- modify to use obj_id instead of module name as identifier
   in "struct btf_ptr" (Andrii)

Alan Maguire (3):
   bpf: eliminate btf_module_mutex as RCU synchronization can be used
   bpf: add module support to btf display helpers
   selftests/bpf: verify module-specific types can be shown via
 bpf_snprintf_btf

  include/linux/btf.h    |  12 ++
  include/uapi/linux/bpf.h   |  13 ++-
  kernel/bpf/btf.c   |  49 +---
  kernel/trace/bpf_trace.c   |  44 ++--
  tools/include/uapi/linux/bpf.h |  13 ++-
  .../selftests/bpf/prog_tests/snprintf_btf_mod.c    | 124 
+

  tools/testing/selftests/bpf/progs/bpf_iter.h   |   2 +-
  tools/testing/selftests/bpf/progs/btf_ptr.h    |   2 +-
  tools/testing/selftests/bpf/progs/veth_stats_rx.c  |  72 
  9 files changed, 292 insertions(+), 39 deletions(-)
  create mode 100644 
tools/testing/selftests/bpf/prog_tests/snprintf_btf_mod.c

  create mode 100644 tools/testing/selftests/bpf/progs/veth_stats_rx.c



Re: [PATCH v2 bpf-next 0/3] bpf: support module BTF in BTF display helpers

2020-12-05 Thread Yonghong Song




On 12/4/20 10:48 AM, Alan Maguire wrote:

This series aims to add support to bpf_snprintf_btf() and
bpf_seq_printf_btf() allowing them to store string representations
of module-specific types, as well as the kernel-specific ones
they currently support.

Patch 1 removes the btf_module_mutex, as since we will need to
look up module BTF during BPF program execution, we don't want
to risk sleeping in the various contexts in which BPF can run.
The access patterns to the btf module list seem to conform to
classic list RCU usage so with a few minor tweaks this seems
workable.

Patch 2 replaces the unused flags field in struct btf_ptr with
an obj_id field,  allowing the specification of the id of a
BTF module.  If the value is 0, the core kernel vmlinux is
assumed to contain the type's BTF information.  Otherwise the
module with that id is used to identify the type.  If the
object-id based lookup fails, we again fall back to vmlinux
BTF.

Patch 3 is a selftest that uses veth (when built as a
module) and a kprobe to display both a module-specific
and kernel-specific type; both are arguments to veth_stats_rx().
Currently it looks up the module-specific type and object ids
using libbpf; in future, these lookups will likely be supported
directly in the BPF program via __builtin_btf_type_id(); but
I need to determine a good test to determine if that builtin
supports object ids.


__builtin_btf_type_id() is really only supported in llvm12
and 64bit return value support is pushed to llvm12 trunk
a while back. The builtin is introduced in llvm11 but has a
corner bug, so llvm12 is recommended. So if people use the builtin,
you can assume 64bit return value. libbpf support is required
here. So in my opinion, there is no need to do feature detection.

Andrii has a patch to support 64bit return value for
__builtin_btf_type_id() and I assume that one should
be landed before or together with your patch.

Just for your info. The following is an example you could
use to determine whether __builtin_btf_type_id()
supports btf object id at llvm level.

-bash-4.4$ cat t.c
int test(int arg) {
  return __builtin_btf_type_id(arg, 1);
}

Compile to generate assembly code with latest llvm12 trunk:
  clang -target bpf -O2 -S -g -mcpu=v3 t.c
In the asm code, you should see one line with
  r0 = 1 ll

Or you can generate obj code:
  clang -target bpf -O2 -c -g -mcpu=v3 t.c
and then you disassemble the obj file
  llvm-objdump -d --no-show-raw-insn --no-leading-addr t.o
You should see below in the output
  r0 = 1 ll

Use earlier version of llvm12 trunk, the builtin has
32bit return value, you will see
  r0 = 1
which is a 32bit imm to r0, while "r0 = 1 ll" is
64bit imm to r0.



Changes since RFC

- add patch to remove module mutex
- modify to use obj_id instead of module name as identifier
   in "struct btf_ptr" (Andrii)

Alan Maguire (3):
   bpf: eliminate btf_module_mutex as RCU synchronization can be used
   bpf: add module support to btf display helpers
   selftests/bpf: verify module-specific types can be shown via
 bpf_snprintf_btf

  include/linux/btf.h|  12 ++
  include/uapi/linux/bpf.h   |  13 ++-
  kernel/bpf/btf.c   |  49 +---
  kernel/trace/bpf_trace.c   |  44 ++--
  tools/include/uapi/linux/bpf.h |  13 ++-
  .../selftests/bpf/prog_tests/snprintf_btf_mod.c| 124 +
  tools/testing/selftests/bpf/progs/bpf_iter.h   |   2 +-
  tools/testing/selftests/bpf/progs/btf_ptr.h|   2 +-
  tools/testing/selftests/bpf/progs/veth_stats_rx.c  |  72 
  9 files changed, 292 insertions(+), 39 deletions(-)
  create mode 100644 tools/testing/selftests/bpf/prog_tests/snprintf_btf_mod.c
  create mode 100644 tools/testing/selftests/bpf/progs/veth_stats_rx.c



Re: [PATCH bpf-next v3 13/14] bpf: Add tests for new BPF atomic operations

2020-12-04 Thread Yonghong Song




On 12/4/20 1:45 AM, Brendan Jackman wrote:

On Thu, Dec 03, 2020 at 11:06:31PM -0800, Yonghong Song wrote:

On 12/3/20 8:02 AM, Brendan Jackman wrote:

[...]

diff --git a/tools/testing/selftests/bpf/prog_tests/atomics_test.c 
b/tools/testing/selftests/bpf/prog_tests/atomics_test.c
new file mode 100644
index ..66f0ccf4f4ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomics_test.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+
+
+#include "atomics_test.skel.h"
+
+static struct atomics_test *setup(void)
+{
+   struct atomics_test *atomics_skel;
+   __u32 duration = 0, err;
+
+   atomics_skel = atomics_test__open_and_load();
+   if (CHECK(!atomics_skel, "atomics_skel_load", "atomics skeleton 
failed\n"))
+   return NULL;
+
+   if (atomics_skel->data->skip_tests) {
+   printf("%s:SKIP:no ENABLE_ATOMICS_TEST (missing Clang BPF atomics 
support)",
+  __func__);
+   test__skip();
+   goto err;
+   }
+
+   err = atomics_test__attach(atomics_skel);
+   if (CHECK(err, "atomics_attach", "atomics attach failed: %d\n", err))
+   goto err;
+
+   return atomics_skel;
+
+err:
+   atomics_test__destroy(atomics_skel);
+   return NULL;
+}
+
+static void test_add(void)
+{
+   struct atomics_test *atomics_skel;
+   int err, prog_fd;
+   __u32 duration = 0, retval;
+
+   atomics_skel = setup();


When running the test, I observed a noticeable delay between skel load and
skel attach. The reason is the bpf program object file contains
multiple programs and the above setup() tries to do attachment
for ALL programs but actually below only "add" program is tested.
This will unnecessarily increase test_progs running time.

The best is for setup() here only load and attach program "add".
The libbpf API bpf_program__set_autoload() can set a particular
program not autoload. You can call attach function explicitly
for one specific program. This should be able to reduce test
running time.


Interesting, thanks a lot - I'll try this out next week. Maybe we can
actually load all the progs once at the beginning (i.e. in


If you have subtests, people expect each subtest to be individually runnable.
This will complicate your logic.


test_atomics_test) then attach/detch each prog individually as needed...
Sorry, I haven't got much of a grip on libbpf yet.


One alternative is not to do subtests. There is nothing wrong with having
just one bpf program instead of many. This way, you load all and attach
once, then do all the test verification.


Re: [PATCH bpf-next v3 10/14] bpf: Add bitwise atomic instructions

2020-12-04 Thread Yonghong Song




On 12/4/20 1:36 AM, Brendan Jackman wrote:

On Thu, Dec 03, 2020 at 10:42:19PM -0800, Yonghong Song wrote:



On 12/3/20 8:02 AM, Brendan Jackman wrote:

This adds instructions for

atomic[64]_[fetch_]and
atomic[64]_[fetch_]or
atomic[64]_[fetch_]xor

All these operations are isomorphic enough to implement with the same
verifier, interpreter, and x86 JIT code, hence being a single commit.

The main interesting thing here is that x86 doesn't directly support
the fetch_ version these operations, so we need to generate a CMPXCHG
loop in the JIT. This requires the use of two temporary registers,
IIUC it's safe to use BPF_REG_AX and x86's AUX_REG for this purpose.

Change-Id: I340b10cecebea8cb8a52e3606010cde547a10ed4
Signed-off-by: Brendan Jackman 
---
   arch/x86/net/bpf_jit_comp.c  | 50 +-
   include/linux/filter.h   | 60 
   kernel/bpf/core.c|  5 ++-
   kernel/bpf/disasm.c  | 21 ++---
   kernel/bpf/verifier.c|  6 
   tools/include/linux/filter.h | 60 
   6 files changed, 196 insertions(+), 6 deletions(-)


[...]

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 6186280715ed..698f82897b0d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -280,6 +280,66 @@ static inline bool insn_is_zext(const struct bpf_insn 
*insn)

[...]

+#define BPF_ATOMIC_FETCH_XOR(SIZE, DST, SRC, OFF)  \
+   ((struct bpf_insn) {\
+   .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
+   .dst_reg = DST, \
+   .src_reg = SRC, \
+   .off   = OFF,   \
+   .imm   = BPF_XOR | BPF_FETCH })
+
   /* Atomic exchange, src_reg = atomic_xchg((dst_reg + off), src_reg) */


Looks like BPF_ATOMIC_XOR/OR/AND/... all similar to each other.
The same is for BPF_ATOMIC_FETCH_XOR/OR/AND/...

I am wondering whether it makes sense to have two macros,
BPF_ATOMIC_BOP(BOP, SIZE, DST, SRC, OFF) and
BPF_ATOMIC_FETCH_BOP(BOP, SIZE, DST, SRC, OFF)
can have less number of macros?


Hmm yeah I think that's probably a good idea, it would be consistent
with the macros for non-atomic ALU ops.

I don't think 'BOP' would be very clear though, 'ALU' might be more
obvious.


BPF_ATOMIC_ALU and BPF_ATOMIC_FETCH_ALU indeed better.





Re: [PATCH bpf-next v3 09/14] bpf: Pull out a macro for interpreting atomic ALU operations

2020-12-04 Thread Yonghong Song




On 12/4/20 1:29 AM, Brendan Jackman wrote:

On Thu, Dec 03, 2020 at 10:30:18PM -0800, Yonghong Song wrote:



On 12/3/20 8:02 AM, Brendan Jackman wrote:

Since the atomic operations that are added in subsequent commits are
all isomorphic with BPF_ADD, pull out a macro to avoid the
interpreter becoming dominated by lines of atomic-related code.

Note that this sacrificies interpreter performance (combining
STX_ATOMIC_W and STX_ATOMIC_DW into single switch case means that we
need an extra conditional branch to differentiate them) in favour of
compact and (relatively!) simple C code.

Change-Id: I8cae5b66e75f34393de6063b91c05a8006fdd9e6
Signed-off-by: Brendan Jackman 


Ack with a minor suggestion below.

Acked-by: Yonghong Song 


---
   kernel/bpf/core.c | 79 +++
   1 file changed, 38 insertions(+), 41 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 28f960bc2e30..498d3f067be7 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1618,55 +1618,52 @@ static u64 ___bpf_prog_run(u64 *regs, const struct 
bpf_insn *insn, u64 *stack)
LDX_PROBE(DW, 8)
   #undef LDX_PROBE
-   STX_ATOMIC_W:
-   switch (IMM) {
-   case BPF_ADD:
-   /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
-   atomic_add((u32) SRC, (atomic_t *)(unsigned long)
-  (DST + insn->off));
-   break;
-   case BPF_ADD | BPF_FETCH:
-   SRC = (u32) atomic_fetch_add(
-   (u32) SRC,
-   (atomic_t *)(unsigned long) (DST + insn->off));
-   break;
-   case BPF_XCHG:
-   SRC = (u32) atomic_xchg(
-   (atomic_t *)(unsigned long) (DST + insn->off),
-   (u32) SRC);
-   break;
-   case BPF_CMPXCHG:
-   BPF_R0 = (u32) atomic_cmpxchg(
-   (atomic_t *)(unsigned long) (DST + insn->off),
-   (u32) BPF_R0, (u32) SRC);
+#define ATOMIC(BOP, KOP)   \


ATOMIC a little bit generic. Maybe ATOMIC_FETCH_BOP?


Well it doesn't fetch in all cases and "BOP" is intended to
differentiate from KOP i.e. BOP = BPF operation KOP = Kernel operation.

Could go with ATOMIC_ALU_OP?


ATOMIC_ALU_OP sounds good.




+   case BOP:   \
+   if (BPF_SIZE(insn->code) == BPF_W)   \
+   atomic_##KOP((u32) SRC, (atomic_t *)(unsigned 
long) \
+(DST + insn->off)); \
+   else\
+   atomic64_##KOP((u64) SRC, (atomic64_t 
*)(unsigned long) \
+  (DST + insn->off));   \
+   break;  \
+   case BOP | BPF_FETCH:   \
+   if (BPF_SIZE(insn->code) == BPF_W)   \
+   SRC = (u32) atomic_fetch_##KOP( \
+   (u32) SRC,  \
+   (atomic_t *)(unsigned long) (DST + 
insn->off)); \
+   else\
+   SRC = (u64) atomic64_fetch_##KOP(   \
+   (u64) SRC,  \
+   (atomic64_t *)(s64) (DST + insn->off)); 
\
break;
-   default:
-   goto default_label;
-   }
-   CONT;
STX_ATOMIC_DW:
+   STX_ATOMIC_W:
switch (IMM) {
-   case BPF_ADD:
-   /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
-   atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
-(DST + insn->off));
-   break;
-   case BPF_ADD | BPF_FETCH:
-   SRC = (u64) atomic64_fetch_add(
-   (u64) SRC,
-   (atomic64_t *)(s64) (DST + insn->off));
-   break;
+   ATOMIC(BPF_ADD, add)
+
case BPF_XCHG:
-   SRC = (u64) atomic64_xchg(
-   (atomic64_t *)(u64) (DST + insn->off),
-   (u64) SRC);
+   if (BPF_SIZE(insn->code) == BPF_W)
+   SRC = (u32) atomic_xchg(
+

Re: [PATCH bpf-next v3 13/14] bpf: Add tests for new BPF atomic operations

2020-12-03 Thread Yonghong Song




On 12/3/20 8:02 AM, Brendan Jackman wrote:

This relies on the work done by Yonghong Song in
https://reviews.llvm.org/D72184

Note the use of a define called ENABLE_ATOMICS_TESTS: this is used
to:

  - Avoid breaking the build for people on old versions of Clang
  - Avoid needing separate lists of test objects for no_alu32, where
atomics are not supported even if Clang has the feature.

The atomics_test.o BPF object is built unconditionally both for
test_progs and test_progs-no_alu32. For test_progs, if Clang supports
atomics, ENABLE_ATOMICS_TESTS is defined, so it includes the proper
test code. Otherwise, progs and global vars are defined anyway, as
stubs; this means that the skeleton user code still builds.

The atomics_test.o userspace object is built once and used for both
test_progs and test_progs-no_alu32. A variable called skip_tests is
defined in the BPF object's data section, which tells the userspace
object whether to skip the atomics test.

Change-Id: Iecc12f35f0ded4a1dd805cce1be576e7b27917ef
Signed-off-by: Brendan Jackman 
---
  tools/testing/selftests/bpf/Makefile  |   4 +
  .../selftests/bpf/prog_tests/atomics_test.c   | 262 ++
  .../selftests/bpf/progs/atomics_test.c| 154 ++
  .../selftests/bpf/verifier/atomic_and.c   |  77 +
  .../selftests/bpf/verifier/atomic_cmpxchg.c   |  96 +++
  .../selftests/bpf/verifier/atomic_fetch_add.c | 106 +++
  .../selftests/bpf/verifier/atomic_or.c|  77 +
  .../selftests/bpf/verifier/atomic_xchg.c  |  46 +++
  .../selftests/bpf/verifier/atomic_xor.c   |  77 +
  9 files changed, 899 insertions(+)
  create mode 100644 tools/testing/selftests/bpf/prog_tests/atomics_test.c
  create mode 100644 tools/testing/selftests/bpf/progs/atomics_test.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_and.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_fetch_add.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_or.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xchg.c
  create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xor.c

diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index f21c4841a612..448a9eb1a56c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -431,11 +431,15 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read 
\
   $(wildcard progs/btf_dump_test_case_*.c)
  TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
  TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
+ifeq ($(feature-clang-bpf-atomics),1)
+  TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS
+endif
  TRUNNER_BPF_LDFLAGS := -mattr=+alu32
  $(eval $(call DEFINE_TEST_RUNNER,test_progs))
  
  # Define test_progs-no_alu32 test runner.

  TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
  TRUNNER_BPF_LDFLAGS :=
  $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
  
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics_test.c b/tools/testing/selftests/bpf/prog_tests/atomics_test.c

new file mode 100644
index ..66f0ccf4f4ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomics_test.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+
+
+#include "atomics_test.skel.h"
+
+static struct atomics_test *setup(void)
+{
+   struct atomics_test *atomics_skel;
+   __u32 duration = 0, err;
+
+   atomics_skel = atomics_test__open_and_load();
+   if (CHECK(!atomics_skel, "atomics_skel_load", "atomics skeleton 
failed\n"))
+   return NULL;
+
+   if (atomics_skel->data->skip_tests) {
+   printf("%s:SKIP:no ENABLE_ATOMICS_TEST (missing Clang BPF atomics 
support)",
+  __func__);
+   test__skip();
+   goto err;
+   }
+
+   err = atomics_test__attach(atomics_skel);
+   if (CHECK(err, "atomics_attach", "atomics attach failed: %d\n", err))
+   goto err;
+
+   return atomics_skel;
+
+err:
+   atomics_test__destroy(atomics_skel);
+   return NULL;
+}
+
+static void test_add(void)
+{
+   struct atomics_test *atomics_skel;
+   int err, prog_fd;
+   __u32 duration = 0, retval;
+
+   atomics_skel = setup();


When running the test, I observed a noticeable delay between skel load 
and skel attach. The reason is the bpf program object file contains

multiple programs and the above setup() tries to do attachment
for ALL programs but actually below only "add" program is tested.
This will unnecessarily increase test_progs running time.

The best is for setup() here only load and attach program "add".
The libbpf API bpf_program__set_autoloa

  1   2   3   4   >