Re: [PATCH v4 02/27] ntsync: Introduce NTSYNC_IOC_WAIT_ALL.

2024-05-13 Thread Elizabeth Figura
On Friday, April 19, 2024 11:28:14 AM CDT Peter Zijlstra wrote:
> On Thu, Apr 18, 2024 at 11:35:11AM +0200, Peter Zijlstra wrote:
> > On Wed, Apr 17, 2024 at 03:03:05PM -0500, Elizabeth Figura wrote:
> > > Ach. I wrote this with the idea that the race isn't meaningful, but
> > > looking at it again you're right—there is a harmful race here.
> > > 
> > > I think it should be fixable by moving the atomic_read inside the lock,
> > > though.
> > 
> > Right, I've ended up with the (as yet untested) below. I'll see if I can
> > find time later to actually test things.
> 
> Latest hackery... I tried testing this but I'm not having luck using the
> patched wine as per the other email.
> 

I converted the rest of the direct uses of spin_lock() using the below patch 
and tested it myself, and it passes Wine tests. As far as I can tell the logic 
is correct, too; I couldn't find any races.

I'll incorporate these changes into the next revision, unless there's a good 
reason not to.

---
--- a/drivers/misc/ntsync.c
+++ b/drivers/misc/ntsync.c
@@ -569,17 +569,19 @@ static int ntsync_event_set(struct ntsync_obj *event, 
void __user *argp, bool pu
 
 static int ntsync_event_reset(struct ntsync_obj *event, void __user *argp)
 {
+   struct ntsync_device *dev = event->dev;
__u32 prev_state;
+   bool all;
 
if (event->type != NTSYNC_TYPE_EVENT)
return -EINVAL;
 
-   spin_lock(>lock);
+   all = ntsync_lock_obj(dev, event);
 
prev_state = event->u.event.signaled;
event->u.event.signaled = false;
 
-   spin_unlock(>lock);
+   ntsync_unlock_obj(dev, event, all);
 
if (put_user(prev_state, (__u32 __user *)argp))
return -EFAULT;
@@ -590,16 +592,21 @@ static int ntsync_event_reset(struct ntsync_obj *event, 
void __user *argp)
 static int ntsync_sem_read(struct ntsync_obj *sem, void __user *argp)
 {
struct ntsync_sem_args __user *user_args = argp;
+   struct ntsync_device *dev = sem->dev;
struct ntsync_sem_args args;
+   bool all;
 
if (sem->type != NTSYNC_TYPE_SEM)
return -EINVAL;
 
args.sem = 0;
-   spin_lock(>lock);
+
+   all = ntsync_lock_obj(dev, sem);
+
args.count = sem->u.sem.count;
args.max = sem->u.sem.max;
-   spin_unlock(>lock);
+
+   ntsync_unlock_obj(dev, sem, all);
 
if (copy_to_user(user_args, , sizeof(args)))
return -EFAULT;
@@ -609,18 +616,23 @@ static int ntsync_sem_read(struct ntsync_obj *sem, void 
__user *argp)
 static int ntsync_mutex_read(struct ntsync_obj *mutex, void __user *argp)
 {
struct ntsync_mutex_args __user *user_args = argp;
+   struct ntsync_device *dev = mutex->dev;
struct ntsync_mutex_args args;
+   bool all;
int ret;
 
if (mutex->type != NTSYNC_TYPE_MUTEX)
return -EINVAL;
 
args.mutex = 0;
-   spin_lock(>lock);
+
+   all = ntsync_lock_obj(dev, mutex);
+
args.count = mutex->u.mutex.count;
args.owner = mutex->u.mutex.owner;
ret = mutex->u.mutex.ownerdead ? -EOWNERDEAD : 0;
-   spin_unlock(>lock);
+
+   ntsync_unlock_obj(dev, mutex, all);
 
if (copy_to_user(user_args, , sizeof(args)))
return -EFAULT;
@@ -630,16 +642,21 @@ static int ntsync_mutex_read(struct ntsync_obj *mutex, 
void __user *argp)
 static int ntsync_event_read(struct ntsync_obj *event, void __user *argp)
 {
struct ntsync_event_args __user *user_args = argp;
+   struct ntsync_device *dev = event->dev;
struct ntsync_event_args args;
+   bool all;
 
if (event->type != NTSYNC_TYPE_EVENT)
return -EINVAL;
 
args.event = 0;
-   spin_lock(>lock);
+
+   all = ntsync_lock_obj(dev, event);
+
args.manual = event->u.event.manual;
args.signaled = event->u.event.signaled;
-   spin_unlock(>lock);
+
+   ntsync_unlock_obj(dev, event, all);
 
if (copy_to_user(user_args, , sizeof(args)))
return -EFAULT;
@@ -962,6 +979,7 @@ static int ntsync_wait_any(struct ntsync_device *dev, void 
__user *argp)
__u32 i, total_count;
struct ntsync_q *q;
int signaled;
+   bool all;
int ret;
 
if (copy_from_user(, argp, sizeof(args)))
@@ -981,9 +999,9 @@ static int ntsync_wait_any(struct ntsync_device *dev, void 
__user *argp)
struct ntsync_q_entry *entry = >entries[i];
struct ntsync_obj *obj = entry->obj;
 
-   spin_lock(>lock);
+   all = ntsync_lock_obj(dev, obj);
list_add_tail(>node, >any_waiters);
-   spin_unlock(>lock);
+   ntsync_unlock_obj(dev, obj, all);
}
 
/*
@@ -1000,9 +1018,9 @@ static int ntsync_wait_any(struct ntsync_device *dev, 
void __user *argp)
if (atomic_read(>signaled) != -1)
break;
 
-   spin_lock(>lock);

Re: [PATCH v4 05/10] platform: Add test managed platform_device/driver APIs

2024-05-13 Thread Stephen Boyd
Quoting Stephen Boyd (2024-04-22 16:23:58)
> diff --git a/drivers/base/test/platform_kunit.c 
> b/drivers/base/test/platform_kunit.c
> new file mode 100644
> index ..54af6db2a6d8
> --- /dev/null
> +++ b/drivers/base/test/platform_kunit.c
> @@ -0,0 +1,174 @@
[...]
> +struct platform_device *
> +platform_device_alloc_kunit(struct kunit *test, const char *name, int id)
> +{
> +   struct platform_device *pdev;
> +
> +   pdev = platform_device_alloc(name, id);
> +   if (!pdev)
> +   return NULL;
> +
> +   if (kunit_add_action_or_reset(test, (kunit_action_t 
> *)_device_put, pdev))
> +   return NULL;
> +
> +   return pdev;
> +}
> +EXPORT_SYMBOL_GPL(platform_device_alloc_kunit);
> +
> +static void platform_device_add_kunit_exit(struct kunit_resource *res)
> +{
> +   struct platform_device *pdev = res->data;
> +
> +   platform_device_unregister(pdev);
> +}
> +
> +static bool
> +platform_device_alloc_kunit_match(struct kunit *test,
> + struct kunit_resource *res, void 
> *match_data)
> +{
> +   struct platform_device *pdev = match_data;
> +
> +   return res->data == pdev;
> +}
> +
> +/**
> + * platform_device_add_kunit() - Register a KUnit test managed platform 
> device
> + * @test: test context
> + * @pdev: platform device to add
> + *
> + * Register a test managed platform device. The device is unregistered when 
> the
> + * test completes.
> + *
> + * Return: 0 on success, negative errno on failure.
> + */
> +int platform_device_add_kunit(struct kunit *test, struct platform_device 
> *pdev)
> +{
> +   struct kunit_resource *res;
> +   int ret;
> +
> +   ret = platform_device_add(pdev);
> +   if (ret)
> +   return ret;
> +
> +   res = kunit_find_resource(test, platform_device_alloc_kunit_match, 
> pdev);

This doesn't work because platform_device_alloc_kunit() used
kunit_add_action_or_reset() which has a chained free routine and data
pointer. I've added a test to make sure the platform device is removed
from the bus. It's not super great though because when this code fails
to find a match it will still remove the device by calling
platform_device_unregister() when the test ends. It will follow that up
with a call to platform_device_put(), which is the problem as that
causes an underflow and operates on an already freed device.

I couldn't come up with anything better than searching the platform bus.
Maybe if there was a way to allocate the memory or redirect where
platform_device_alloc_kunit() got memory from we could hold the device
memory around after it should have been freed and make sure the kref for
the device kobject is 0. That seems pretty invasive to do though so I'm
just going to leave it for now and add this test to make sure it cleans
up.



[PATCH net] selftests/net/lib: no need to record ns name if it already exist

2024-05-13 Thread Hangbin Liu
There is no need to add the name to ns_list again if the netns already
recoreded.

Fixes: 25ae948b4478 ("selftests/net: add lib.sh")
Signed-off-by: Hangbin Liu 
---
 tools/testing/selftests/net/lib.sh | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/lib.sh 
b/tools/testing/selftests/net/lib.sh
index f9fe182dfbd4..56a9454b7ba3 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -73,15 +73,17 @@ setup_ns()
local ns=""
local ns_name=""
local ns_list=""
+   local ns_exist=
for ns_name in "$@"; do
# Some test may setup/remove same netns multi times
if unset ${ns_name} 2> /dev/null; then
ns="${ns_name,,}-$(mktemp -u XX)"
eval readonly ${ns_name}="$ns"
+   ns_exist=false
else
eval ns='$'${ns_name}
cleanup_ns "$ns"
-
+   ns_exist=true
fi
 
if ! ip netns add "$ns"; then
@@ -90,7 +92,7 @@ setup_ns()
return $ksft_skip
fi
ip -n "$ns" link set lo up
-   ns_list="$ns_list $ns"
+   ! $ns_exist && ns_list="$ns_list $ns"
done
NS_LIST="$NS_LIST $ns_list"
 }
-- 
2.43.0




Re: [PATCH v3] kunit: Cover 'assert.c' with tests

2024-05-13 Thread Rae Moar
On Thu, May 9, 2024 at 5:05 AM Ivan Orlov  wrote:
>
> There are multiple assertion formatting functions in the `assert.c`
> file, which are not covered with tests yet. Implement the KUnit test
> for these functions.
>
> The test consists of 11 test cases for the following functions:
>
> 1) 'is_literal'
> 2) 'is_str_literal'
> 3) 'kunit_assert_prologue', test case for multiple assert types
> 4) 'kunit_assert_print_msg'
> 5) 'kunit_unary_assert_format'
> 6) 'kunit_ptr_not_err_assert_format'
> 7) 'kunit_binary_assert_format'
> 8) 'kunit_binary_ptr_assert_format'
> 9) 'kunit_binary_str_assert_format'
> 10) 'kunit_assert_hexdump'
> 11) 'kunit_mem_assert_format'
>
> The test aims at maximizing the branch coverage for the assertion
> formatting functions.
>
> As you can see, it covers some of the static helper functions as
> well, so mark the static functions in `assert.c` as 'VISIBLE_IF_KUNIT'
> and conditionally export them with EXPORT_SYMBOL_IF_KUNIT. Add the
> corresponding definitions to `assert.h`.
>
> Build the assert test when CONFIG_KUNIT_TEST is enabled, similar to
> how it is done for the string stream test.

Hello!

This looks great to me! Thanks for all your work on this! There is
just one comment I have below. Once that is fixed up, I am happy to
add a reviewed-by.

Thanks!
-Rae

>
> Signed-off-by: Ivan Orlov 
> ---
> V1 -> V2:
> - Check the output from the string stream for containing the key parts
> instead of comparing the results with expected strings char by char, as
> it was suggested by Rae Moar . Define two macros to
> make it possible (ASSERT_TEST_EXPECT_CONTAIN and
> ASSERT_TEST_EXPECT_NCONTAIN).
> - Mark the static functions in `assert.c` as VISIBLE_IF_KUNIT and export
> them conditionally if kunit is enabled instead of including the
> `assert_test.c` file in the end of `assert.c`. This way we will decouple
> the test from the implementation (SUT).
> - Update the kunit_assert_hexdump test: now it checks for presense of
> the brackets '<>' around the non-matching bytes, instead of comparing
> the kunit_assert_hexdump output char by char.
> V2 -> V3:
> - Make test case array and test suite definitions static
> - Change the condition in `assert.h`: we should declare VISIBLE_IF_KUNIT
> functions in the header file when CONFIG_KUNIT is enabled, not
> CONFIG_KUNIT_TEST. Otherwise, if CONFIG_KUNIT_TEST is disabled,
> VISIBLE_IF_KUNIT functions in the `assert.c` are not static, and
> prototypes for them can't be found.
> - Add MODULE_LICENSE and MODULE_DESCRIPTION macros
>
>  include/kunit/assert.h  |  11 ++
>  lib/kunit/Makefile  |   1 +
>  lib/kunit/assert.c  |  24 ++-
>  lib/kunit/assert_test.c | 391 
>  4 files changed, 419 insertions(+), 8 deletions(-)
>  create mode 100644 lib/kunit/assert_test.c
>
> diff --git a/include/kunit/assert.h b/include/kunit/assert.h
> index 24c2b9fa61e8..7e7490a74b13 100644
> --- a/include/kunit/assert.h
> +++ b/include/kunit/assert.h
> @@ -218,4 +218,15 @@ void kunit_mem_assert_format(const struct kunit_assert 
> *assert,
>  const struct va_format *message,
>  struct string_stream *stream);
>
> +#if IS_ENABLED(CONFIG_KUNIT)
> +void kunit_assert_print_msg(const struct va_format *message,
> +   struct string_stream *stream);
> +bool is_literal(const char *text, long long value);
> +bool is_str_literal(const char *text, const char *value);
> +void kunit_assert_hexdump(struct string_stream *stream,
> + const void *buf,
> + const void *compared_buf,
> + const size_t len);
> +#endif
> +
>  #endif /*  _KUNIT_ASSERT_H */
> diff --git a/lib/kunit/Makefile b/lib/kunit/Makefile
> index 309659a32a78..be7c9903936f 100644
> --- a/lib/kunit/Makefile
> +++ b/lib/kunit/Makefile
> @@ -18,6 +18,7 @@ endif
>  obj-y +=   hooks.o
>
>  obj-$(CONFIG_KUNIT_TEST) +=kunit-test.o
> +obj-$(CONFIG_KUNIT_TEST) +=assert_test.o
>
>  # string-stream-test compiles built-in only.
>  ifeq ($(CONFIG_KUNIT_TEST),y)
> diff --git a/lib/kunit/assert.c b/lib/kunit/assert.c
> index dd1d633d0fe2..382eb409d34b 100644
> --- a/lib/kunit/assert.c
> +++ b/lib/kunit/assert.c
> @@ -7,6 +7,7 @@
>   */
>  #include 
>  #include 
> +#include 
>
>  #include "string-stream.h"
>
> @@ -30,12 +31,14 @@ void kunit_assert_prologue(const struct kunit_loc *loc,
>  }
>  EXPORT_SYMBOL_GPL(kunit_assert_prologue);
>
> -static void kunit_assert_print_msg(const struct va_format *message,
> -  struct string_stream *stream)
> +VISIBLE_IF_KUNIT
> +void kunit_assert_print_msg(const struct va_format *message,
> +   struct string_stream *stream)
>  {
> if (message->fmt)
> string_stream_add(stream, "\n%pV", message);
>  }
> +EXPORT_SYMBOL_IF_KUNIT(kunit_assert_print_msg);
>
>  void kunit_fail_assert_format(const 

Re: [PATCH net-next v9 00/14] Device Memory TCP

2024-05-13 Thread Jakub Kicinski
On Fri, 10 May 2024 16:21:11 -0700 Mina Almasry wrote:
> Device Memory TCP

Sorry Mina, this is too big to apply during the merge window :(
-- 
pw-bot: defer



Re: [net-next PATCH] test: hsr: Extend the hsr_redbox.sh to have more SAN devices connected

2024-05-13 Thread patchwork-bot+netdevbpf
Hello:

This patch was applied to netdev/net-next.git (main)
by Jakub Kicinski :

On Fri, 10 May 2024 16:37:10 +0200 you wrote:
> After this change the single SAN device (ns3eth1) is now replaced with
> two SAN devices - respectively ns4eth1 and ns5eth1.
> 
> It is possible to extend this script to have more SAN devices connected
> by adding them to ns3br1 bridge.
> 
> Signed-off-by: Lukasz Majewski 
> 
> [...]

Here is the summary with links:
  - [net-next] test: hsr: Extend the hsr_redbox.sh to have more SAN devices 
connected
https://git.kernel.org/netdev/net-next/c/eafbf0574e05

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html





Re: [PATCH net-next v10 0/3] net: gro: remove network_header use, move p->{flush/flush_id} calculations to L4

2024-05-13 Thread patchwork-bot+netdevbpf
Hello:

This series was applied to netdev/net-next.git (main)
by Jakub Kicinski :

On Thu,  9 May 2024 21:08:16 +0200 you wrote:
> The cb fields network_offset and inner_network_offset are used instead of
> skb->network_header throughout GRO.
> 
> These fields are then leveraged in the next commit to remove flush_id state
> from napi_gro_cb, and stateful code in {ipv6,inet}_gro_receive which may be
> unnecessarily complicated due to encapsulation support in GRO. These fields
> are checked in L4 instead.
> 
> [...]

Here is the summary with links:
  - [net-next,v10,1/3] net: gro: use cb instead of skb->network_header
https://git.kernel.org/netdev/net-next/c/186b1ea73ad8
  - [net-next,v10,2/3] net: gro: move L3 flush checks to tcp_gro_receive and 
udp_gro_receive_segment
https://git.kernel.org/netdev/net-next/c/4b0ebbca3e16
  - [net-next,v10,3/3] selftests/net: add flush id selftests
https://git.kernel.org/netdev/net-next/c/bc21faefbe58

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html





Re: [PATCH v3 10/29] riscv/mm : ensure PROT_WRITE leads to VM_READ | VM_WRITE

2024-05-13 Thread Charlie Jenkins
On Mon, May 13, 2024 at 11:41:34AM -0700, Deepak Gupta wrote:
> On Mon, May 13, 2024 at 11:36:49AM -0700, Charlie Jenkins wrote:
> > On Mon, May 13, 2024 at 10:47:25AM -0700, Deepak Gupta wrote:
> > > On Fri, May 10, 2024 at 02:02:54PM -0700, Charlie Jenkins wrote:
> > > > On Wed, Apr 03, 2024 at 04:34:58PM -0700, Deepak Gupta wrote:
> > > > > `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ |
> > > > > VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is
> > > > > updated to convert all incoming PROT_WRITE to (PROT_WRITE | 
> > > > > PROT_READ).
> > > > > This is to make sure that any existing apps using PROT_WRITE still 
> > > > > work.
> > > > >
> > > > > Earlier `protection_map[VM_WRITE]` used to pick read-write PTE 
> > > > > encodings.
> > > > > Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE
> > > > > encodings for shadow stack. Above changes ensure that existing apps
> > > > > continue to work because underneath kernel will be picking
> > > > > `protection_map[VM_WRITE|VM_READ]` PTE encodings.
> > > > >
> > > > > Signed-off-by: Deepak Gupta 
> > > > > ---
> > > > >  arch/riscv/include/asm/mman.h| 24 
> > > > >  arch/riscv/include/asm/pgtable.h |  1 +
> > > > >  arch/riscv/kernel/sys_riscv.c| 11 +++
> > > > >  arch/riscv/mm/init.c |  2 +-
> > > > >  mm/mmap.c|  1 +
> > > > >  5 files changed, 38 insertions(+), 1 deletion(-)
> > > > >  create mode 100644 arch/riscv/include/asm/mman.h
> > > > >
> > > > > diff --git a/arch/riscv/include/asm/mman.h 
> > > > > b/arch/riscv/include/asm/mman.h
> > > > > new file mode 100644
> > > > > index ..ef9fedf32546
> > > > > --- /dev/null
> > > > > +++ b/arch/riscv/include/asm/mman.h
> > > > > @@ -0,0 +1,24 @@
> > > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > > +#ifndef __ASM_MMAN_H__
> > > > > +#define __ASM_MMAN_H__
> > > > > +
> > > > > +#include 
> > > > > +#include 
> > > > > +#include 
> > > > > +
> > > > > +static inline unsigned long arch_calc_vm_prot_bits(unsigned long 
> > > > > prot,
> > > > > + unsigned long pkey __always_unused)
> > > > > +{
> > > > > + unsigned long ret = 0;
> > > > > +
> > > > > + /*
> > > > > +  * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE.
> > > > > +  * Only VM_WRITE means shadow stack.
> > > > > +  */
> > > > > + if (prot & PROT_WRITE)
> > > > > + ret = (VM_READ | VM_WRITE);
> > > > > + return ret;
> > > > > +}
> > > > > +#define arch_calc_vm_prot_bits(prot, pkey) 
> > > > > arch_calc_vm_prot_bits(prot, pkey)
> > > > > +
> > > > > +#endif /* ! __ASM_MMAN_H__ */
> > > > > diff --git a/arch/riscv/include/asm/pgtable.h 
> > > > > b/arch/riscv/include/asm/pgtable.h
> > > > > index 6066822e7396..4d5983bc6766 100644
> > > > > --- a/arch/riscv/include/asm/pgtable.h
> > > > > +++ b/arch/riscv/include/asm/pgtable.h
> > > > > @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata;
> > > > >  #define PAGE_READ_EXEC   __pgprot(_PAGE_BASE | 
> > > > > _PAGE_READ | _PAGE_EXEC)
> > > > >  #define PAGE_WRITE_EXEC  __pgprot(_PAGE_BASE | 
> > > > > _PAGE_READ |  \
> > > > >_PAGE_EXEC | _PAGE_WRITE)
> > > > > +#define PAGE_SHADOWSTACK   __pgprot(_PAGE_BASE | _PAGE_WRITE)
> > > > >
> > > > >  #define PAGE_COPYPAGE_READ
> > > > >  #define PAGE_COPY_EXEC   PAGE_READ_EXEC
> > > > > diff --git a/arch/riscv/kernel/sys_riscv.c 
> > > > > b/arch/riscv/kernel/sys_riscv.c
> > > > > index f1c1416a9f1e..846c36b1b3d5 100644
> > > > > --- a/arch/riscv/kernel/sys_riscv.c
> > > > > +++ b/arch/riscv/kernel/sys_riscv.c
> > > > > @@ -8,6 +8,8 @@
> > > > >  #include 
> > > > >  #include 
> > > > >  #include 
> > > > > +#include 
> > > > > +#include 
> > > > >
> > > > >  static long riscv_sys_mmap(unsigned long addr, unsigned long len,
> > > > >  unsigned long prot, unsigned long flags,
> > > > > @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, 
> > > > > unsigned long len,
> > > > >   if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
> > > > >   return -EINVAL;
> > > > >
> > > > > + /*
> > > > > +  * If only PROT_WRITE is specified then extend that to PROT_READ
> > > > > +  * protection_map[VM_WRITE] is now going to select shadow stack 
> > > > > encodings.
> > > > > +  * So specifying PROT_WRITE actually should select 
> > > > > protection_map [VM_WRITE | VM_READ]
> > > > > +  * If user wants to create shadow stack then they should use 
> > > > > `map_shadow_stack` syscall.
> > > > > +  */
> > > > > + if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ)))
> > > >
> > > > The comments says that this should extend to PROT_READ if only
> > > > PROT_WRITE is specified. This condition instead is checking if
> > > > PROT_WRITE is selected but PROT_READ is not. If prot 

Re: [PATCH] selftest: epoll_busy_poll: Fix spelling mistake "couldnt" -> "couldn't"

2024-05-13 Thread patchwork-bot+netdevbpf
Hello:

This patch was applied to netdev/net-next.git (main)
by Jakub Kicinski :

On Fri, 10 May 2024 09:48:11 +0100 you wrote:
> There is a spelling mistake in a TH_LOG message. Fix it.
> 
> Signed-off-by: Colin Ian King 
> ---
>  tools/testing/selftests/net/epoll_busy_poll.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Here is the summary with links:
  - selftest: epoll_busy_poll: Fix spelling mistake "couldnt" -> "couldn't"
https://git.kernel.org/netdev/net-next/c/f37dc28ac6e2

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html





Re: [PATCH net-next] selftests: net: use upstream mtools

2024-05-13 Thread patchwork-bot+netdevbpf
Hello:

This patch was applied to netdev/net-next.git (main)
by Jakub Kicinski :

On Fri, 10 May 2024 14:28:56 +0300 you wrote:
> Joachim kindly merged the IPv6 support in
> https://github.com/troglobit/mtools/pull/2, so we can just use his
> version now. A few more fixes subsequently came in for IPv6, so even
> better.
> 
> Check that the deployed mtools version is 3.0 or above. Note that the
> version check breaks compatibility with my fork where I didn't bump the
> version, but I assume that won't be a problem.
> 
> [...]

Here is the summary with links:
  - [net-next] selftests: net: use upstream mtools
https://git.kernel.org/netdev/net-next/c/cfc2eefd40f1

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html





[PATCH v1] memfd: `MFD_NOEXEC_SEAL` should not imply `MFD_ALLOW_SEALING`

2024-05-13 Thread Barnabás Pőcze
`MFD_NOEXEC_SEAL` should remove the executable bits and set
`F_SEAL_EXEC` to prevent further modifications to the executable
bits as per the comment in the uapi header file:

  not executable and sealed to prevent changing to executable

However, currently, it also unsets `F_SEAL_SEAL`, essentially
acting as a superset of `MFD_ALLOW_SEALING`. Nothing implies
that it should be so, and indeed up until the second version
of the of the patchset[0] that introduced `MFD_EXEC` and
`MFD_NOEXEC_SEAL`, `F_SEAL_SEAL` was not removed, however it
was changed in the third revision of the patchset[1] without
a clear explanation.

This behaviour is suprising for application developers,
there is no documentation that would reveal that `MFD_NOEXEC_SEAL`
has the additional effect of `MFD_ALLOW_SEALING`.

So do not remove `F_SEAL_SEAL` when `MFD_NOEXEC_SEAL` is requested.
This is technically an ABI break, but it seems very unlikely that an
application would depend on this behaviour (unless by accident).

[0]: https://lore.kernel.org/lkml/20220805222126.142525-3-jef...@google.com/
[1]: https://lore.kernel.org/lkml/20221202013404.163143-3-jef...@google.com/

Fixes: 105ff5339f498a ("mm/memfd: add MFD_NOEXEC_SEAL and MFD_EXEC")
Signed-off-by: Barnabás Pőcze 
---

Or did I miss the explanation as to why MFD_NOEXEC_SEAL should
imply MFD_ALLOW_SEALING? If so, please direct me to it and
sorry for the noise.

---
 mm/memfd.c | 9 -
 tools/testing/selftests/memfd/memfd_test.c | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/mm/memfd.c b/mm/memfd.c
index 7d8d3ab3fa37..8b7f6afee21d 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -356,12 +356,11 @@ SYSCALL_DEFINE2(memfd_create,
 
inode->i_mode &= ~0111;
file_seals = memfd_file_seals_ptr(file);
-   if (file_seals) {
-   *file_seals &= ~F_SEAL_SEAL;
+   if (file_seals)
*file_seals |= F_SEAL_EXEC;
-   }
-   } else if (flags & MFD_ALLOW_SEALING) {
-   /* MFD_EXEC and MFD_ALLOW_SEALING are set */
+   }
+
+   if (flags & MFD_ALLOW_SEALING) {
file_seals = memfd_file_seals_ptr(file);
if (file_seals)
*file_seals &= ~F_SEAL_SEAL;
diff --git a/tools/testing/selftests/memfd/memfd_test.c 
b/tools/testing/selftests/memfd/memfd_test.c
index 18f585684e20..b6a7ad68c3c1 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -1151,7 +1151,7 @@ static void test_noexec_seal(void)
mfd_def_size,
MFD_CLOEXEC | MFD_NOEXEC_SEAL);
mfd_assert_mode(fd, 0666);
-   mfd_assert_has_seals(fd, F_SEAL_EXEC);
+   mfd_assert_has_seals(fd, F_SEAL_SEAL | F_SEAL_EXEC);
mfd_fail_chmod(fd, 0777);
close(fd);
 }
-- 
2.45.0





Re: [PATCH v3 10/29] riscv/mm : ensure PROT_WRITE leads to VM_READ | VM_WRITE

2024-05-13 Thread Deepak Gupta

On Mon, May 13, 2024 at 11:36:49AM -0700, Charlie Jenkins wrote:

On Mon, May 13, 2024 at 10:47:25AM -0700, Deepak Gupta wrote:

On Fri, May 10, 2024 at 02:02:54PM -0700, Charlie Jenkins wrote:
> On Wed, Apr 03, 2024 at 04:34:58PM -0700, Deepak Gupta wrote:
> > `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ |
> > VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is
> > updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ).
> > This is to make sure that any existing apps using PROT_WRITE still work.
> >
> > Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings.
> > Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE
> > encodings for shadow stack. Above changes ensure that existing apps
> > continue to work because underneath kernel will be picking
> > `protection_map[VM_WRITE|VM_READ]` PTE encodings.
> >
> > Signed-off-by: Deepak Gupta 
> > ---
> >  arch/riscv/include/asm/mman.h| 24 
> >  arch/riscv/include/asm/pgtable.h |  1 +
> >  arch/riscv/kernel/sys_riscv.c| 11 +++
> >  arch/riscv/mm/init.c |  2 +-
> >  mm/mmap.c|  1 +
> >  5 files changed, 38 insertions(+), 1 deletion(-)
> >  create mode 100644 arch/riscv/include/asm/mman.h
> >
> > diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h
> > new file mode 100644
> > index ..ef9fedf32546
> > --- /dev/null
> > +++ b/arch/riscv/include/asm/mman.h
> > @@ -0,0 +1,24 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#ifndef __ASM_MMAN_H__
> > +#define __ASM_MMAN_H__
> > +
> > +#include 
> > +#include 
> > +#include 
> > +
> > +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
> > + unsigned long pkey __always_unused)
> > +{
> > + unsigned long ret = 0;
> > +
> > + /*
> > +  * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE.
> > +  * Only VM_WRITE means shadow stack.
> > +  */
> > + if (prot & PROT_WRITE)
> > + ret = (VM_READ | VM_WRITE);
> > + return ret;
> > +}
> > +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, 
pkey)
> > +
> > +#endif /* ! __ASM_MMAN_H__ */
> > diff --git a/arch/riscv/include/asm/pgtable.h 
b/arch/riscv/include/asm/pgtable.h
> > index 6066822e7396..4d5983bc6766 100644
> > --- a/arch/riscv/include/asm/pgtable.h
> > +++ b/arch/riscv/include/asm/pgtable.h
> > @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata;
> >  #define PAGE_READ_EXEC   __pgprot(_PAGE_BASE | _PAGE_READ | 
_PAGE_EXEC)
> >  #define PAGE_WRITE_EXEC  __pgprot(_PAGE_BASE | _PAGE_READ |
  \
> >_PAGE_EXEC | _PAGE_WRITE)
> > +#define PAGE_SHADOWSTACK   __pgprot(_PAGE_BASE | _PAGE_WRITE)
> >
> >  #define PAGE_COPYPAGE_READ
> >  #define PAGE_COPY_EXEC   PAGE_READ_EXEC
> > diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
> > index f1c1416a9f1e..846c36b1b3d5 100644
> > --- a/arch/riscv/kernel/sys_riscv.c
> > +++ b/arch/riscv/kernel/sys_riscv.c
> > @@ -8,6 +8,8 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +#include 
> >
> >  static long riscv_sys_mmap(unsigned long addr, unsigned long len,
> >  unsigned long prot, unsigned long flags,
> > @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned 
long len,
> >   if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
> >   return -EINVAL;
> >
> > + /*
> > +  * If only PROT_WRITE is specified then extend that to PROT_READ
> > +  * protection_map[VM_WRITE] is now going to select shadow stack 
encodings.
> > +  * So specifying PROT_WRITE actually should select protection_map 
[VM_WRITE | VM_READ]
> > +  * If user wants to create shadow stack then they should use 
`map_shadow_stack` syscall.
> > +  */
> > + if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ)))
>
> The comments says that this should extend to PROT_READ if only
> PROT_WRITE is specified. This condition instead is checking if
> PROT_WRITE is selected but PROT_READ is not. If prot is (VM_EXEC |
> VM_WRITE) then it would be extended to (VM_EXEC | VM_WRITE | VM_READ).
> This will not currently cause any issues because these both map to the
> same value in the protection_map PAGE_COPY_EXEC, however this seems to
> be not the intention of this change.
>
> prot == PROT_WRITE better suits the condition explained in the comment.

If someone specifies this (PROT_EXEC | PROT_WRITE) today, it works because
of the way permissions are setup in `protection_map`. On risc-v there is no
way to have a page which is execute and write only. So expectation is that
if some apps were using `PROT_EXEC | PROT_WRITE` today, they were working
because internally it was translating to read, write and execute on page
permissions level. This patch make sure that, 

Re: [PATCH v3 10/29] riscv/mm : ensure PROT_WRITE leads to VM_READ | VM_WRITE

2024-05-13 Thread Charlie Jenkins
On Mon, May 13, 2024 at 10:47:25AM -0700, Deepak Gupta wrote:
> On Fri, May 10, 2024 at 02:02:54PM -0700, Charlie Jenkins wrote:
> > On Wed, Apr 03, 2024 at 04:34:58PM -0700, Deepak Gupta wrote:
> > > `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ |
> > > VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is
> > > updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ).
> > > This is to make sure that any existing apps using PROT_WRITE still work.
> > > 
> > > Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings.
> > > Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE
> > > encodings for shadow stack. Above changes ensure that existing apps
> > > continue to work because underneath kernel will be picking
> > > `protection_map[VM_WRITE|VM_READ]` PTE encodings.
> > > 
> > > Signed-off-by: Deepak Gupta 
> > > ---
> > >  arch/riscv/include/asm/mman.h| 24 
> > >  arch/riscv/include/asm/pgtable.h |  1 +
> > >  arch/riscv/kernel/sys_riscv.c| 11 +++
> > >  arch/riscv/mm/init.c |  2 +-
> > >  mm/mmap.c|  1 +
> > >  5 files changed, 38 insertions(+), 1 deletion(-)
> > >  create mode 100644 arch/riscv/include/asm/mman.h
> > > 
> > > diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h
> > > new file mode 100644
> > > index ..ef9fedf32546
> > > --- /dev/null
> > > +++ b/arch/riscv/include/asm/mman.h
> > > @@ -0,0 +1,24 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +#ifndef __ASM_MMAN_H__
> > > +#define __ASM_MMAN_H__
> > > +
> > > +#include 
> > > +#include 
> > > +#include 
> > > +
> > > +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
> > > + unsigned long pkey __always_unused)
> > > +{
> > > + unsigned long ret = 0;
> > > +
> > > + /*
> > > +  * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE.
> > > +  * Only VM_WRITE means shadow stack.
> > > +  */
> > > + if (prot & PROT_WRITE)
> > > + ret = (VM_READ | VM_WRITE);
> > > + return ret;
> > > +}
> > > +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, 
> > > pkey)
> > > +
> > > +#endif /* ! __ASM_MMAN_H__ */
> > > diff --git a/arch/riscv/include/asm/pgtable.h 
> > > b/arch/riscv/include/asm/pgtable.h
> > > index 6066822e7396..4d5983bc6766 100644
> > > --- a/arch/riscv/include/asm/pgtable.h
> > > +++ b/arch/riscv/include/asm/pgtable.h
> > > @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata;
> > >  #define PAGE_READ_EXEC   __pgprot(_PAGE_BASE | _PAGE_READ | 
> > > _PAGE_EXEC)
> > >  #define PAGE_WRITE_EXEC  __pgprot(_PAGE_BASE | _PAGE_READ |  
> > > \
> > >_PAGE_EXEC | _PAGE_WRITE)
> > > +#define PAGE_SHADOWSTACK   __pgprot(_PAGE_BASE | _PAGE_WRITE)
> > > 
> > >  #define PAGE_COPYPAGE_READ
> > >  #define PAGE_COPY_EXEC   PAGE_READ_EXEC
> > > diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
> > > index f1c1416a9f1e..846c36b1b3d5 100644
> > > --- a/arch/riscv/kernel/sys_riscv.c
> > > +++ b/arch/riscv/kernel/sys_riscv.c
> > > @@ -8,6 +8,8 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > > +#include 
> > > +#include 
> > > 
> > >  static long riscv_sys_mmap(unsigned long addr, unsigned long len,
> > >  unsigned long prot, unsigned long flags,
> > > @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, 
> > > unsigned long len,
> > >   if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
> > >   return -EINVAL;
> > > 
> > > + /*
> > > +  * If only PROT_WRITE is specified then extend that to PROT_READ
> > > +  * protection_map[VM_WRITE] is now going to select shadow stack 
> > > encodings.
> > > +  * So specifying PROT_WRITE actually should select protection_map 
> > > [VM_WRITE | VM_READ]
> > > +  * If user wants to create shadow stack then they should use 
> > > `map_shadow_stack` syscall.
> > > +  */
> > > + if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ)))
> > 
> > The comments says that this should extend to PROT_READ if only
> > PROT_WRITE is specified. This condition instead is checking if
> > PROT_WRITE is selected but PROT_READ is not. If prot is (VM_EXEC |
> > VM_WRITE) then it would be extended to (VM_EXEC | VM_WRITE | VM_READ).
> > This will not currently cause any issues because these both map to the
> > same value in the protection_map PAGE_COPY_EXEC, however this seems to
> > be not the intention of this change.
> > 
> > prot == PROT_WRITE better suits the condition explained in the comment.
> 
> If someone specifies this (PROT_EXEC | PROT_WRITE) today, it works because
> of the way permissions are setup in `protection_map`. On risc-v there is no
> way to have a page which is execute and write only. So expectation is that
> if some apps were using `PROT_EXEC | PROT_WRITE` today, they were working
> because 

Re: [PATCH v3 02/29] riscv: define default value for envcfg for task

2024-05-13 Thread Deepak Gupta

On Fri, May 10, 2024 at 03:33:36PM -0700, Charlie Jenkins wrote:

On Wed, Apr 03, 2024 at 04:34:50PM -0700, Deepak Gupta wrote:

Defines a base default value for envcfg per task. By default all tasks
should have cache zeroing capability. Any future base capabilities that
apply to all tasks can be turned on same way.

Signed-off-by: Deepak Gupta 
---
 arch/riscv/include/asm/csr.h | 2 ++
 arch/riscv/kernel/process.c  | 6 ++
 2 files changed, 8 insertions(+)

diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 2468c55933cd..bbd2207adb39 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -202,6 +202,8 @@
 #define ENVCFG_CBIE_FLUSH  _AC(0x1, UL)
 #define ENVCFG_CBIE_INV_AC(0x3, UL)
 #define ENVCFG_FIOM_AC(0x1, UL)
+/* by default all threads should be able to zero cache */
+#define ENVCFG_BASEENVCFG_CBZE

 /* Smstateen bits */
 #define SMSTATEEN0_AIA_IMSIC_SHIFT 58
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 92922dbd5b5c..d3109557f951 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -152,6 +152,12 @@ void start_thread(struct pt_regs *regs, unsigned long pc,
else
regs->status |= SR_UXL_64;
 #endif
+   /*
+* read current envcfg settings, AND it with base settings applicable
+* for all the tasks. Base settings should've been set up during CPU
+* bring up.
+*/
+   current->thread_info.envcfg = csr_read(CSR_ENVCFG) & ENVCFG_BASE;


This needs to be gated on xlinuxenvcfg.


You're right. This csr read should be gated on xlinuxenvcfg. Will fix it.



- Charlie


 }

 void flush_thread(void)
--
2.43.2





Re: [PATCH v3 17/29] prctl: arch-agnostic prctl for indirect branch tracking

2024-05-13 Thread Deepak Gupta

On Fri, May 10, 2024 at 04:29:19PM -0700, Charlie Jenkins wrote:

On Wed, Apr 03, 2024 at 04:35:05PM -0700, Deepak Gupta wrote:

Three architectures (x86, aarch64, riscv) have support for indirect branch
tracking feature in a very similar fashion. On a very high level, indirect
branch tracking is a CPU feature where CPU tracks branches which uses
memory operand to perform control transfer in program. As part of this
tracking on indirect branches, CPU goes in a state where it expects a
landing pad instr on target and if not found then CPU raises some fault
(architecture dependent)

x86 landing pad instr - `ENDBRANCH`
aarch64 landing pad instr - `BTI`
riscv landing instr - `lpad`

Given that three major arches have support for indirect branch tracking,
This patch makes `prctl` for indirect branch tracking arch agnostic.

To allow userspace to enable this feature for itself, following prtcls are
defined:
 - PR_GET_INDIR_BR_LP_STATUS: Gets current configured status for indirect
   branch tracking.
 - PR_SET_INDIR_BR_LP_STATUS: Sets a configuration for indirect branch
   tracking.
   Following status options are allowed
   - PR_INDIR_BR_LP_ENABLE: Enables indirect branch tracking on user
 thread.
   - PR_INDIR_BR_LP_DISABLE; Disables indirect branch tracking on user
 thread.
 - PR_LOCK_INDIR_BR_LP_STATUS: Locks configured status for indirect branch
   tracking for user thread.

Signed-off-by: Deepak Gupta 
---
 include/uapi/linux/prctl.h | 27 +++
 kernel/sys.c   | 30 ++
 2 files changed, 57 insertions(+)

diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 3c66ed8f46d8..b7a8212a068e 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -328,4 +328,31 @@ struct prctl_mm_map {
  */
 #define PR_LOCK_SHADOW_STACK_STATUS  73

+/*
+ * Get the current indirect branch tracking configuration for the current
+ * thread, this will be the value configured via PR_SET_INDIR_BR_LP_STATUS.
+ */
+#define PR_GET_INDIR_BR_LP_STATUS  74
+
+/*
+ * Set the indirect branch tracking configuration. PR_INDIR_BR_LP_ENABLE will
+ * enable cpu feature for user thread, to track all indirect branches and 
ensure
+ * they land on arch defined landing pad instruction.
+ * x86 - If enabled, an indirect branch must land on `ENDBRANCH` instruction.
+ * arch64 - If enabled, an indirect branch must land on `BTI` instruction.
+ * riscv - If enabled, an indirect branch must land on `lpad` instruction.
+ * PR_INDIR_BR_LP_DISABLE will disable feature for user thread and indirect
+ * branches will no more be tracked by cpu to land on arch defined landing pad
+ * instruction.
+ */
+#define PR_SET_INDIR_BR_LP_STATUS  75
+# define PR_INDIR_BR_LP_ENABLE(1UL << 0)
+
+/*
+ * Prevent further changes to the specified indirect branch tracking
+ * configuration.  All bits may be locked via this call, including
+ * undefined bits.
+ */
+#define PR_LOCK_INDIR_BR_LP_STATUS  76
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 242e9f147791..c770060c3f06 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2330,6 +2330,21 @@ int __weak arch_lock_shadow_stack_status(struct 
task_struct *t, unsigned long st
return -EINVAL;
 }

+int __weak arch_get_indir_br_lp_status(struct task_struct *t, unsigned long 
__user *status)
+{
+   return -EINVAL;
+}
+
+int __weak arch_set_indir_br_lp_status(struct task_struct *t, unsigned long 
__user *status)
+{
+   return -EINVAL;
+}
+
+int __weak arch_lock_indir_br_lp_status(struct task_struct *t, unsigned long 
__user *status)
+{
+   return -EINVAL;
+}
+


These weak references each cause a warning:

kernel/sys.c:2333:12: warning: no previous prototype for 
'arch_get_indir_br_lp_status' [-Wmissing-prototypes]
2333 | int __weak arch_get_indir_br_lp_status(struct task_struct *t, unsigned 
long __user *status)
 |^~~
kernel/sys.c:2338:12: warning: no previous prototype for 
'arch_set_indir_br_lp_status' [-Wmissing-prototypes]
2338 | int __weak arch_set_indir_br_lp_status(struct task_struct *t, unsigned 
long __user *status)
 |^~~
kernel/sys.c:2343:12: warning: no previous prototype for 
'arch_lock_indir_br_lp_status' [-Wmissing-prototypes]
2343 | int __weak arch_lock_indir_br_lp_status(struct task_struct *t, unsigned 
long __user *status)

Can the definitions be added to include/linux/mm.h alongside the
*_shadow_stack_status() definitions?


Noted. Will work on a fix for this.



- Charlie


 #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE)

 #ifdef CONFIG_ANON_VMA_NAME
@@ -2787,6 +2802,21 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, 
unsigned long, arg3,
return -EINVAL;
error = arch_lock_shadow_stack_status(me, arg2);
break;
+   case PR_GET_INDIR_BR_LP_STATUS:
+ 

Re: [PATCH v3 10/29] riscv/mm : ensure PROT_WRITE leads to VM_READ | VM_WRITE

2024-05-13 Thread Deepak Gupta

On Sun, May 12, 2024 at 06:24:45PM +0200, Alexandre Ghiti wrote:

Hi Deepak,

On 04/04/2024 01:34, Deepak Gupta wrote:

`arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ |
VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is
updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ).
This is to make sure that any existing apps using PROT_WRITE still work.

Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings.
Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE
encodings for shadow stack. Above changes ensure that existing apps
continue to work because underneath kernel will be picking
`protection_map[VM_WRITE|VM_READ]` PTE encodings.

Signed-off-by: Deepak Gupta 
---
 arch/riscv/include/asm/mman.h| 24 
 arch/riscv/include/asm/pgtable.h |  1 +
 arch/riscv/kernel/sys_riscv.c| 11 +++
 arch/riscv/mm/init.c |  2 +-
 mm/mmap.c|  1 +
 5 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/include/asm/mman.h

diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h
new file mode 100644
index ..ef9fedf32546
--- /dev/null
+++ b/arch/riscv/include/asm/mman.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMAN_H__
+#define __ASM_MMAN_H__
+
+#include 
+#include 
+#include 
+
+static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
+   unsigned long pkey __always_unused)
+{
+   unsigned long ret = 0;
+
+   /*
+* If PROT_WRITE was specified, force it to VM_READ | VM_WRITE.
+* Only VM_WRITE means shadow stack.
+*/
+   if (prot & PROT_WRITE)
+   ret = (VM_READ | VM_WRITE);
+   return ret;
+}
+#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
+
+#endif /* ! __ASM_MMAN_H__ */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 6066822e7396..4d5983bc6766 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata;
 #define PAGE_READ_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
 #define PAGE_WRITE_EXEC__pgprot(_PAGE_BASE | _PAGE_READ |  
\
 _PAGE_EXEC | _PAGE_WRITE)
+#define PAGE_SHADOWSTACK   __pgprot(_PAGE_BASE | _PAGE_WRITE)
 #define PAGE_COPY  PAGE_READ
 #define PAGE_COPY_EXEC PAGE_READ_EXEC
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index f1c1416a9f1e..846c36b1b3d5 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -8,6 +8,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 static long riscv_sys_mmap(unsigned long addr, unsigned long len,
   unsigned long prot, unsigned long flags,
@@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long 
len,
if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
return -EINVAL;
+   /*
+* If only PROT_WRITE is specified then extend that to PROT_READ
+* protection_map[VM_WRITE] is now going to select shadow stack 
encodings.
+* So specifying PROT_WRITE actually should select protection_map 
[VM_WRITE | VM_READ]
+* If user wants to create shadow stack then they should use 
`map_shadow_stack` syscall.
+*/
+   if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ)))
+   prot |= PROT_READ;
+
return ksys_mmap_pgoff(addr, len, prot, flags, fd,
   offset >> (PAGE_SHIFT - page_shift_offset));
 }
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index fa34cf55037b..98e5ece4052a 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata 
__aligned(PAGE_SIZE);
 static const pgprot_t protection_map[16] = {
[VM_NONE]   = PAGE_NONE,
[VM_READ]   = PAGE_READ,
-   [VM_WRITE]  = PAGE_COPY,
+   [VM_WRITE]  = PAGE_SHADOWSTACK,
[VM_WRITE | VM_READ]= PAGE_COPY,
[VM_EXEC]   = PAGE_EXEC,
[VM_EXEC | VM_READ] = PAGE_READ_EXEC,
diff --git a/mm/mmap.c b/mm/mmap.c
index d89770eaab6b..57a974f49b00 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -47,6 +47,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 



What happens if someone restricts the permission to PROT_WRITE using 
mprotect()? I would say this is an issue since it would turn the pages 
into shadow stack pages.


look at this patch in this patch series.
"riscv/mm : ensure PROT_WRITE leads to VM_READ | 

Re: [PATCH v3 10/29] riscv/mm : ensure PROT_WRITE leads to VM_READ | VM_WRITE

2024-05-13 Thread Deepak Gupta

On Fri, May 10, 2024 at 02:02:54PM -0700, Charlie Jenkins wrote:

On Wed, Apr 03, 2024 at 04:34:58PM -0700, Deepak Gupta wrote:

`arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ |
VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is
updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ).
This is to make sure that any existing apps using PROT_WRITE still work.

Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings.
Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE
encodings for shadow stack. Above changes ensure that existing apps
continue to work because underneath kernel will be picking
`protection_map[VM_WRITE|VM_READ]` PTE encodings.

Signed-off-by: Deepak Gupta 
---
 arch/riscv/include/asm/mman.h| 24 
 arch/riscv/include/asm/pgtable.h |  1 +
 arch/riscv/kernel/sys_riscv.c| 11 +++
 arch/riscv/mm/init.c |  2 +-
 mm/mmap.c|  1 +
 5 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/include/asm/mman.h

diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h
new file mode 100644
index ..ef9fedf32546
--- /dev/null
+++ b/arch/riscv/include/asm/mman.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMAN_H__
+#define __ASM_MMAN_H__
+
+#include 
+#include 
+#include 
+
+static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
+   unsigned long pkey __always_unused)
+{
+   unsigned long ret = 0;
+
+   /*
+* If PROT_WRITE was specified, force it to VM_READ | VM_WRITE.
+* Only VM_WRITE means shadow stack.
+*/
+   if (prot & PROT_WRITE)
+   ret = (VM_READ | VM_WRITE);
+   return ret;
+}
+#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
+
+#endif /* ! __ASM_MMAN_H__ */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 6066822e7396..4d5983bc6766 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata;
 #define PAGE_READ_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
 #define PAGE_WRITE_EXEC__pgprot(_PAGE_BASE | _PAGE_READ |  
\
 _PAGE_EXEC | _PAGE_WRITE)
+#define PAGE_SHADOWSTACK   __pgprot(_PAGE_BASE | _PAGE_WRITE)

 #define PAGE_COPY  PAGE_READ
 #define PAGE_COPY_EXEC PAGE_READ_EXEC
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index f1c1416a9f1e..846c36b1b3d5 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -8,6 +8,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 static long riscv_sys_mmap(unsigned long addr, unsigned long len,
   unsigned long prot, unsigned long flags,
@@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long 
len,
if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
return -EINVAL;

+   /*
+* If only PROT_WRITE is specified then extend that to PROT_READ
+* protection_map[VM_WRITE] is now going to select shadow stack 
encodings.
+* So specifying PROT_WRITE actually should select protection_map 
[VM_WRITE | VM_READ]
+* If user wants to create shadow stack then they should use 
`map_shadow_stack` syscall.
+*/
+   if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ)))


The comments says that this should extend to PROT_READ if only
PROT_WRITE is specified. This condition instead is checking if
PROT_WRITE is selected but PROT_READ is not. If prot is (VM_EXEC |
VM_WRITE) then it would be extended to (VM_EXEC | VM_WRITE | VM_READ).
This will not currently cause any issues because these both map to the
same value in the protection_map PAGE_COPY_EXEC, however this seems to
be not the intention of this change.

prot == PROT_WRITE better suits the condition explained in the comment.


If someone specifies this (PROT_EXEC | PROT_WRITE) today, it works because
of the way permissions are setup in `protection_map`. On risc-v there is no
way to have a page which is execute and write only. So expectation is that
if some apps were using `PROT_EXEC | PROT_WRITE` today, they were working
because internally it was translating to read, write and execute on page
permissions level. This patch make sure that, it stays same from page
permissions perspective.

If someone was using PROT_EXEC, it may translate to execute only and this change
doesn't impact that.

Patch simply looks for presence of `PROT_WRITE` and absence of `PROT_READ` in
protection flags and if that condition is satisfied, it assumes that caller 
assumed
page is going to be read allowed as well.





+   prot |= PROT_READ;
+
return ksys_mmap_pgoff(addr, len, prot, flags, fd,
 

[GIT PULL] Kselftest update for Linux 6.10-rc1

2024-05-13 Thread Shuah Khan

Hi Linus,

Please pull the kselftest update for Linux 6.10-rc1.

This kselftest update for Linux 6.10-rc1 consists of:

- changes to make framework and tests reporting KTAP compliant
- changes to make ktap_helpers and power_supply test POSIX compliant
- adds ksft_exit_fail_perror() to include errono in string form
- fixes to avoid clang reporting false positive static analysis errors
  about functions that exit and never return. ksft_exit* functions
  are marked __noreturn to address this problem
- adds mechanism for reporting a KSFT_ result code
- fixes to build warnings related missing headers and unused variables
- fixes to clang build failures
- cleanups to resctrl test
- adds host arch for LLVM builds

Please note that Stepen found the following conflict in
tools/testing/selftests/mm/soft-dirty.c in next and fixed it up.

between commit:

  258ff696db6b ("selftests/mm: soft-dirty should fail if a testcase fails")

from the mm-unstable branch of the mm tree and commit:

  e6162a96c81d ("selftests/mm: ksft_exit functions do not return")

from the kselftest tree.

Stepehen's fix taking the 258ff696db6b change to use ksft_finished()
looks good to me.

diff for pull request is attached.

thanks,
-- Shuah


The following changes since commit dd5a440a31fae6e459c0d627162825505361:

  Linux 6.9-rc7 (2024-05-05 14:06:01 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest 
tags/linux_kselftest-next-6.10-rc1

for you to fetch changes up to 2c3b8f8f37c6c0c926d584cf4158db95e62b960c:

  selftests/sgx: Include KHDR_INCLUDES in Makefile (2024-05-08 17:08:46 -0600)


linux_kselftest-next-6.10-rc1

This kselftest update for Linux 6.10-rc1 consists of:

- changes to make framework and tests reporting KTAP compliant
- changes to make ktap_helpers and power_supply test POSIX compliant
- adds ksft_exit_fail_perror() to include errono in string form
- fixes to avoid clang reporting false positive static analysis errors
  about functions that exit and never return. ksft_exit* functions
  are marked __noreturn to address this problem
- adds mechanism for reporting a KSFT_ result code
- fixes to build warnings related missing headers and unused variables
- fixes to clang build failures
- cleanups to resctrl test
- adds host arch for LLVM builds


Amer Al Shanawany (2):
  selftests: filesystems: add missing stddef header
  selftests/capabilities: fix warn_unused_result build warnings

Edward Liaw (2):
  selftests: Compile kselftest headers with -D_GNU_SOURCE
  selftests/sgx: Include KHDR_INCLUDES in Makefile

John Hubbard (3):
  selftests/binderfs: use the Makefile's rules, not Make's implicit rules
  selftests/resctrl: fix clang build failure: use LOCAL_HDRS
  selftests/resctrl: fix clang build warnings related to abs(), labs() calls

Lu Dai (1):
  selftests: kselftest_deps: fix l5_test() empty variable

Maciej Wieczor-Retman (3):
  selftests/resctrl: Add cleanup function to test framework
  selftests/resctrl: Simplify cleanup in ctrl-c handler
  selftests/resctrl: Move cleanups out of individual tests

Mark Brown (8):
  kselftest: Add mechanism for reporting a KSFT_ result code
  kselftest/tty: Report a consistent test name for the one test we run
  kselftest/clone3: Make test names for set_tid test stable
  tracing/selftests: Support log output when generating KTAP output
  tracing/selftests: Default to verbose mode when running in kselftest
  selftests/clone3: Fix compiler warning
  selftests/clone3: Check that the child exited cleanly
  selftests/clone3: Correct log message for waitpid() failures


Masami Hiramatsu (Google) (2):
  selftests/ftrace: Fix BTFARG testcase to check fprobe is enabled correctly
  selftests/ftrace: Fix checkbashisms errors

Muhammad Usama Anjum (9):
  selftests: x86: test_vsyscall: reorder code to reduce #ifdef blocks
  selftests: x86: test_vsyscall: conform test to TAP format output
  selftests: x86: test_mremap_vdso: conform test to TAP format output
  selftests/dmabuf-heap: conform test to TAP format output
  kselftest: Add missing signature to the comments
  selftests: add ksft_exit_fail_perror()
  selftests: exec: Use new ksft_exit_fail_perror() helper
  selftests: Mark ksft_exit_fail_perror() as __noreturn
  selftests: cpufreq: conform test to TAP

Nathan Chancellor (10):
  selftests/clone3: ksft_exit functions do not return
  selftests/ipc: ksft_exit functions do not return
  selftests: membarrier: ksft_exit_pass() does not return
  selftests/mm: ksft_exit functions do not return
  selftests: pidfd: ksft_exit functions do not return
  selftests/resctrl: ksft_exit_skip() does not return
 

Re: [PATCH v3 12/29] riscv mmu: teach pte_mkwrite to manufacture shadow stack PTEs

2024-05-13 Thread Deepak Gupta

On Sun, May 12, 2024 at 06:28:59PM +0200, Alexandre Ghiti wrote:


On 04/04/2024 01:35, Deepak Gupta wrote:

pte_mkwrite creates PTEs with WRITE encodings for underlying arch.
Underlying arch can have two types of writeable mappings. One that can be
written using regular store instructions. Another one that can only be
written using specialized store instructions (like shadow stack stores).
pte_mkwrite can select write PTE encoding based on VMA range (i.e.
VM_SHADOW_STACK)

Signed-off-by: Deepak Gupta 
---
 arch/riscv/include/asm/pgtable.h |  7 +++
 arch/riscv/mm/pgtable.c  | 21 +
 2 files changed, 28 insertions(+)

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 6362407f1e83..9b837239d3e8 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -403,6 +403,10 @@ static inline pte_t pte_wrprotect(pte_t pte)
 /* static inline pte_t pte_mkread(pte_t pte) */
+struct vm_area_struct;
+pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma);
+#define pte_mkwrite pte_mkwrite
+
 static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
return __pte(pte_val(pte) | _PAGE_WRITE);
@@ -694,6 +698,9 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
return pte_pmd(pte_mkyoung(pmd_pte(pmd)));
 }
+pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+#define pmd_mkwrite pmd_mkwrite
+
 static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
 {
return pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)));
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
index ef887efcb679..c84ae2e0424d 100644
--- a/arch/riscv/mm/pgtable.c
+++ b/arch/riscv/mm/pgtable.c
@@ -142,3 +142,24 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
return pmd;
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+   if (vma_is_shadow_stack(vma->vm_flags))
+   return pte_mkwrite_shstk(pte);
+
+   pte = pte_mkwrite_novma(pte);



I would directly return pte_mkwrite_novma(pte) instead of assigning pte.


noted.





+
+   return pte;
+}
+
+pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+{
+   if (vma_is_shadow_stack(vma->vm_flags))
+   return pmd_mkwrite_shstk(pmd);
+
+   pmd = pmd_mkwrite_novma(pmd);



Ditto here.


noted here too.





+
+   return pmd;
+}
+



Otherwise:

Reviewed-by: Alexandre Ghiti 

Thanks,

Alex





Re: [PATCH v3 13/29] riscv mmu: write protect and shadow stack

2024-05-13 Thread Deepak Gupta

On Sun, May 12, 2024 at 06:31:24PM +0200, Alexandre Ghiti wrote:

On 04/04/2024 01:35, Deepak Gupta wrote:

`fork` implements copy on write (COW) by making pages readonly in child
and parent both.

ptep_set_wrprotect and pte_wrprotect clears _PAGE_WRITE in PTE.
Assumption is that page is readable and on fault copy on write happens.

To implement COW on such pages,



I guess you mean "shadow stack pages" here.


Yes I meant shadow stack pages. Will fix the message.





 clearing up W bit makes them XWR = 000.
This will result in wrong PTE setting which says no perms but V=1 and PFN
field pointing to final page. Instead desired behavior is to turn it into
a readable page, take an access (load/store) fault on sspush/sspop
(shadow stack) and then perform COW on such pages.
This way regular reads
would still be allowed and not lead to COW maintaining current behavior
of COW on non-shadow stack but writeable memory.

On the other hand it doesn't interfere with existing COW for read-write
memory. Assumption is always that _PAGE_READ must have been set and thus
setting _PAGE_READ is harmless.

Signed-off-by: Deepak Gupta 
---
 arch/riscv/include/asm/pgtable.h | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 9b837239d3e8..7a1c2a98d272 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -398,7 +398,7 @@ static inline int pte_special(pte_t pte)
 static inline pte_t pte_wrprotect(pte_t pte)
 {
-   return __pte(pte_val(pte) & ~(_PAGE_WRITE));
+   return __pte((pte_val(pte) & ~(_PAGE_WRITE)) | (_PAGE_READ));
 }
 /* static inline pte_t pte_mkread(pte_t pte) */
@@ -581,7 +581,15 @@ static inline pte_t ptep_get_and_clear(struct mm_struct 
*mm,
 static inline void ptep_set_wrprotect(struct mm_struct *mm,
  unsigned long address, pte_t *ptep)
 {
-   atomic_long_and(~(unsigned long)_PAGE_WRITE, (atomic_long_t *)ptep);
+   volatile pte_t read_pte = *ptep;
+   /*
+* ptep_set_wrprotect can be called for shadow stack ranges too.
+* shadow stack memory is XWR = 010 and thus clearing _PAGE_WRITE will 
lead to
+* encoding 000b which is wrong encoding with V = 1. This should lead 
to page fault
+* but we dont want this wrong configuration to be set in page tables.
+*/
+   atomic_long_set((atomic_long_t *)ptep,
+   ((pte_val(read_pte) & ~(unsigned long)_PAGE_WRITE) | 
_PAGE_READ));
 }
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH



Doesn't making the shadow stack page readable allow "normal" loads to 
access the page? If it does, isn't that an issue (security-wise)?


When shadow stack permissions are there (i.e. R=0, W=1, X=0), then also shadow 
stack is
readable through "normal" loads. So nothing changes when it converts into a 
readonly page
from page permissions perspective.

Security-wise it's not a concern because from threat modeling perspective, if 
attacker had
read-write primitives (via some bug in program) available to read and write 
address space
of process/task; then they would have availiblity of return addresses on normal 
stack. It's
the write primitive that is concerning and to be protected against. And that's 
why shadow stack
is not writeable using "normal" stores.







Re: [PATCH v3 14/29] riscv/mm: Implement map_shadow_stack() syscall

2024-05-13 Thread Deepak Gupta

On Sun, May 12, 2024 at 06:50:18PM +0200, Alexandre Ghiti wrote:


On 04/04/2024 01:35, Deepak Gupta wrote:

As discussed extensively in the changelog for the addition of this
syscall on x86 ("x86/shstk: Introduce map_shadow_stack syscall") the
existing mmap() and madvise() syscalls do not map entirely well onto the
security requirements for shadow stack memory since they lead to windows
where memory is allocated but not yet protected or stacks which are not
properly and safely initialised. Instead a new syscall map_shadow_stack()
has been defined which allocates and initialises a shadow stack page.

This patch implements this syscall for riscv. riscv doesn't require token
to be setup by kernel because user mode can do that by itself. However to
provide compatibility and portability with other architectues, user mode
can specify token set flag.

Signed-off-by: Deepak Gupta 
---
 arch/riscv/kernel/Makefile  |   2 +
 arch/riscv/kernel/usercfi.c | 149 
 include/uapi/asm-generic/mman.h |   1 +
 3 files changed, 152 insertions(+)
 create mode 100644 arch/riscv/kernel/usercfi.c

diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 604d6bf7e476..3bec82f4e94c 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -107,3 +107,5 @@ obj-$(CONFIG_COMPAT)+= compat_vdso/
 obj-$(CONFIG_64BIT)+= pi/
 obj-$(CONFIG_ACPI) += acpi.o
+
+obj-$(CONFIG_RISCV_USER_CFI) += usercfi.o
diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c
new file mode 100644
index ..c4ed0d4e33d6
--- /dev/null
+++ b/arch/riscv/kernel/usercfi.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Rivos, Inc.
+ * Deepak Gupta 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define SHSTK_ENTRY_SIZE sizeof(void *)
+
+/*
+ * Writes on shadow stack can either be `sspush` or `ssamoswap`. `sspush` can 
happen
+ * implicitly on current shadow stack pointed to by CSR_SSP. `ssamoswap` takes 
pointer to
+ * shadow stack. To keep it simple, we plan to use `ssamoswap` to perform 
writes on shadow
+ * stack.
+ */
+static noinline unsigned long amo_user_shstk(unsigned long *addr, unsigned 
long val)
+{
+   /*
+* Since shadow stack is supported only in 64bit configuration,
+* ssamoswap.d is used below.



** CONFIG_RISCV_USER_CFI is dependent
+* on 64BIT and compile of this file is dependent on 
CONFIG_RISCV_USER_CFI
+* In case ssamoswap faults, return -1.



To me, this part of the comment is not needed.


Ok, will remove it.





+* Never expect -1 on shadow stack. Expect return addresses and zero



In that case, should we BUG() instead?


Caller (create_rstor_token) of `amo_user_shstk` is returning -EFAULT. It'll translate to 
signal (SIGSEGV) delivery to user app or terminate.






+*/
+   unsigned long swap = -1;
+
+   __enable_user_access();
+   asm goto(
+   ".option push\n"
+   ".option arch, +zicfiss\n"
+   "1: ssamoswap.d %[swap], %[val], %[addr]\n"
+   _ASM_EXTABLE(1b, %l[fault])
+   RISCV_ACQUIRE_BARRIER
+   ".option pop\n"
+   : [swap] "=r" (swap), [addr] "+A" (*addr)
+   : [val] "r" (val)
+   : "memory"
+   : fault
+   );
+   __disable_user_access();
+   return swap;
+fault:
+   __disable_user_access();
+   return -1;
+}
+
+/*
+ * Create a restore token on the shadow stack.  A token is always XLEN wide
+ * and aligned to XLEN.
+ */
+static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
+{
+   unsigned long addr;
+
+   /* Token must be aligned */
+   if (!IS_ALIGNED(ssp, SHSTK_ENTRY_SIZE))
+   return -EINVAL;
+
+   /* On RISC-V we're constructing token to be function of address itself 
*/
+   addr = ssp - SHSTK_ENTRY_SIZE;
+
+   if (amo_user_shstk((unsigned long __user *)addr, (unsigned long) ssp) 
== -1)
+   return -EFAULT;
+
+   if (token_addr)
+   *token_addr = addr;
+
+   return 0;
+}
+
+static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long 
size,
+   unsigned long token_offset,
+   bool set_tok)
+{
+   int flags = MAP_ANONYMOUS | MAP_PRIVATE;
+   struct mm_struct *mm = current->mm;
+   unsigned long populate, tok_loc = 0;
+
+   if (addr)
+   flags |= MAP_FIXED_NOREPLACE;
+
+   mmap_write_lock(mm);
+   addr = do_mmap(NULL, addr, size, PROT_READ, flags,



Hmmm why do you map the shadow stack as 

Re: [GIT PULL] Kselftest fixes for v6.9

2024-05-13 Thread Shuah Khan

On 5/12/24 04:56, Mickaël Salaün wrote:

Hi Linus,

Without reply from Shuah, and given the importance of these fixes [1], here is
a PR to fix Kselftest (broken since v6.9-rc1) for at least KVM, pidfd, and
Landlock.  I cannot test against all kselftests though.  This has been in
linux-next since the beginning of this week, and so far only one issue has been
reported [2] and fixed [3].

Feel free to take this PR if you see fit.


Thank you - I totally missed the emails about sending these up for 6.9 :(

I see that these are already in Linux 6.9

thanks,
-- Shuah




Re: [PATCH v3 15/29] riscv/shstk: If needed allocate a new shadow stack on clone

2024-05-13 Thread Deepak Gupta

On Sun, May 12, 2024 at 07:05:27PM +0200, Alexandre Ghiti wrote:

On 04/04/2024 01:35, Deepak Gupta wrote:

Userspace specifies VM_CLONE to share address space and spawn new thread.



CLONE_VM?


Yes I meant CLONE_VM, will fix it.





`clone` allow userspace to specify a new stack for new thread. However
there is no way to specify new shadow stack base address without changing
API. This patch allocates a new shadow stack whenever VM_CLONE is given.

In case of VM_FORK, parent is suspended until child finishes and thus can



You mean CLONE_VFORK here right?


Yes I meant CLONE_VFORK, will fix it.





child use parent shadow stack. In case of !VM_CLONE, COW kicks in because
entire address space is copied from parent to child.

`clone3` is extensible and can provide mechanisms using which shadow stack
as an input parameter can be provided. This is not settled yet and being
extensively discussed on mailing list. Once that's settled, this commit
will adapt to that.

Signed-off-by: Deepak Gupta 
---
 arch/riscv/include/asm/usercfi.h |  39 ++
 arch/riscv/kernel/process.c  |  12 ++-
 arch/riscv/kernel/usercfi.c  | 121 +++
 3 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/usercfi.h b/arch/riscv/include/asm/usercfi.h
index 4fa201b4fc4e..b47574a7a8c9 100644
--- a/arch/riscv/include/asm/usercfi.h
+++ b/arch/riscv/include/asm/usercfi.h
@@ -8,6 +8,9 @@
 #ifndef __ASSEMBLY__
 #include 
+struct task_struct;
+struct kernel_clone_args;
+
 #ifdef CONFIG_RISCV_USER_CFI
 struct cfi_status {
unsigned long ubcfi_en : 1; /* Enable for backward cfi. */
@@ -17,6 +20,42 @@ struct cfi_status {
unsigned long shdw_stk_size; /* size of shadow stack */
 };
+unsigned long shstk_alloc_thread_stack(struct task_struct *tsk,
+   const struct 
kernel_clone_args *args);
+void shstk_release(struct task_struct *tsk);
+void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, 
unsigned long size);
+void set_active_shstk(struct task_struct *task, unsigned long shstk_addr);
+bool is_shstk_enabled(struct task_struct *task);
+
+#else
+
+static inline unsigned long shstk_alloc_thread_stack(struct task_struct *tsk,
+  const struct kernel_clone_args *args)
+{
+   return 0;
+}
+
+static inline void shstk_release(struct task_struct *tsk)
+{
+
+}
+
+static inline void set_shstk_base(struct task_struct *task, unsigned long 
shstk_addr,
+   unsigned long 
size)
+{
+
+}
+
+static inline void set_active_shstk(struct task_struct *task, unsigned long 
shstk_addr)
+{
+
+}
+
+static inline bool is_shstk_enabled(struct task_struct *task)
+{
+   return false;
+}
+
 #endif /* CONFIG_RISCV_USER_CFI */
 #endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index ce577cdc2af3..ef48a25b0eff 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 register unsigned long gp_in_global __asm__("gp");
@@ -202,7 +203,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct 
task_struct *src)
 void exit_thread(struct task_struct *tsk)
 {
-
+   if (IS_ENABLED(CONFIG_RISCV_USER_CFI))
+   shstk_release(tsk);
 }
 int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
@@ -210,6 +212,7 @@ int copy_thread(struct task_struct *p, const struct 
kernel_clone_args *args)
unsigned long clone_flags = args->flags;
unsigned long usp = args->stack;
unsigned long tls = args->tls;
+   unsigned long ssp = 0;
struct pt_regs *childregs = task_pt_regs(p);
memset(>thread.s, 0, sizeof(p->thread.s));
@@ -225,11 +228,18 @@ int copy_thread(struct task_struct *p, const struct 
kernel_clone_args *args)
p->thread.s[0] = (unsigned long)args->fn;
p->thread.s[1] = (unsigned long)args->fn_arg;
} else {
+   /* allocate new shadow stack if needed. In case of CLONE_VM we 
have to */
+   ssp = shstk_alloc_thread_stack(p, args);
+   if (IS_ERR_VALUE(ssp))
+   return PTR_ERR((void *)ssp);
+
*childregs = *(current_pt_regs());
/* Turn off status.VS */
riscv_v_vstate_off(childregs);
if (usp) /* User fork */
childregs->sp = usp;
+   if (ssp) /* if needed, set new ssp */
+   set_active_shstk(p, ssp);
if (clone_flags & CLONE_SETTLS)
childregs->tp = tls;
childregs->a0 = 0; /* Return value of fork() */
diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c
index c4ed0d4e33d6..11ef7ab925c9 100644
--- a/arch/riscv/kernel/usercfi.c
+++ b/arch/riscv/kernel/usercfi.c
@@ 

Re: [PATCH v3 27/29] riscv: Documentation for landing pad / indirect branch tracking

2024-05-13 Thread Deepak Gupta

On Fri, May 10, 2024 at 01:30:32PM -0700, Charlie Jenkins wrote:

On Wed, Apr 03, 2024 at 04:35:15PM -0700, Deepak Gupta wrote:

Adding documentation on landing pad aka indirect branch tracking on riscv
and kernel interfaces exposed so that user tasks can enable it.

Signed-off-by: Deepak Gupta 
---
 Documentation/arch/riscv/zicfilp.rst | 104 +++
 1 file changed, 104 insertions(+)
 create mode 100644 Documentation/arch/riscv/zicfilp.rst

diff --git a/Documentation/arch/riscv/zicfilp.rst 
b/Documentation/arch/riscv/zicfilp.rst
new file mode 100644
index ..3007c81f0465
--- /dev/null
+++ b/Documentation/arch/riscv/zicfilp.rst
@@ -0,0 +1,104 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Author: Deepak Gupta 
+:Date:   12 January 2024
+
+
+Tracking indirect control transfers on RISC-V Linux
+
+
+This document briefly describes the interface provided to userspace by Linux
+to enable indirect branch tracking for user mode applications on RISV-V
+
+1. Feature Overview
+
+
+Memory corruption issues usually result in to crashes, however when in hands of
+an adversary and if used creatively can result into variety security issues.
+
+One of those security issues can be code re-use attacks on program where 
adversary
+can use corrupt function pointers and chain them together to perform jump 
oriented
+programming (JOP) or call oriented programming (COP) and thus compromising 
control
+flow integrity (CFI) of the program.
+
+Function pointers live in read-write memory and thus are susceptible to 
corruption
+and allows an adversary to reach any program counter (PC) in address space. On
+RISC-V zicfilp extension enforces a restriction on such indirect control 
transfers
+
+   - indirect control transfers must land on a landing pad instruction 
`lpad`.
+ There are two exception to this rule
+   - rs1 = x1 or rs1 = x5, i.e. a return from a function and 
returns are


What is a return that is not a return from a function?

Those would be a jump or call (depending on convention of whether return is 
saved in x1/x5)




+ protected using shadow stack (see zicfiss.rst)
+
+   - rs1 = x7. On RISC-V compiler usually does below to reach 
function
+ which is beyond the offset possible J-type instruction.
+
+   "auipc x7, "
+   "jalr (x7)"
+
+ Such form of indirect control transfer are still immutable 
and don't rely
+ on memory and thus rs1=x7 is exempted from tracking and 
considered software
+ guarded jumps.
+
+`lpad` instruction is pseudo of `auipc rd, ` and is a HINT nop. 
`lpad`


I think this should say "x0" or instead of "rd", or mention that rd=x0.


Yeah I missed that. will fix it.




+instruction must be aligned on 4 byte boundary and compares 20 bit immediate 
with x7.
+If `imm_20bit` == 0, CPU don't perform any comparision with x7. If `imm_20bit` 
!= 0,
+then `imm_20bit` must match x7 else CPU will raise `software check exception`
+(cause=18)with `*tval = 2`.
+
+Compiler can generate a hash over function signatures and setup them (truncated
+to 20bit) in x7 at callsites and function proglogs can have `lpad` with same


"prologues" instead of "proglogs"


Will fix it.




+function hash. This further reduces number of program counters a call site can
+reach.
+
+2. ELF and psABI
+-
+
+Toolchain sets up `GNU_PROPERTY_RISCV_FEATURE_1_FCFI` for property
+`GNU_PROPERTY_RISCV_FEATURE_1_AND` in notes section of the object file.
+
+3. Linux enabling
+--
+
+User space programs can have multiple shared objects loaded in its address 
space
+and it's a difficult task to make sure all the dependencies have been compiled
+with support of indirect branch. Thus it's left to dynamic loader to enable
+indirect branch tracking for the program.
+
+4. prctl() enabling
+
+
+`PR_SET_INDIR_BR_LP_STATUS` / `PR_GET_INDIR_BR_LP_STATUS` /
+`PR_LOCK_INDIR_BR_LP_STATUS` are three prctls added to manage indirect branch
+tracking. prctls are arch agnostic and returns -EINVAL on other arches.
+
+`PR_SET_INDIR_BR_LP_STATUS`: If arg1 `PR_INDIR_BR_LP_ENABLE` and if CPU 
supports
+`zicfilp` then kernel will enabled indirect branch tracking for the task.
+Dynamic loader can issue this `prctl` once it has determined that all the 
objects
+loaded in address space support indirect branch tracking. Additionally if 
there is
+a `dlopen` to an object which wasn't compiled with `zicfilp`, dynamic loader 
can
+issue this prctl with arg1 set to 0 (i.e. `PR_INDIR_BR_LP_ENABLE` being clear)
+
+`PR_GET_INDIR_BR_LP_STATUS`: Returns current status of indirect branch 
tracking.
+If enabled it'll return `PR_INDIR_BR_LP_ENABLE`
+
+`PR_LOCK_INDIR_BR_LP_STATUS`: Locks current status of indirect branch tracking 
on

Re: [PATCH v4 08/66] selftests/cgroup: Drop define _GNU_SOURCE

2024-05-13 Thread Tejun Heo
On Fri, May 10, 2024 at 12:06:25AM +, Edward Liaw wrote:
> _GNU_SOURCE is provided by lib.mk, so it should be dropped to prevent
> redefinition warnings.
> 
> Signed-off-by: Edward Liaw 

Applied to cgroup/for-6.10.

Thanks.

-- 
tejun



Re: [PATCH v6 13/17] riscv: vector: Support xtheadvector save/restore

2024-05-13 Thread Charlie Jenkins
On Mon, May 13, 2024 at 04:45:18PM +0800, Andy Chiu wrote:
> Hi Charlie,
> 
> Sorry, I am late on this. I haven't looked through the entire series
> yet, but here is something that I thought worth bringing up sooner.
> 
> On Sat, May 4, 2024 at 2:22 AM Charlie Jenkins  wrote:
> >
> > Use alternatives to add support for xtheadvector vector save/restore
> > routines.
> >
> > Signed-off-by: Charlie Jenkins 
> > ---
> >  arch/riscv/Kconfig.vendor  |  13 ++
> >  arch/riscv/include/asm/csr.h   |   6 +
> >  arch/riscv/include/asm/switch_to.h |   2 +-
> >  arch/riscv/include/asm/vector.h| 247 
> > ++---
> >  arch/riscv/kernel/cpufeature.c |   2 +-
> >  arch/riscv/kernel/kernel_mode_vector.c |   8 +-
> >  arch/riscv/kernel/process.c|   4 +-
> >  arch/riscv/kernel/signal.c |   6 +-
> >  arch/riscv/kernel/vector.c |  13 +-
> >  9 files changed, 233 insertions(+), 68 deletions(-)
> >
> > diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor
> > index aa5a191e659e..edf49f3065ac 100644
> > --- a/arch/riscv/Kconfig.vendor
> > +++ b/arch/riscv/Kconfig.vendor
> > @@ -13,6 +13,19 @@ config RISCV_ISA_VENDOR_EXT_THEAD
> >   extensions. Without this option enabled, T-Head vendor extensions 
> > will
> >   not be detected at boot and their presence not reported to 
> > userspace.
> >
> > + If you don't know what to do here, say Y.
> > +
> > +config RISCV_ISA_XTHEADVECTOR
> > +   bool "xtheadvector extension support"
> > +   depends on RISCV_ISA_VENDOR_EXT_THEAD
> > +   depends on RISCV_ISA_V
> > +   depends on FPU
> > +   default y
> > +   help
> > + Say N here if you want to disable all xtheadvector related 
> > procedure
> > + in the kernel. This will disable vector for any T-Head board that
> > + contains xtheadvector rather than the standard vector.
> > +
> >   If you don't know what to do here, say Y.
> >  endmenu
> >
> > diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> > index e5a35efd56e0..13657d096e7d 100644
> > --- a/arch/riscv/include/asm/csr.h
> > +++ b/arch/riscv/include/asm/csr.h
> > @@ -30,6 +30,12 @@
> >  #define SR_VS_CLEAN_AC(0x0400, UL)
> >  #define SR_VS_DIRTY_AC(0x0600, UL)
> >
> > +#define SR_VS_THEAD_AC(0x0180, UL) /* xtheadvector Status 
> > */
> > +#define SR_VS_OFF_THEAD_AC(0x, UL)
> > +#define SR_VS_INITIAL_THEAD_AC(0x0080, UL)
> > +#define SR_VS_CLEAN_THEAD  _AC(0x0100, UL)
> > +#define SR_VS_DIRTY_THEAD  _AC(0x0180, UL)
> > +
> >  #define SR_XS  _AC(0x00018000, UL) /* Extension Status */
> >  #define SR_XS_OFF  _AC(0x, UL)
> >  #define SR_XS_INITIAL  _AC(0x8000, UL)
> > diff --git a/arch/riscv/include/asm/switch_to.h 
> > b/arch/riscv/include/asm/switch_to.h
> > index 7efdb0584d47..ada6b5cf2d94 100644
> > --- a/arch/riscv/include/asm/switch_to.h
> > +++ b/arch/riscv/include/asm/switch_to.h
> > @@ -78,7 +78,7 @@ do {  \
> > struct task_struct *__next = (next);\
> > if (has_fpu())  \
> > __switch_to_fpu(__prev, __next);\
> > -   if (has_vector())   \
> > +   if (has_vector() || has_xtheadvector()) \
> > __switch_to_vector(__prev, __next); \
> > ((last) = __switch_to(__prev, __next)); \
> >  } while (0)
> > diff --git a/arch/riscv/include/asm/vector.h 
> > b/arch/riscv/include/asm/vector.h
> > index 731dcd0ed4de..db851dc81870 100644
> > --- a/arch/riscv/include/asm/vector.h
> > +++ b/arch/riscv/include/asm/vector.h
> > @@ -18,6 +18,27 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#define __riscv_v_vstate_or(_val, TYPE) ({ \
> > +   typeof(_val) _res = _val;   \
> > +   if (has_xtheadvector()) \
> > +   _res = (_res & ~SR_VS_THEAD) | SR_VS_##TYPE##_THEAD;\
> > +   else\
> > +   _res = (_res & ~SR_VS) | SR_VS_##TYPE;  \
> > +   _res;   \
> > +})
> > +
> > +#define __riscv_v_vstate_check(_val, TYPE) ({  \
> > +   bool _res;  \
> > +   if (has_xtheadvector()) \
> > +   _res = ((_val) & SR_VS_THEAD) == SR_VS_##TYPE##_THEAD;  \
> > +   else\
> > +   _res = ((_val) & SR_VS) == SR_VS_##TYPE;\
> > +   _res;   \
> > +})
> >
> >  extern 

[GIT PULL] KUnit update for Linux 6.10-rc1

2024-05-13 Thread Shuah Khan

Hi Linus,

Please pull the following KUnit next update for Linux 6.10-rc1.

This kunit update for Linux 6.10-rc1 consists of:

- fix to race condition in try-catch completion
- change to __kunit_test_suites_init() to exit early if there is
  nothing to test
- change to string-stream-test to use KUNIT_DEFINE_ACTION_WRAPPER
- moving fault tests behind KUNIT_FAULT_TEST Kconfig option
- kthread test fixes and improvements
- iov_iter test fixes

diff is attached.

Tests passed on linux-next on my test system:
- allmodconfig build

Default arch um:
./tools/testing/kunit/kunit.py run
./tools/testing/kunit/kunit.py run --alltests

./tools/testing/kunit/kunit.py run --arch x86_64
./tools/testing/kunit/kunit.py run --alltests --arch x86_64

thanks,
-- Shuah


The following changes since commit dd5a440a31fae6e459c0d627162825505361:

  Linux 6.9-rc7 (2024-05-05 14:06:01 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest 
tags/linux_kselftest-kunit-6.10-rc1

for you to fetch changes up to 5496b9b77d7420652202b73cf036e69760be5deb:

  kunit: bail out early in __kunit_test_suites_init() if there are no suites to 
test (2024-05-06 14:22:02 -0600)


linux_kselftest-kunit-6.10-rc1

This kunit update for Linux 6.10-rc1 consists of:

- fix to race condition in try-catch completion
- change to __kunit_test_suites_init() to exit early if there is
  nothing to test
- change to string-stream-test to use KUNIT_DEFINE_ACTION_WRAPPER
- moving fault tests behind KUNIT_FAULT_TEST Kconfig option
- kthread test fixes and improvements
- iov_iter test fixes


David Gow (2):
  kunit: Fix race condition in try-catch completion
  kunit: test: Move fault tests behind KUNIT_FAULT_TEST Kconfig option

Ivan Orlov (1):
  kunit: string-stream-test: use KUNIT_DEFINE_ACTION_WRAPPER

Mickaël Salaün (7):
  kunit: Handle thread creation error
  kunit: Fix kthread reference
  kunit: Fix timeout message
  kunit: Handle test faults
  kunit: Fix KUNIT_SUCCESS() calls in iov_iter tests
  kunit: Print last test location on fault
  kunit: Add tests for fault

Scott Mayhew (1):
  kunit: bail out early in __kunit_test_suites_init() if there are no 
suites to test

Wander Lairson Costa (1):
  kunit: unregister the device on error

 include/kunit/test.h   | 24 +++---
 include/kunit/try-catch.h  |  3 ---
 kernel/kthread.c   |  1 +
 lib/kunit/Kconfig  | 11 +++
 lib/kunit/device.c |  2 +-
 lib/kunit/kunit-test.c | 45 +-
 lib/kunit/string-stream-test.c | 12 ++-
 lib/kunit/test.c   |  3 +++
 lib/kunit/try-catch.c  | 40 ++---
 lib/kunit_iov_iter.c   | 18 -
 10 files changed, 121 insertions(+), 38 deletions(-)
diff --git a/include/kunit/test.h b/include/kunit/test.h
index 61637ef32302..e32b4cb7afa2 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -301,6 +301,8 @@ struct kunit {
 	struct list_head resources; /* Protected by lock. */
 
 	char status_comment[KUNIT_STATUS_COMMENT_SIZE];
+	/* Saves the last seen test. Useful to help with faults. */
+	struct kunit_loc last_seen;
 };
 
 static inline void kunit_set_failure(struct kunit *test)
@@ -567,6 +569,15 @@ void __printf(2, 3) kunit_log_append(struct string_stream *log, const char *fmt,
 #define kunit_err(test, fmt, ...) \
 	kunit_printk(KERN_ERR, test, fmt, ##__VA_ARGS__)
 
+/*
+ * Must be called at the beginning of each KUNIT_*_ASSERTION().
+ * Cf. KUNIT_CURRENT_LOC.
+ */
+#define _KUNIT_SAVE_LOC(test) do {	   \
+	WRITE_ONCE(test->last_seen.file, __FILE__);			   \
+	WRITE_ONCE(test->last_seen.line, __LINE__);			   \
+} while (0)
+
 /**
  * KUNIT_SUCCEED() - A no-op expectation. Only exists for code clarity.
  * @test: The test context object.
@@ -575,7 +586,7 @@ void __printf(2, 3) kunit_log_append(struct string_stream *log, const char *fmt,
  * words, it does nothing and only exists for code clarity. See
  * KUNIT_EXPECT_TRUE() for more information.
  */
-#define KUNIT_SUCCEED(test) do {} while (0)
+#define KUNIT_SUCCEED(test) _KUNIT_SAVE_LOC(test)
 
 void __noreturn __kunit_abort(struct kunit *test);
 
@@ -601,14 +612,16 @@ void __printf(6, 7) __kunit_do_failed_assertion(struct kunit *test,
 } while (0)
 
 
-#define KUNIT_FAIL_ASSERTION(test, assert_type, fmt, ...)		   \
+#define KUNIT_FAIL_ASSERTION(test, assert_type, fmt, ...) do {		   \
+	_KUNIT_SAVE_LOC(test);		   \
 	_KUNIT_FAILED(test,		   \
 		  assert_type,	   \
 		  kunit_fail_assert,   \
 		  

[GIT PULL] nolibc changes for Linux 6.10-rc1

2024-05-13 Thread Shuah Khan

Hi Linus,

Please pull the nolibc update for Linux 6.10-rc1.

This nolibc update for Linux 6.10-rc1

- adds support for uname(2)
- removes open-coded strnlen()
- exports strlen()
- adds tests for strlcat() and strlcpy()
- fixes memory error in realloc()
- fixes strlcat() return code and size usage
- fixes strlcpy() return code and size usage

diff is attached.

thanks,
-- Shuah


The following changes since commit 4cece764965020c22cff7665b18a012006359095:

  Linux 6.9-rc1 (2024-03-24 14:10:05 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest 
tags/linux_kselftest-nolibc-6.10-rc1

for you to fetch changes up to 0adab2b6b7336fb6ee3c6456a432dad3b1d25647:

  tools/nolibc: add support for uname(2) (2024-04-14 20:28:54 +0200)


linux_kselftest-nolibc-6.10-rc1

This nolibc update for Linux 6.10-rc1

- adds support for uname(2)
- removes open-coded strnlen()
- exports strlen()
- adds tests for strlcat() and strlcpy()
- fixes memory error in realloc()
- fixes strlcat() return code and size usage
- fixes strlcpy() return code and size usage


Brennan Xavier McManus (1):
  tools/nolibc/stdlib: fix memory error in realloc()

Rodrigo Campos (4):
  tools/nolibc/string: export strlen()
  tools/nolibc: Fix strlcat() return code and size usage
  tools/nolibc: Fix strlcpy() return code and size usage
  selftests/nolibc: Add tests for strlcat() and strlcpy()

Thomas Weißschuh (2):
  tools/nolibc/string: remove open-coded strnlen()
  tools/nolibc: add support for uname(2)

 tools/include/nolibc/stdlib.h|  2 +-
 tools/include/nolibc/string.h| 46 +---
 tools/include/nolibc/sys.h   | 27 +
 tools/testing/selftests/nolibc/nolibc-test.c | 82 
 4 files changed, 136 insertions(+), 21 deletions(-)

diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index bacfd35c5156..5be9d3c7435a 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -185,7 +185,7 @@ void *realloc(void *old_ptr, size_t new_size)
 	if (__builtin_expect(!ret, 0))
 		return NULL;
 
-	memcpy(ret, heap->user_p, heap->len);
+	memcpy(ret, heap->user_p, user_p_len);
 	munmap(heap, heap->len);
 	return ret;
 }
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
index a01c69dd495f..f9ab28421e6d 100644
--- a/tools/include/nolibc/string.h
+++ b/tools/include/nolibc/string.h
@@ -123,7 +123,7 @@ char *strcpy(char *dst, const char *src)
  * thus itself, hence the asm() statement below that's meant to disable this
  * confusing practice.
  */
-static __attribute__((unused))
+__attribute__((weak,unused,section(".text.nolibc_strlen")))
 size_t strlen(const char *str)
 {
 	size_t len;
@@ -187,22 +187,26 @@ char *strndup(const char *str, size_t maxlen)
 static __attribute__((unused))
 size_t strlcat(char *dst, const char *src, size_t size)
 {
-	size_t len;
-	char c;
-
-	for (len = 0; dst[len];	len++)
-		;
-
-	for (;;) {
-		c = *src;
-		if (len < size)
-			dst[len] = c;
-		if (!c)
+	size_t len = strnlen(dst, size);
+
+	/*
+	 * We want len < size-1. But as size is unsigned and can wrap
+	 * around, we use len + 1 instead.
+	 */
+	while (len + 1 < size) {
+		dst[len] = *src;
+		if (*src == '\0')
 			break;
 		len++;
 		src++;
 	}
 
+	if (len < size)
+		dst[len] = '\0';
+
+	while (*src++)
+		len++;
+
 	return len;
 }
 
@@ -210,16 +214,18 @@ static __attribute__((unused))
 size_t strlcpy(char *dst, const char *src, size_t size)
 {
 	size_t len;
-	char c;
 
-	for (len = 0;;) {
-		c = src[len];
-		if (len < size)
-			dst[len] = c;
-		if (!c)
-			break;
-		len++;
+	for (len = 0; len < size; len++) {
+		dst[len] = src[len];
+		if (!dst[len])
+			return len;
 	}
+	if (size)
+		dst[size-1] = '\0';
+
+	while (src[len])
+		len++;
+
 	return len;
 }
 
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index dda9dffd1d74..7b82bc3cf107 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -22,6 +22,7 @@
 #include   /* for statx() */
 #include 
 #include 
+#include 
 
 #include "arch.h"
 #include "errno.h"
@@ -1139,6 +1140,32 @@ int umount2(const char *path, int flags)
 }
 
 
+/*
+ * int uname(struct utsname *buf);
+ */
+
+struct utsname {
+	char sysname[65];
+	char nodename[65];
+	char release[65];
+	char version[65];
+	char machine[65];
+	char domainname[65];
+};
+
+static __attribute__((unused))
+int sys_uname(struct utsname *buf)
+{
+	return my_syscall1(__NR_uname, buf);
+}
+
+static __attribute__((unused))
+int uname(struct utsname *buf)
+{
+	return __sysret(sys_uname(buf));
+}
+
+
 /*
  * int unlink(const char *path);
  */
diff --git 

Re: [PATCH net-next] selftests: net: local_termination: annotate the expected failures

2024-05-13 Thread Petr Machata


Jakub Kicinski  writes:

> @@ -157,7 +168,7 @@ run_test()
>  
>   check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
>   "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
> - false
> + false true
>  
>   check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \
>   "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \

For veth specifically there is xfail_on_veth:

xfail_on_veth $rcv_if_name \
check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
  "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
  false

Which is IMHO clearer than passing an extra boolean.

Not sure what to do about the bridge bit though. In principle the
various xfail_on_'s can be chained, so e.g. this should work:

xfail_on_bridge $rcv_if_name \
xfail_on_veth $rcv_if_name \
check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
  "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
  false

I find this preferable to adding these ad-hoc tweaks to each test
individually. Maybe it would make sense to have:

xfail_on_kind $rcv_if_name veth bridge \
check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
  "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
  false

And then either replace the existing xfail_on_veth's (there are just a
handful) or convert xfail_on_veth to a wrapper around xfail_on_kind.



Re: [PATCH v1 1/1] selftests/sgx: Fix the implicit declaration of asprintf() compiler error

2024-05-13 Thread Jarkko Sakkinen
On Mon May 13, 2024 at 12:43 PM EEST, Mirsad Todorovac wrote:
> Thanks for your explanation.
>
> I did not realise that __USE_GNU is evil. :-/

It's not "evil" IMHO. It is not just part of defined API :-)

Thus the official man pages are your friend.

>
> FWIW, there is a sound explanation of the difference between
> _GNU_SOURCE and __USE_GNU
> here: https://stackoverflow.com/questions/7296963/gnu-source-and-use-gnu
>
> Thanks,
> Mirsad

BR, Jarkko



Re: [PATCH v1 1/1] selftests/sgx: Fix the implicit declaration of asprintf() compiler error

2024-05-13 Thread Mirsad Todorovac
Thanks for your explanation.

I did not realise that __USE_GNU is evil. :-/

FWIW, there is a sound explanation of the difference between
_GNU_SOURCE and __USE_GNU
here: https://stackoverflow.com/questions/7296963/gnu-source-and-use-gnu

Thanks,
Mirsad

On Mon, May 13, 2024 at 1:02 AM Jarkko Sakkinen  wrote:
>
> On Sat May 11, 2024 at 12:02 AM EEST, Mirsad Todorovac wrote:
> > On 5/10/24 22:52, John Hubbard wrote:
> > > On 5/10/24 1:37 PM, Mirsad Todorovac wrote:
> > > ...
> > >> The fix defines __USE_GNU before including  in case it isn't 
> > >> already
> > >> defined. After this intervention the module compiles OK.
> > >
> > > Instead of interventions, I believe the standard way to do this is to 
> > > simply
> > > define _GNU_SOURCE before including the header file(s). For example, the
> > > following also fixes the compilation failure on Ubuntu:
> > >
> > > diff --git a/tools/testing/selftests/sgx/main.c 
> > > b/tools/testing/selftests/sgx/main.c
> > > index 9820b3809c69..bb6e795d06e2 100644
> > > --- a/tools/testing/selftests/sgx/main.c
> > > +++ b/tools/testing/selftests/sgx/main.c
> > > @@ -1,6 +1,7 @@
> > >  // SPDX-License-Identifier: GPL-2.0
> > >  /*  Copyright(c) 2016-20 Intel Corporation. */
> > >
> > > +#define _GNU_SOURCE
> > >  #include 
> > >  #include 
> > >  #include 
> > >
> > >
> > > However, that's not required, because Edward Liaw is already on v4 of
> > > a patchset[1] that fixes up the _GNU_SOURCE problem for all selftests.
> > >
> > > [1] https://lore.kernel.org/all/2024051842.410729-2-edl...@google.com/
> > >
> > > thanks,
> >
> > Hi,
> >
> > Yes, I actually like Ed's solution more, because it solves the asprintf() 
> > prototype
> > problem with TEST_HARNESS_MAIN macro for all of the tests.
> >
> > Sorry for the noise and the time wasted reviewing. 8-|
> >
> > Best regards,
> > Mirsad Todorovac
>
> Yeah, well, it does not cause any harm and I was not sure when the patch
> set is in mainline so thus gave the pointers. Anyway, never ever touch
> __USE_GNU and always look at the man page from man7.org next time and
> should cause less friction...
>
> BR, Jarkko



Re: [PATCH v6 13/17] riscv: vector: Support xtheadvector save/restore

2024-05-13 Thread Andy Chiu
Hi Charlie,

Sorry, I am late on this. I haven't looked through the entire series
yet, but here is something that I thought worth bringing up sooner.

On Sat, May 4, 2024 at 2:22 AM Charlie Jenkins  wrote:
>
> Use alternatives to add support for xtheadvector vector save/restore
> routines.
>
> Signed-off-by: Charlie Jenkins 
> ---
>  arch/riscv/Kconfig.vendor  |  13 ++
>  arch/riscv/include/asm/csr.h   |   6 +
>  arch/riscv/include/asm/switch_to.h |   2 +-
>  arch/riscv/include/asm/vector.h| 247 
> ++---
>  arch/riscv/kernel/cpufeature.c |   2 +-
>  arch/riscv/kernel/kernel_mode_vector.c |   8 +-
>  arch/riscv/kernel/process.c|   4 +-
>  arch/riscv/kernel/signal.c |   6 +-
>  arch/riscv/kernel/vector.c |  13 +-
>  9 files changed, 233 insertions(+), 68 deletions(-)
>
> diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor
> index aa5a191e659e..edf49f3065ac 100644
> --- a/arch/riscv/Kconfig.vendor
> +++ b/arch/riscv/Kconfig.vendor
> @@ -13,6 +13,19 @@ config RISCV_ISA_VENDOR_EXT_THEAD
>   extensions. Without this option enabled, T-Head vendor extensions 
> will
>   not be detected at boot and their presence not reported to 
> userspace.
>
> + If you don't know what to do here, say Y.
> +
> +config RISCV_ISA_XTHEADVECTOR
> +   bool "xtheadvector extension support"
> +   depends on RISCV_ISA_VENDOR_EXT_THEAD
> +   depends on RISCV_ISA_V
> +   depends on FPU
> +   default y
> +   help
> + Say N here if you want to disable all xtheadvector related procedure
> + in the kernel. This will disable vector for any T-Head board that
> + contains xtheadvector rather than the standard vector.
> +
>   If you don't know what to do here, say Y.
>  endmenu
>
> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> index e5a35efd56e0..13657d096e7d 100644
> --- a/arch/riscv/include/asm/csr.h
> +++ b/arch/riscv/include/asm/csr.h
> @@ -30,6 +30,12 @@
>  #define SR_VS_CLEAN_AC(0x0400, UL)
>  #define SR_VS_DIRTY_AC(0x0600, UL)
>
> +#define SR_VS_THEAD_AC(0x0180, UL) /* xtheadvector Status */
> +#define SR_VS_OFF_THEAD_AC(0x, UL)
> +#define SR_VS_INITIAL_THEAD_AC(0x0080, UL)
> +#define SR_VS_CLEAN_THEAD  _AC(0x0100, UL)
> +#define SR_VS_DIRTY_THEAD  _AC(0x0180, UL)
> +
>  #define SR_XS  _AC(0x00018000, UL) /* Extension Status */
>  #define SR_XS_OFF  _AC(0x, UL)
>  #define SR_XS_INITIAL  _AC(0x8000, UL)
> diff --git a/arch/riscv/include/asm/switch_to.h 
> b/arch/riscv/include/asm/switch_to.h
> index 7efdb0584d47..ada6b5cf2d94 100644
> --- a/arch/riscv/include/asm/switch_to.h
> +++ b/arch/riscv/include/asm/switch_to.h
> @@ -78,7 +78,7 @@ do {  \
> struct task_struct *__next = (next);\
> if (has_fpu())  \
> __switch_to_fpu(__prev, __next);\
> -   if (has_vector())   \
> +   if (has_vector() || has_xtheadvector()) \
> __switch_to_vector(__prev, __next); \
> ((last) = __switch_to(__prev, __next)); \
>  } while (0)
> diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
> index 731dcd0ed4de..db851dc81870 100644
> --- a/arch/riscv/include/asm/vector.h
> +++ b/arch/riscv/include/asm/vector.h
> @@ -18,6 +18,27 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
> +#include 
> +
> +#define __riscv_v_vstate_or(_val, TYPE) ({ \
> +   typeof(_val) _res = _val;   \
> +   if (has_xtheadvector()) \
> +   _res = (_res & ~SR_VS_THEAD) | SR_VS_##TYPE##_THEAD;\
> +   else\
> +   _res = (_res & ~SR_VS) | SR_VS_##TYPE;  \
> +   _res;   \
> +})
> +
> +#define __riscv_v_vstate_check(_val, TYPE) ({  \
> +   bool _res;  \
> +   if (has_xtheadvector()) \
> +   _res = ((_val) & SR_VS_THEAD) == SR_VS_##TYPE##_THEAD;  \
> +   else\
> +   _res = ((_val) & SR_VS) == SR_VS_##TYPE;\
> +   _res;   \
> +})
>
>  extern unsigned long riscv_v_vsize;
>  int riscv_v_setup_vsize(void);
> @@ -40,39 +61,62 @@ static __always_inline bool has_vector(void)
> return riscv_has_extension_unlikely(RISCV_ISA_EXT_v);
>  }
>
> +static __always_inline bool has_xtheadvector_no_alternatives(void)
> +{
> +   if 

[PATCH] selftests/mm: compaction_test: Fix trivial test pass on Aarch64 when nr_hugepages = 0

2024-05-13 Thread Dev Jain
Currently, if at runtime we are not able to allocate a huge page, the
test will trivially pass on Aarch64 due to no exception being raised on
division by zero while computing compaction_index. Fix that by checking
for nr_hugepages == 0. Anyways, in general, avoid a division by zero by
exiting the program beforehand. While at it, fix a typo.

Signed-off-by: Dev Jain 
---
 tools/testing/selftests/mm/compaction_test.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/mm/compaction_test.c 
b/tools/testing/selftests/mm/compaction_test.c
index 533999b6c284..df1b76f9c734 100644
--- a/tools/testing/selftests/mm/compaction_test.c
+++ b/tools/testing/selftests/mm/compaction_test.c
@@ -134,6 +134,10 @@ int check_compaction(unsigned long mem_free, unsigned int 
hugepage_size)
 
/* We should have been able to request at least 1/3 rd of the memory in
   huge pages */
+   if (!atoi(nr_hugepages)) {
+   ksft_print_msg("ERROR: No memory is available as huge pages\n");
+   goto close_fd;
+   }
compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size);
 
lseek(fd, 0, SEEK_SET);
@@ -149,7 +153,7 @@ int check_compaction(unsigned long mem_free, unsigned int 
hugepage_size)
   atoi(nr_hugepages));
 
if (compaction_index > 3) {
-   ksft_print_msg("ERROR: Less that 1/%d of memory is available\n"
+   ksft_print_msg("ERROR: Less than 1/%d of memory is available\n"
   "as huge pages\n", compaction_index);
goto close_fd;
}
-- 
2.39.2




Re: [PATCH AUTOSEL 6.1 08/25] KVM: selftests: Add test for uaccesses to non-existent vgic-v2 CPUIF

2024-05-13 Thread Marc Zyngier
On Mon, 13 May 2024 09:20:38 +0100,
Pavel Machek  wrote:
> 
> Hi!
> 
> > Assert that accesses to a non-existent vgic-v2 CPU interface
> > consistently fail across the various KVM device attr ioctls. This also
> > serves as a regression test for a bug wherein KVM hits a NULL
> > dereference when the CPUID specified in the ioctl is invalid.
> > 
> > Note that there is no need to print the observed errno, as TEST_ASSERT()
> > will take care of it.
> 
> I don't think this fixes the bug... and thus we should not need it in
> stable.

Given that this goes together with an actually bug fix that was
backported, it *is*, for once, actually useful to have it in stable.

M.

-- 
Without deviation from the norm, progress is not possible.



Re: [PATCH AUTOSEL 6.1 08/25] KVM: selftests: Add test for uaccesses to non-existent vgic-v2 CPUIF

2024-05-13 Thread Pavel Machek
Hi!

> Assert that accesses to a non-existent vgic-v2 CPU interface
> consistently fail across the various KVM device attr ioctls. This also
> serves as a regression test for a bug wherein KVM hits a NULL
> dereference when the CPUID specified in the ioctl is invalid.
> 
> Note that there is no need to print the observed errno, as TEST_ASSERT()
> will take care of it.

I don't think this fixes the bug... and thus we should not need it in
stable.

Best regards,
Pavel
-- 
DENX Software Engineering GmbH,Managing Director: Erika Unter
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany


signature.asc
Description: PGP signature


Re: [PATCH bpf-next v2 1/4] selftests/bpf: Add some null pointer checks

2024-05-13 Thread Markus Elfring
> There is a 'malloc' call, which can be unsuccessful.

  two calls?


> This patch will add the malloc failure checking
…

Please use imperative wordings for improved change descriptions also in your 
patches.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/submitting-patches.rst?h=v6.9#n94

Regards,
Markus



Re: [PATCH bpf-next v2 1/4] selftests/bpf: Add some null pointer checks

2024-05-13 Thread Kunwu Chan

On 2024/5/10 19:20, Muhammad Usama Anjum wrote:

On 5/10/24 2:58 PM, kunwu.c...@linux.dev wrote:

From: Kunwu Chan 

There is a 'malloc' call, which can be unsuccessful.
This patch will add the malloc failure checking
to avoid possible null dereference.

Signed-off-by: Kunwu Chan 
---
  tools/testing/selftests/bpf/test_progs.c | 7 +++
  1 file changed, 7 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_progs.c 
b/tools/testing/selftests/bpf/test_progs.c
index 89ff704e9dad..ecc3ddeceeeb 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -582,6 +582,11 @@ int compare_stack_ips(int smap_fd, int amap_fd, int 
stack_trace_len)
  
  	val_buf1 = malloc(stack_trace_len);

val_buf2 = malloc(stack_trace_len);
+   if (!val_buf1 || !val_buf2) {
+   err = -ENOMEM;

Return from here instead of going to out where free(val_buf*) is being called.
I think it's no harm.  And Unify the processing at the end to achieve 
uniform format.

+   goto out;
+   }
+
cur_key_p = NULL;
next_key_p = 
while (bpf_map_get_next_key(smap_fd, cur_key_p, next_key_p) == 0) {
@@ -1197,6 +1202,8 @@ static int dispatch_thread_send_subtests(int sock_fd, 
struct test_state *state)
int subtest_num = state->subtest_num;
  
  	state->subtest_states = malloc(subtest_num * sizeof(*subtest_state));

+   if (!state->subtest_states)
+   return -ENOMEM;
  
  	for (int i = 0; i < subtest_num; i++) {

subtest_state = >subtest_states[i];


--
Thanks,
  Kunwu.Chan




[PATCH net v2] selftests: net: kill smcrouted in the cleanup logic in amt.sh

2024-05-13 Thread Taehee Yoo
The amt.sh requires smcrouted for multicasting routing.
So, it starts smcrouted before forwarding tests.
It must be stopped after all tests, but it isn't.

To fix this issue, it kills smcrouted in the cleanup logic.

Fixes: c08e8baea78e ("selftests: add amt interface selftest script")
Signed-off-by: Taehee Yoo 
---
The v1 patch is here:
https://lore.kernel.org/netdev/20240508040643.229383-1-ap420...@gmail.com/

v2
 - Headline change.
 - Kill smcrouted process only if amt.pid exists.
 - Do not remove the return value.
 - Remove timeout logic because it was already fixed by following commit
   4c639b6a7b9d ("selftests: net: move amt to socat for better compatibility")
 - Fix shebang.

 tools/testing/selftests/net/amt.sh | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/amt.sh 
b/tools/testing/selftests/net/amt.sh
index 5175a42cbe8a..d458b45c775b 100755
--- a/tools/testing/selftests/net/amt.sh
+++ b/tools/testing/selftests/net/amt.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
 # Author: Taehee Yoo 
@@ -77,6 +77,7 @@ readonly LISTENER=$(mktemp -u listener-)
 readonly GATEWAY=$(mktemp -u gateway-)
 readonly RELAY=$(mktemp -u relay-)
 readonly SOURCE=$(mktemp -u source-)
+readonly SMCROUTEDIR="$(mktemp -d)"
 ERR=4
 err=0
 
@@ -85,6 +86,11 @@ exit_cleanup()
for ns in "$@"; do
ip netns delete "${ns}" 2>/dev/null || true
done
+   if [ -f "$SMCROUTEDIR/amt.pid" ]; then
+   smcpid=$(< $SMCROUTEDIR/amt.pid)
+   kill $smcpid
+   fi
+   rm -rf $SMCROUTEDIR
 
exit $ERR
 }
@@ -167,7 +173,7 @@ setup_iptables()
 
 setup_mcast_routing()
 {
-   ip netns exec "${RELAY}" smcrouted
+   ip netns exec "${RELAY}" smcrouted -P $SMCROUTEDIR/amt.pid
ip netns exec "${RELAY}" smcroutectl a relay_src \
172.17.0.2 239.0.0.1 amtr
ip netns exec "${RELAY}" smcroutectl a relay_src \
-- 
2.34.1