On 5/29/26 11:51 AM, Vladimir Riabchun wrote:
We currently have the /proc/<pid>/net/rpc/kill-tasks feature for
aborting pending RPC tasks. To stop a container quickly, we need
to abort all of its RPC tasks, so we must iterate over all VE PIDs.

There are several problems with this approach:
1. In practice, with cgroups-v2 we only process the container's
    init PID, so all other network namespaces are ignored.
2. There is a risk of breaking unrelated NFS connections if a PID
    that belonged to a container process is reused while we are
    stopping the container.
3. This may be slow - a container might have a lot of processes
    and a few network namespaces.

To fix all of this, create a per-VE interface for aborting
RPC requests in all of the VE's network namespaces.

https://virtuozzo.atlassian.net/browse/VSTOR-126316

Feature: improve kill-tasks
Signed-off-by: Vladimir Riabchun<[email protected]>
---
  include/linux/sunrpc/clnt.h |  2 ++
  include/linux/ve.h          |  4 ++++
  kernel/ve/ve.c              | 44 +++++++++++++++++++++++++++++++++++++
  net/sunrpc/clnt.c           |  3 ++-
  net/sunrpc/sunrpc_syms.c    |  3 +++
  5 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 0133aeba248a..0e7c7c9107a2 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -253,6 +253,8 @@ void                rpc_clnt_probe_trunked_xprts(struct 
rpc_clnt *,
const char *rpc_proc_name(const struct rpc_task *task); +
+void rpc_kill_tasks(struct net *net);
  int rpc_task_kill_proc_init(struct net *net);
  void rpc_task_kill_proc_fini(struct net *net);
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 224acf012821..95a83c7bc7de 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -279,6 +279,8 @@ extern bool is_ve_init_net(const struct net *net);
void ve_setup_task(struct task_struct *p, struct ve_struct *ve); +void ve_set_rpc_kill_fn(void (*fn)(struct net *));
+
  #else /* CONFIG_VE */
  #include <linux/init_task.h>
  #define get_ve(ve)    ((void)(ve), NULL)
@@ -336,6 +338,8 @@ static inline int vz_security_protocol_check(struct net 
*net, int protocol) { re
static inline void ve_setup_task(struct task_struct *p, struct ve_struct *ve) { } +static inline void ve_set_rpc_kill_fn(void (*fn)(struct net *)) { }
+
  #endif        /* CONFIG_VE */
struct seq_file;
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 198c82f010cc..856d7afb9d1c 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -113,6 +113,8 @@ EXPORT_SYMBOL(nr_ve);
static DEFINE_IDR(ve_idr); +static void (*rpc_kill_net_fn)(struct net *);
Please annotate the function pointer as *__rcu*.
+
  struct ve_struct *get_ve(struct ve_struct *ve)
  {
        if (ve)
@@ -1713,6 +1715,43 @@ static ssize_t ve_write_ctty(struct kernfs_open_file 
*of, char *buf,
        return ret;
  }
+void ve_set_rpc_kill_fn(void (*fn)(struct net *))
+{
+       WRITE_ONCE(rpc_kill_net_fn, fn);
I would prefer using *rcu_assign_pointer()/rcu_dereference()* here, as is already done in other functions in this file.
+       /* ve_rpc_kill_write might be using old function.
+        * rpc_kill_tasks -> NULL is dangerous, block sunrpc exit while we use
+        * functions from this module
+        */
+       synchronize_rcu();
+}
+EXPORT_SYMBOL(ve_set_rpc_kill_fn);
+
+static int ve_rpc_kill_write(struct cgroup_subsys_state *css,
+                                struct cftype *cft, u64 val)
+{
+       struct net *net;
+       struct ve_struct *ve = css_to_ve(css);
+       void (*fn)(struct net *net);
+
+       guard(rwsem_read)(&net_rwsem);      /* for_each_net protection */
+       guard(rcu)();                   /* Begin rpc_kill_net_fn usage section 
*/
+
+       fn = READ_ONCE(rpc_kill_net_fn);
+       if (!fn) {
+               pr_info_ratelimited("SUNRPC module is not loaded.\n");
+               return 0;
+       }
+
+       for_each_net(net) {
+               if (net->owner_ve != ve)
+                       continue;
+               /* rpc_kill_tasks is atomic. */
+               fn(net);
+       }
+
+       return 0;
+}
+
  static struct cftype ve_cftypes[] = {
{
@@ -1808,6 +1847,11 @@ static struct cftype ve_cftypes[] = {
                .flags                  = CFTYPE_ONLY_ON_ROOT,
                .write                  = ve_write_ctty,
        },
+       {
+               .name                   = "rpc_kill",
+               .flags                  = CFTYPE_NOT_ON_ROOT,
+               .write_u64              = ve_rpc_kill_write,
+       },
        { }
  };
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 10ef5d9c2696..d40d59145bec 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -3406,8 +3406,9 @@ rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
  EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate);
  #endif /* CONFIG_SUNRPC_SWAP */
-static void rpc_kill_tasks(struct net *net)
+void rpc_kill_tasks(struct net *net)
  {
+       /* Note: function must be atomic, used under RCU read-lock. */
        struct rpc_clnt *clnt;
        struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index bab6cab29405..0cb280569135 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -13,6 +13,7 @@
  #include <linux/uio.h>
  #include <linux/unistd.h>
  #include <linux/init.h>
+#include <linux/ve.h>
#include <linux/sunrpc/sched.h>
  #include <linux/sunrpc/clnt.h>
@@ -114,6 +115,7 @@ init_sunrpc(void)
  #endif
        svc_init_xprt_sock();   /* svc sock transport */
        init_socket_xprt();     /* clnt sock transport */
+       ve_set_rpc_kill_fn(rpc_kill_tasks);
        return 0;
out5:
@@ -131,6 +133,7 @@ init_sunrpc(void)
  static void __exit
  cleanup_sunrpc(void)
  {
+       ve_set_rpc_kill_fn(NULL);
        rpc_sysfs_exit();
        rpc_cleanup_clids();
        xprt_cleanup_ids();

--
Best regards, Vasileios Almpanis
Software Developer, Virtuozzo.
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to