On Tue, Mar 05, 2024 at 12:16:37PM +0300, Vitaliy Makkoveev wrote:
> On Mon, Mar 04, 2024 at 01:00:32PM +0100, Claudio Jeker wrote:
> > On Wed, Feb 28, 2024 at 06:03:14PM +0300, Vitaliy Makkoveev wrote:
> > > On Wed, Feb 28, 2024 at 02:45:44PM +0100, Martin Pieuchot wrote:
> > > > > > > 
> > > > > > > Not only wg(4). Depends on interface queue usage, ifq_start() 
> > > > > > > schedules
> > > > > > > (*if_qstart)() or calls it, so all the interfaces with use 
> > > > > > > rwlock(9) in
> > > > > > > (*if_qstart)() handler are in risk.
> > > > > > > 
> > > > > > > What about to always schedule (*if_qstart)()?
> > > > > > 
> > > > > > Why would you want to introduce additional latence?
> > > > > > 
> > > > > 
> > > > > I suppose it the less evil than strictly deny rwlocks in 
> > > > > (*if_qstart)().
> > > > > Anyway it will be scheduled unless `seq_len' exceeds the watermark. 
> > > > 
> > > > Please no.  This is not going to happen.  wg(4) has to be fixed.  Let's
> > > > not change the design of the kernel every time a bug is found.
> > > > 
> > > 
> > > I'm not the fan of ifq_start() behaviour.
> > > 
> > > wg(4) needs to convert `t_lock', `r_keypair_lock' and `c_lock' rwlocks
> > > to mutexes. I used mtx_init_flags() to keep existing names.
> > 
> > You do not mention if you verified that any of those codepath may sleep or
> > not.
> 
> I use wg(4), so, obviously, I verified this.
> 
> wg_qstart() calls the following routines: noise_remote_ready(),
> wg_tag_get(), wg_timers_event_want_initiation() and wg_queue_out().
> 
> noise_remote_ready() has the sleep point produced by `r_keypair_lock'
> rwlock. We also acquire `c_lock' rwlock with `r_keypair_lock' rwlock
> held. I changed them both to `r_keypair_mtx' and `c_mtx' mutexes.
> 
> `c_lock' serialized paths noise_counter_send() and noise_counter_recv()
> have no sleep points.
> 
> `r_keypair_lock' serialized paths noise_remote_begin_session(),
> noise_remote_clear(), noise_remote_expire_current(),
> noise_remote_ready(), noise_remote_encrypt() and  noise_remote_decrypt()
> have no sleep points. The suspicious noise_remote_keypair_allocate()
> only performs operations around list.
> 
> wg_tag_get() has no sleep points.
> 
> wg_timers_event_want_initiation() has the sleep point produced by
> `t_lock' rwlock. We also acquire `t_handshake_mtx' mutex with
> `t_lock' held. I changed `t_lock' to `t_mtx' mutex.
> 
> wg_queue_out() has no sleep points. 
> 
> `c_lock' protected paths has no sleep points.
> 
> The `t_mtx' is the simplest for review. It protects `t_disabled' and
> `t_need_another_keepalive' and the serialized paths are timeout or task
> scheduling.
> 
> > ...I don't have the time to go over all those codepaths and check.
> >  
> 
> I thought the reported is interesting to fix this and provides some
> feedback.

Thanks for the details. I suggest to commit this so we can get some tests
quickly and hopefully it helps to fix some of the wg(4) crashes people
hit. According to your other mail there are some additional races that
require fixing.

Diff looks OK to me -- but I don't use wg(4).
 
> > > Index: sys/net/if_wg.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/net/if_wg.c,v
> > > retrieving revision 1.36
> > > diff -u -p -r1.36 if_wg.c
> > > --- sys/net/if_wg.c       18 Jan 2024 08:46:41 -0000      1.36
> > > +++ sys/net/if_wg.c       28 Feb 2024 14:49:16 -0000
> > > @@ -150,8 +150,8 @@ struct wg_index {
> > >  };
> > >  
> > >  struct wg_timers {
> > > - /* t_lock is for blocking wg_timers_event_* when setting t_disabled. */
> > > - struct rwlock            t_lock;
> > > + /* t_mtx is for blocking wg_timers_event_* when setting t_disabled. */
> > > + struct mutex             t_mtx;
> > >  
> > >   int                      t_disabled;
> > >   int                      t_need_another_keepalive;
> > > @@ -930,7 +930,7 @@ void
> > >  wg_timers_init(struct wg_timers *t)
> > >  {
> > >   bzero(t, sizeof(*t));
> > > - rw_init(&t->t_lock, "wg_timers");
> > > + mtx_init_flags(&t->t_mtx, IPL_NET, "wg_timers", 0);
> > >   mtx_init(&t->t_handshake_mtx, IPL_NET);
> > >  
> > >   timeout_set(&t->t_new_handshake, wg_timers_run_new_handshake, t);
> > > @@ -945,19 +945,19 @@ wg_timers_init(struct wg_timers *t)
> > >  void
> > >  wg_timers_enable(struct wg_timers *t)
> > >  {
> > > - rw_enter_write(&t->t_lock);
> > > + mtx_enter(&t->t_mtx);
> > >   t->t_disabled = 0;
> > > - rw_exit_write(&t->t_lock);
> > > + mtx_leave(&t->t_mtx);
> > >   wg_timers_run_persistent_keepalive(t);
> > >  }
> > >  
> > >  void
> > >  wg_timers_disable(struct wg_timers *t)
> > >  {
> > > - rw_enter_write(&t->t_lock);
> > > + mtx_enter(&t->t_mtx);
> > >   t->t_disabled = 1;
> > >   t->t_need_another_keepalive = 0;
> > > - rw_exit_write(&t->t_lock);
> > > + mtx_leave(&t->t_mtx);
> > >  
> > >   timeout_del_barrier(&t->t_new_handshake);
> > >   timeout_del_barrier(&t->t_send_keepalive);
> > > @@ -969,12 +969,12 @@ wg_timers_disable(struct wg_timers *t)
> > >  void
> > >  wg_timers_set_persistent_keepalive(struct wg_timers *t, uint16_t 
> > > interval)
> > >  {
> > > - rw_enter_read(&t->t_lock);
> > > + mtx_enter(&t->t_mtx);
> > >   if (!t->t_disabled) {
> > >           t->t_persistent_keepalive_interval = interval;
> > >           wg_timers_run_persistent_keepalive(t);
> > >   }
> > > - rw_exit_read(&t->t_lock);
> > > + mtx_leave(&t->t_mtx);
> > >  }
> > >  
> > >  int
> > > @@ -1020,16 +1020,16 @@ wg_timers_event_data_sent(struct wg_time
> > >   int     msecs = NEW_HANDSHAKE_TIMEOUT * 1000;
> > >   msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
> > >  
> > > - rw_enter_read(&t->t_lock);
> > > + mtx_enter(&t->t_mtx);
> > >   if (!t->t_disabled && !timeout_pending(&t->t_new_handshake))
> > >           timeout_add_msec(&t->t_new_handshake, msecs);
> > > - rw_exit_read(&t->t_lock);
> > > + mtx_leave(&t->t_mtx);
> > >  }
> > >  
> > >  void
> > >  wg_timers_event_data_received(struct wg_timers *t)
> > >  {
> > > - rw_enter_read(&t->t_lock);
> > > + mtx_enter(&t->t_mtx);
> > >   if (!t->t_disabled) {
> > >           if (!timeout_pending(&t->t_send_keepalive))
> > >                   timeout_add_sec(&t->t_send_keepalive,
> > > @@ -1037,7 +1037,7 @@ wg_timers_event_data_received(struct wg_
> > >           else
> > >                   t->t_need_another_keepalive = 1;
> > >   }
> > > - rw_exit_read(&t->t_lock);
> > > + mtx_leave(&t->t_mtx);
> > >  }
> > >  
> > >  void
> > > @@ -1055,11 +1055,11 @@ wg_timers_event_any_authenticated_packet
> > >  void
> > >  wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *t)
> > >  {
> > > - rw_enter_read(&t->t_lock);
> > > + mtx_enter(&t->t_mtx);
> > >   if (!t->t_disabled && t->t_persistent_keepalive_interval > 0)
> > >           timeout_add_sec(&t->t_persistent_keepalive,
> > >               t->t_persistent_keepalive_interval);
> > > - rw_exit_read(&t->t_lock);
> > > + mtx_leave(&t->t_mtx);
> > >  }
> > >  
> > >  void
> > > @@ -1068,10 +1068,10 @@ wg_timers_event_handshake_initiated(stru
> > >   int     msecs = REKEY_TIMEOUT * 1000;
> > >   msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
> > >  
> > > - rw_enter_read(&t->t_lock);
> > > + mtx_enter(&t->t_mtx);
> > >   if (!t->t_disabled)
> > >           timeout_add_msec(&t->t_retry_handshake, msecs);
> > > - rw_exit_read(&t->t_lock);
> > > + mtx_leave(&t->t_mtx);
> > >  }
> > >  
> > >  void
> > > @@ -1085,7 +1085,7 @@ wg_timers_event_handshake_responded(stru
> > >  void
> > >  wg_timers_event_handshake_complete(struct wg_timers *t)
> > >  {
> > > - rw_enter_read(&t->t_lock);
> > > + mtx_enter(&t->t_mtx);
> > >   if (!t->t_disabled) {
> > >           mtx_enter(&t->t_handshake_mtx);
> > >           timeout_del(&t->t_retry_handshake);
> > > @@ -1094,25 +1094,25 @@ wg_timers_event_handshake_complete(struc
> > >           mtx_leave(&t->t_handshake_mtx);
> > >           wg_timers_run_send_keepalive(t);
> > >   }
> > > - rw_exit_read(&t->t_lock);
> > > + mtx_leave(&t->t_mtx);
> > >  }
> > >  
> > >  void
> > >  wg_timers_event_session_derived(struct wg_timers *t)
> > >  {
> > > - rw_enter_read(&t->t_lock);
> > > + mtx_enter(&t->t_mtx);
> > >   if (!t->t_disabled)
> > >           timeout_add_sec(&t->t_zero_key_material, REJECT_AFTER_TIME * 3);
> > > - rw_exit_read(&t->t_lock);
> > > + mtx_leave(&t->t_mtx);
> > >  }
> > >  
> > >  void
> > >  wg_timers_event_want_initiation(struct wg_timers *t)
> > >  {
> > > - rw_enter_read(&t->t_lock);
> > > + mtx_enter(&t->t_mtx);
> > >   if (!t->t_disabled)
> > >           wg_timers_run_send_initiation(t, 0);
> > > - rw_exit_read(&t->t_lock);
> > > + mtx_leave(&t->t_mtx);
> > >  }
> > >  
> > >  void
> > > Index: sys/net/wg_noise.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/net/wg_noise.c,v
> > > retrieving revision 1.6
> > > diff -u -p -r1.6 wg_noise.c
> > > --- sys/net/wg_noise.c    3 Feb 2023 18:31:17 -0000       1.6
> > > +++ sys/net/wg_noise.c    28 Feb 2024 14:49:16 -0000
> > > @@ -20,6 +20,7 @@
> > >  #include <sys/systm.h>
> > >  #include <sys/param.h>
> > >  #include <sys/atomic.h>
> > > +#include <sys/mutex.h>
> > >  #include <sys/rwlock.h>
> > >  
> > >  #include <crypto/blake2s.h>
> > > @@ -139,7 +140,7 @@ noise_remote_init(struct noise_remote *r
> > >   bzero(r, sizeof(*r));
> > >   memcpy(r->r_public, public, NOISE_PUBLIC_KEY_LEN);
> > >   rw_init(&r->r_handshake_lock, "noise_handshake");
> > > - rw_init(&r->r_keypair_lock, "noise_keypair");
> > > + mtx_init_flags(&r->r_keypair_mtx, IPL_NET, "noise_keypair", 0);
> > >  
> > >   SLIST_INSERT_HEAD(&r->r_unused_keypairs, &r->r_keypair[0], kp_entry);
> > >   SLIST_INSERT_HEAD(&r->r_unused_keypairs, &r->r_keypair[1], kp_entry);
> > > @@ -468,10 +469,10 @@ noise_remote_begin_session(struct noise_
> > >   kp.kp_remote_index = hs->hs_remote_index;
> > >   getnanouptime(&kp.kp_birthdate);
> > >   bzero(&kp.kp_ctr, sizeof(kp.kp_ctr));
> > > - rw_init(&kp.kp_ctr.c_lock, "noise_counter");
> > > + mtx_init_flags(&kp.kp_ctr.c_mtx, IPL_NET, "noise_counter", 0);
> > >  
> > >   /* Now we need to add_new_keypair */
> > > - rw_enter_write(&r->r_keypair_lock);
> > > + mtx_enter(&r->r_keypair_mtx);
> > >   next = r->r_next;
> > >   current = r->r_current;
> > >   previous = r->r_previous;
> > > @@ -497,7 +498,7 @@ noise_remote_begin_session(struct noise_
> > >           r->r_next = noise_remote_keypair_allocate(r);
> > >           *r->r_next = kp;
> > >   }
> > > - rw_exit_write(&r->r_keypair_lock);
> > > + mtx_leave(&r->r_keypair_mtx);
> > >  
> > >   explicit_bzero(&r->r_handshake, sizeof(r->r_handshake));
> > >   rw_exit_write(&r->r_handshake_lock);
> > > @@ -514,25 +515,25 @@ noise_remote_clear(struct noise_remote *
> > >   explicit_bzero(&r->r_handshake, sizeof(r->r_handshake));
> > >   rw_exit_write(&r->r_handshake_lock);
> > >  
> > > - rw_enter_write(&r->r_keypair_lock);
> > > + mtx_enter(&r->r_keypair_mtx);
> > >   noise_remote_keypair_free(r, r->r_next);
> > >   noise_remote_keypair_free(r, r->r_current);
> > >   noise_remote_keypair_free(r, r->r_previous);
> > >   r->r_next = NULL;
> > >   r->r_current = NULL;
> > >   r->r_previous = NULL;
> > > - rw_exit_write(&r->r_keypair_lock);
> > > + mtx_leave(&r->r_keypair_mtx);
> > >  }
> > >  
> > >  void
> > >  noise_remote_expire_current(struct noise_remote *r)
> > >  {
> > > - rw_enter_write(&r->r_keypair_lock);
> > > + mtx_enter(&r->r_keypair_mtx);
> > >   if (r->r_next != NULL)
> > >           r->r_next->kp_valid = 0;
> > >   if (r->r_current != NULL)
> > >           r->r_current->kp_valid = 0;
> > > - rw_exit_write(&r->r_keypair_lock);
> > > + mtx_leave(&r->r_keypair_mtx);
> > >  }
> > >  
> > >  int
> > > @@ -541,7 +542,7 @@ noise_remote_ready(struct noise_remote *
> > >   struct noise_keypair *kp;
> > >   int ret;
> > >  
> > > - rw_enter_read(&r->r_keypair_lock);
> > > + mtx_enter(&r->r_keypair_mtx);
> > >   /* kp_ctr isn't locked here, we're happy to accept a racy read. */
> > >   if ((kp = r->r_current) == NULL ||
> > >       !kp->kp_valid ||
> > > @@ -551,7 +552,7 @@ noise_remote_ready(struct noise_remote *
> > >           ret = EINVAL;
> > >   else
> > >           ret = 0;
> > > - rw_exit_read(&r->r_keypair_lock);
> > > + mtx_leave(&r->r_keypair_mtx);
> > >   return ret;
> > >  }
> > >  
> > > @@ -562,7 +563,7 @@ noise_remote_encrypt(struct noise_remote
> > >   struct noise_keypair *kp;
> > >   int ret = EINVAL;
> > >  
> > > - rw_enter_read(&r->r_keypair_lock);
> > > + mtx_enter(&r->r_keypair_mtx);
> > >   if ((kp = r->r_current) == NULL)
> > >           goto error;
> > >  
> > > @@ -601,7 +602,7 @@ noise_remote_encrypt(struct noise_remote
> > >  
> > >   ret = 0;
> > >  error:
> > > - rw_exit_read(&r->r_keypair_lock);
> > > + mtx_leave(&r->r_keypair_mtx);
> > >   return ret;
> > >  }
> > >  
> > > @@ -616,7 +617,7 @@ noise_remote_decrypt(struct noise_remote
> > >    * attempt the current keypair first as that is most likely. We also
> > >    * want to make sure that the keypair is valid as it would be
> > >    * catastrophic to decrypt against a zero'ed keypair. */
> > > - rw_enter_read(&r->r_keypair_lock);
> > > + mtx_enter(&r->r_keypair_mtx);
> > >  
> > >   if (r->r_current != NULL && r->r_current->kp_local_index == r_idx) {
> > >           kp = r->r_current;
> > > @@ -651,8 +652,6 @@ noise_remote_decrypt(struct noise_remote
> > >    * we skip the REKEY_AFTER_TIME_RECV check. This is safe to do as a
> > >    * data packet can't confirm a session that we are an INITIATOR of. */
> > >   if (kp == r->r_next) {
> > > -         rw_exit_read(&r->r_keypair_lock);
> > > -         rw_enter_write(&r->r_keypair_lock);
> > >           if (kp == r->r_next && kp->kp_local_index == r_idx) {
> > >                   noise_remote_keypair_free(r, r->r_previous);
> > >                   r->r_previous = r->r_current;
> > > @@ -662,7 +661,6 @@ noise_remote_decrypt(struct noise_remote
> > >                   ret = ECONNRESET;
> > >                   goto error;
> > >           }
> > > -         rw_enter(&r->r_keypair_lock, RW_DOWNGRADE);
> > >   }
> > >  
> > >   /* Similar to when we encrypt, we want to notify the caller when we
> > > @@ -680,7 +678,7 @@ noise_remote_decrypt(struct noise_remote
> > >   ret = 0;
> > >  
> > >  error:
> > > - rw_exit(&r->r_keypair_lock);
> > > + mtx_leave(&r->r_keypair_mtx);
> > >   return ret;
> > >  }
> > >  
> > > @@ -731,9 +729,9 @@ noise_counter_send(struct noise_counter 
> > >   return atomic_inc_long_nv((u_long *)&ctr->c_send) - 1;
> > >  #else
> > >   uint64_t ret;
> > > - rw_enter_write(&ctr->c_lock);
> > > + mtx_enter(&ctr->c_mtx);
> > >   ret = ctr->c_send++;
> > > - rw_exit_write(&ctr->c_lock);
> > > + mtx_leave(&ctr->c_mtx);
> > >   return ret;
> > >  #endif
> > >  }
> > > @@ -745,7 +743,7 @@ noise_counter_recv(struct noise_counter 
> > >   unsigned long bit;
> > >   int ret = EEXIST;
> > >  
> > > - rw_enter_write(&ctr->c_lock);
> > > + mtx_enter(&ctr->c_mtx);
> > >  
> > >   /* Check that the recv counter is valid */
> > >   if (ctr->c_recv >= REJECT_AFTER_MESSAGES ||
> > > @@ -779,7 +777,7 @@ noise_counter_recv(struct noise_counter 
> > >  
> > >   ret = 0;
> > >  error:
> > > - rw_exit_write(&ctr->c_lock);
> > > + mtx_leave(&ctr->c_mtx);
> > >   return ret;
> > >  }
> > >  
> > > @@ -976,7 +974,7 @@ noise_timer_expired(struct timespec *bir
> > >  #define T_LIM (COUNTER_WINDOW_SIZE + 1)
> > >  #define T_INIT do {                              \
> > >   bzero(&ctr, sizeof(ctr));               \
> > > - rw_init(&ctr.c_lock, "counter");        \
> > > + mtx_init_flags(&ctr.c_mtx, IPL_NET, "counter", 0);      \
> > >  } while (0)
> > >  #define T(num, v, e) do {                                                
> > > \
> > >   if (noise_counter_recv(&ctr, v) != e) {                         \
> > > Index: sys/net/wg_noise.h
> > > ===================================================================
> > > RCS file: /cvs/src/sys/net/wg_noise.h,v
> > > retrieving revision 1.2
> > > diff -u -p -r1.2 wg_noise.h
> > > --- sys/net/wg_noise.h    9 Dec 2020 05:53:33 -0000       1.2
> > > +++ sys/net/wg_noise.h    28 Feb 2024 14:49:16 -0000
> > > @@ -21,6 +21,7 @@
> > >  
> > >  #include <sys/types.h>
> > >  #include <sys/time.h>
> > > +#include <sys/mutex.h>
> > >  #include <sys/rwlock.h>
> > >  
> > >  #include <crypto/blake2s.h>
> > > @@ -71,7 +72,7 @@ struct noise_handshake {
> > >  };
> > >  
> > >  struct noise_counter {
> > > - struct rwlock            c_lock;
> > > + struct mutex             c_mtx;
> > >   uint64_t                 c_send;
> > >   uint64_t                 c_recv;
> > >   unsigned long            c_backtrack[COUNTER_NUM];
> > > @@ -100,7 +101,7 @@ struct noise_remote {
> > >   uint8_t                          r_timestamp[NOISE_TIMESTAMP_LEN];
> > >   struct timespec                  r_last_init; /* nanouptime */
> > >  
> > > - struct rwlock                    r_keypair_lock;
> > > + struct mutex                     r_keypair_mtx;
> > >   SLIST_HEAD(,noise_keypair)       r_unused_keypairs;
> > >   struct noise_keypair            *r_next, *r_current, *r_previous;
> > >   struct noise_keypair             r_keypair[3]; /* 3: next, current, 
> > > previous. */
> > > 
> > 
> > -- 
> > :wq Claudio
> > 
> 

-- 
:wq Claudio

Reply via email to