HiFive Unmatched clean poweroff using the power button

2022-08-17 Thread Jeremie Courreges-Anglas


Some time ago I wanted to get a clean poweroff from the power button on
my Unmatched, so that I don't get fsck at reboot the morning after
someone sleeps in the room where the machine lives.  kettenis kindly
provided sfgpio(4) to get interrupts working on dapmic(4) instead of my
initial hack that used polling.

One issue I struggled with for a bit is that masking irqs also masks the
wake-up events, particularly the events we use for dapmic_reset().
With the diff below most interrupt events are masked off during runtime,
until we're shutting down/rebooting.  Maybe this is too paranoid and
I should let more events go through the intr handler, eg for wake-up
events that I don't envision yet?  (I would love to get wake on lan
support in cad(4) but my attempts led nowhere so far.)

Also interesting, the fault register needs to be cleared at boot, else
the interrupt will keep triggering after e.g. a hard button-driven poweroff.
We could log the faults found in FAULT_LOG at boot time to know why the
machine has stopped.  But I'm more concerned about what to do if we get
a fault at runtime (see the XXX).  When I tried to disestablish the
interrupt handler with fdt_intr_disestablish(sc->sc_ih) from
dapmic_reset(), I got a fault. Maybe something worth investigating.

The code below is based off da9063_datasheet_2v2.pdf.  I don't know of
other machines we run on that use this controller; the only entry in
dmesglog is matthieu's Unmatched machine.

Tests, input and oks welcome.


Index: dev/fdt/dapmic.c
===
RCS file: /cvs/src/sys/dev/fdt/dapmic.c,v
retrieving revision 1.2
diff -u -p -r1.2 dapmic.c
--- dev/fdt/dapmic.c6 Apr 2022 18:59:28 -   1.2
+++ dev/fdt/dapmic.c17 Aug 2022 21:59:57 -
@@ -19,6 +19,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include 
 #include 
@@ -28,11 +31,31 @@
 
 #include 
 
+#include 
+
 extern void (*cpuresetfn)(void);
 extern void (*powerdownfn)(void);
 
 /* Registers */
+#define FAULT_LOG  0x05
 #define EVENT_A0x06
+#define  EVENT_A_EVENTS_D  (1 << 7)
+#define  EVENT_A_EVENTS_C  (1 << 6)
+#define  EVENT_A_EVENTS_B  (1 << 5)
+#define  EVENT_A_E_nONKEY  (1 << 0)
+#define EVENT_B0x07
+#define EVENT_C0x08
+#define EVENT_D0x09
+#define IRQ_MASK_A 0x0a
+#define  IRQ_MASK_A_M_RESERVED ((1 << 7) | (1 << 6) | (1 << 5))
+#define  IRQ_MASK_A_M_SEQ_RDY  (1 << 4)
+#define  IRQ_MASK_A_M_ADC_RDY  (1 << 3)
+#define  IRQ_MASK_A_M_TICK (1 << 2)
+#define  IRQ_MASK_A_M_ALARM(1 << 1)
+#define  IRQ_MASK_A_M_nONKEY   (1 << 0)
+#define IRQ_MASK_B 0x0b
+#define IRQ_MASK_C 0x0c
+#define IRQ_MASK_D 0x0d
 #define CONTROL_F  0x13
 #define  CONTROL_F_WAKE_UP (1 << 2)
 #define  CONTROL_F_SHUTDOWN(1 << 1)
@@ -55,11 +78,20 @@ extern void (*powerdownfn)(void);
 #define ALARM_Y0x4b
 #define  ALARM_Y_TICK_ON   (1 << 7)
 
+#ifdef DAPMIC_DEBUG
+# define DPRINTF(args) do { printf args; } while (0)
+#else
+# define DPRINTF(args) do {} while (0)
+#endif
+
 struct dapmic_softc {
struct device sc_dev;
i2c_tag_t sc_tag;
i2c_addr_t sc_addr;
 
+   int (*sc_ih)(void *);
+   struct task sc_task;
+
struct todr_chip_handle sc_todr;
 };
 
@@ -80,8 +112,11 @@ int dapmic_clock_read(struct dapmic_soft
 intdapmic_clock_write(struct dapmic_softc *, struct clock_ymdhms *);
 intdapmic_gettime(struct todr_chip_handle *, struct timeval *);
 intdapmic_settime(struct todr_chip_handle *, struct timeval *);
+void   dapmic_reset_irq_mask(struct dapmic_softc *);
 void   dapmic_reset(void);
 void   dapmic_powerdown(void);
+intdapmic_intr(void *);
+void   dapmic_shutdown_task(void *);
 
 int
 dapmic_match(struct device *parent, void *match, void *aux)
@@ -96,6 +131,7 @@ dapmic_attach(struct device *parent, str
 {
struct dapmic_softc *sc = (struct dapmic_softc *)self;
struct i2c_attach_args *ia = aux;
+   int node = *(int *)ia->ia_cookie;
 
sc->sc_tag = ia->ia_tag;
sc->sc_addr = ia->ia_addr;
@@ -105,12 +141,35 @@ dapmic_attach(struct device *parent, str
sc->sc_todr.todr_settime = dapmic_settime;
todr_attach(&sc->sc_todr);
 
-   printf("\n");
-
if (cpuresetfn == NULL)
cpuresetfn = dapmic_reset;
if (powerdownfn == NULL)
powerdownfn = dapmic_powerdown;
+
+   task_set(&sc->sc_task, dapmic_shutdown_task, sc);
+
+   /* Mask away events we don't care about */
+   dapmic_reg_write(sc, IRQ_MASK_A,
+   0xff & ~(IRQ_MASK_A_M_RESERVED | IRQ_MASK_A_M_nONKEY));
+   dapmic_reg_write(sc, IRQ_MASK_B, 0xff);
+   dapmic_reg_write(sc, IRQ_MASK_C, 0xff);
+   

Re: mips64: trigger deferred timer interrupt from splx(9)

2022-08-17 Thread Scott Cheloha
On Wed, Aug 17, 2022 at 01:30:29PM +, Visa Hankala wrote:
> On Tue, Aug 09, 2022 at 09:54:02AM -0500, Scott Cheloha wrote:
> > On Tue, Aug 09, 2022 at 02:03:31PM +, Visa Hankala wrote:
> > > On Mon, Aug 08, 2022 at 02:52:37AM -0500, Scott Cheloha wrote:
> > > > One thing I'm still uncertain about is how glxclk fits into the
> > > > loongson picture.  It's an interrupt clock that runs hardclock() and
> > > > statclock(), but the code doesn't do any logical masking, so I don't
> > > > know whether or not I need to adjust anything in that code or account
> > > > for it at all.  If there's no logical masking there's no deferral, so
> > > > it would never need to call md_triggerclock() from splx(9).
> > > 
> > > I think the masking of glxclk interrupts is handled by the ISA
> > > interrupt code.
> > 
> > Do those machines not have Coprocessor 0?  If they do, why would you
> > prefer glxclk over CP0?
> > 
> > > The patch misses md_triggerclock definition in mips64_machdep.c.
> > 
> > Whoops, forgot that file.  Fuller patch below.
> > 
> > > I have put this to the test on the mips64 ports builder machines.
> 
> The machines completed a build with this patch without problems.
> I tested with the debug counters removed from cp0_trigger_int5().
> 
> OK visa@

Thank you for testing!

There was a loongson portion to this patch.  Is this OK on loongson or
just octeon?

Also, what did the debug counters look like when you yanked them?  If
cp0_raise_miss was non-zero I will double the initial offset to 32
cycles.



Re: fix i386 cpu classnames

2022-08-17 Thread Mike Larkin
On Wed, Aug 17, 2022 at 03:27:51PM +1000, Jonathan Gray wrote:
> broken after rev 1.652 of machdep.c
>
> cpu0: Intel(R) Pentium(R) M processor 1200MHz ("GenuineIntel" 686-class) 1.20 
> GHz, 06-09-05
> cpu0: Intel(R) Pentium(R) M processor 1200MHz ("GenuineIntel" -class) 1.20 
> GHz, 06-09-05
>
> CPUCLASS_* can't be renumbered as machdep.c assumes class is
> family - 3 in at least one path.  486 is family 4, so class 1
>
> Index: sys/arch/i386/i386/machdep.c
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/machdep.c,v
> retrieving revision 1.652
> diff -u -p -r1.652 machdep.c
> --- sys/arch/i386/i386/machdep.c  15 Aug 2022 04:17:50 -  1.652
> +++ sys/arch/i386/i386/machdep.c  17 Aug 2022 05:18:29 -
> @@ -509,6 +509,7 @@ const struct cpu_nocpuid_nameclass i386_
>  };
>
>  const char *classnames[] = {
> + "",
>   "486",
>   "586",
>   "686"
>

ok mlarkin if not already committed



Re: bgpd switch to memxyz() from bxyz()

2022-08-17 Thread Theo Buehler
On Wed, Aug 17, 2022 at 04:04:26PM +0200, Claudio Jeker wrote:
> I did switch to memset from bzero and from bcopy to memcpy whenever I
> touched the code but now I just decided to change all users of bcopy(),
> bzero() and bcmp() to use memcpy(), memset() and memcmp().
> 
> None of the bcopy() users had overlapping memory regions so memcpy()
> should be fine.

I agree that there are no overlaps as at least one of the two variables
is always on the stack. Also, the arguments were correctly reversed.

ok tb



bgpd switch to memxyz() from bxyz()

2022-08-17 Thread Claudio Jeker
I did switch to memset from bzero and from bcopy to memcpy whenever I
touched the code but now I just decided to change all users of bcopy(),
bzero() and bcmp() to use memcpy(), memset() and memcmp().

None of the bcopy() users had overlapping memory regions so memcpy()
should be fine.

-- 
:wq Claudio

Index: usr.sbin/bgpctl/bgpctl.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v
retrieving revision 1.281
diff -u -p -r1.281 bgpctl.c
--- usr.sbin/bgpctl/bgpctl.c28 Jul 2022 10:40:25 -  1.281
+++ usr.sbin/bgpctl/bgpctl.c17 Aug 2022 13:56:57 -
@@ -131,7 +131,7 @@ main(int argc, char *argv[])
if (pledge("stdio", NULL) == -1)
err(1, "pledge");
 
-   bzero(&ribreq, sizeof(ribreq));
+   memset(&ribreq, 0, sizeof(ribreq));
if (res->as.type != AS_UNDEF)
ribreq.as = res->as;
if (res->addr.aid) {
@@ -160,7 +160,7 @@ main(int argc, char *argv[])
if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1)
err(1, "control_init: socket");
 
-   bzero(&sa_un, sizeof(sa_un));
+   memset(&sa_un, 0, sizeof(sa_un));
sa_un.sun_family = AF_UNIX;
if (strlcpy(sa_un.sun_path, sockname, sizeof(sa_un.sun_path)) >=
sizeof(sa_un.sun_path))
@@ -235,7 +235,7 @@ main(int argc, char *argv[])
NULL, 0);
break;
case SHOW_RIB:
-   bzero(&ribreq, sizeof(ribreq));
+   memset(&ribreq, 0, sizeof(ribreq));
type = IMSG_CTL_SHOW_RIB;
if (res->addr.aid) {
ribreq.prefix = res->addr;
@@ -310,7 +310,7 @@ main(int argc, char *argv[])
break;
case NETWORK_ADD:
case NETWORK_REMOVE:
-   bzero(&net, sizeof(net));
+   memset(&net, 0, sizeof(net));
net.prefix = res->addr;
net.prefixlen = res->prefixlen;
net.rd = res->rd;
@@ -333,14 +333,14 @@ main(int argc, char *argv[])
done = 1;
break;
case NETWORK_SHOW:
-   bzero(&ribreq, sizeof(ribreq));
+   memset(&ribreq, 0, sizeof(ribreq));
ribreq.aid = res->aid;
strlcpy(ribreq.rib, res->rib, sizeof(ribreq.rib));
imsg_compose(ibuf, IMSG_CTL_SHOW_NETWORK, 0, 0, -1,
, sizeof(ribreq));
break;
case NETWORK_MRT:
-   bzero(&ribreq, sizeof(ribreq));
+   memset(&ribreq, 0, sizeof(ribreq));
if (res->as.type != AS_UNDEF)
ribreq.as = res->as;
if (res->addr.aid) {
@@ -1076,7 +1076,7 @@ network_bulk(struct parse_result *res)
/* Stop processing after a comment */
if (*b == '#')
break;
-   bzero(&net, sizeof(net));
+   memset(&net, 0, sizeof(net));
if (parse_prefix(b, strlen(b), , ) != 1)
errx(1, "bad prefix: %s", b);
net.prefix = h;
@@ -1145,7 +1145,7 @@ show_mrt_dump(struct mrt_rib *mr, struct
 
for (i = 0; i < mr->nentries; i++) {
mre = &mr->entries[i];
-   bzero(&ctl, sizeof(ctl));
+   memset(&ctl, 0, sizeof(ctl));
ctl.prefix = mr->prefix;
ctl.prefixlen = mr->prefixlen;
if (mre->originated <= now)
@@ -1228,7 +1228,7 @@ network_mrt_dump(struct mrt_rib *mr, str
now = time(NULL);
for (i = 0; i < mr->nentries; i++) {
mre = &mr->entries[i];
-   bzero(&ctl, sizeof(ctl));
+   memset(&ctl, 0, sizeof(ctl));
ctl.prefix = mr->prefix;
ctl.prefixlen = mr->prefixlen;
if (mre->originated <= now)
@@ -1269,7 +1269,7 @@ network_mrt_dump(struct mrt_rib *mr, str
!match_aspath(mre->aspath, mre->aspath_len, >as))
continue;
 
-   bzero(&net, sizeof(net));
+   memset(&net, 0, sizeof(net));
net.prefix = ctl.prefix;
net.prefixlen = ctl.prefixlen;
net.type = NETWORK_MRTCLONE;
Index: usr.sbin/bgpctl/mrtparser.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/mrtparser.c,v
retrieving revision 1.17
diff -u -p -r1.17 mrtparser.c
--- usr.sbin/bgpctl/mrtparser.c 6 Feb 2022 09:52:32 -   1.17
+++ usr.sbin/bgpctl/mrtparser.c 17 Aug 2022 13:56:57 -
@@ -58,7 +58,7 @@ mrt_read_msg(int fd, struct mrt_hdr *hdr
 {
void *buf;
 
-   bzero(hdr, sizeof(*hdr));
+   memset(hdr, 0, sizeof(*hdr));
if (mrt_read_buf(fd, hdr, sizeof(*hdr)) != sizeof(*hdr))
return (NULL);
 
Index: usr.sbin/bgpctl/parser.c

Re: mips64: trigger deferred timer interrupt from splx(9)

2022-08-17 Thread Visa Hankala
On Tue, Aug 09, 2022 at 09:54:02AM -0500, Scott Cheloha wrote:
> On Tue, Aug 09, 2022 at 02:03:31PM +, Visa Hankala wrote:
> > On Mon, Aug 08, 2022 at 02:52:37AM -0500, Scott Cheloha wrote:
> > > One thing I'm still uncertain about is how glxclk fits into the
> > > loongson picture.  It's an interrupt clock that runs hardclock() and
> > > statclock(), but the code doesn't do any logical masking, so I don't
> > > know whether or not I need to adjust anything in that code or account
> > > for it at all.  If there's no logical masking there's no deferral, so
> > > it would never need to call md_triggerclock() from splx(9).
> > 
> > I think the masking of glxclk interrupts is handled by the ISA
> > interrupt code.
> 
> Do those machines not have Coprocessor 0?  If they do, why would you
> prefer glxclk over CP0?
> 
> > The patch misses md_triggerclock definition in mips64_machdep.c.
> 
> Whoops, forgot that file.  Fuller patch below.
> 
> > I have put this to the test on the mips64 ports builder machines.

The machines completed a build with this patch without problems.
I tested with the debug counters removed from cp0_trigger_int5().

OK visa@

> Index: mips64/mips64/clock.c
> ===
> RCS file: /cvs/src/sys/arch/mips64/mips64/clock.c,v
> retrieving revision 1.45
> diff -u -p -r1.45 clock.c
> --- mips64/mips64/clock.c 6 Apr 2022 18:59:26 -   1.45
> +++ mips64/mips64/clock.c 9 Aug 2022 14:48:47 -
> @@ -60,6 +60,7 @@ const struct cfattach clock_ca = {
>  };
>  
>  void cp0_startclock(struct cpu_info *);
> +void cp0_trigger_int5(void);
>  uint32_t cp0_int5(uint32_t, struct trapframe *);
>  
>  int
> @@ -86,19 +87,20 @@ clockattach(struct device *parent, struc
>   cp0_set_compare(cp0_get_count() - 1);
>  
>   md_startclock = cp0_startclock;
> + md_triggerclock = cp0_trigger_int5;
>  }
>  
>  /*
>   *  Interrupt handler for targets using the internal count register
>   *  as interval clock. Normally the system is run with the clock
>   *  interrupt always enabled. Masking is done here and if the clock
> - *  can not be run the tick is just counted and handled later when
> - *  the clock is logically unmasked again.
> + *  cannot be run the tick is handled later when the clock is logically
> + *  unmasked again.
>   */
>  uint32_t
>  cp0_int5(uint32_t mask, struct trapframe *tf)
>  {
> - u_int32_t clkdiff;
> + u_int32_t clkdiff, pendingticks = 0;
>   struct cpu_info *ci = curcpu();
>  
>   /*
> @@ -113,15 +115,26 @@ cp0_int5(uint32_t mask, struct trapframe
>   }
>  
>   /*
> +  * If the clock interrupt is masked, defer any work until it
> +  * is unmasked from splx(9).
> +  */
> + if (tf->ipl >= IPL_CLOCK) {
> + ci->ci_clock_deferred = 1;
> + cp0_set_compare(cp0_get_count() - 1);
> + return CR_INT_5;
> + }
> + ci->ci_clock_deferred = 0;
> +
> + /*
>* Count how many ticks have passed since the last clock interrupt...
>*/
>   clkdiff = cp0_get_count() - ci->ci_cpu_counter_last;
>   while (clkdiff >= ci->ci_cpu_counter_interval) {
>   ci->ci_cpu_counter_last += ci->ci_cpu_counter_interval;
>   clkdiff = cp0_get_count() - ci->ci_cpu_counter_last;
> - ci->ci_pendingticks++;
> + pendingticks++;
>   }
> - ci->ci_pendingticks++;
> + pendingticks++;
>   ci->ci_cpu_counter_last += ci->ci_cpu_counter_interval;
>  
>   /*
> @@ -132,32 +145,64 @@ cp0_int5(uint32_t mask, struct trapframe
>   clkdiff = cp0_get_count() - ci->ci_cpu_counter_last;
>   if ((int)clkdiff >= 0) {
>   ci->ci_cpu_counter_last += ci->ci_cpu_counter_interval;
> - ci->ci_pendingticks++;
> + pendingticks++;
>   cp0_set_compare(ci->ci_cpu_counter_last);
>   }
>  
>   /*
> -  * Process clock interrupt unless it is currently masked.
> +  * Process clock interrupt.
>*/
> - if (tf->ipl < IPL_CLOCK) {
>  #ifdef MULTIPROCESSOR
> - register_t sr;
> + register_t sr;
>  
> - sr = getsr();
> - ENABLEIPI();
> + sr = getsr();
> + ENABLEIPI();
>  #endif
> - while (ci->ci_pendingticks) {
> - atomic_inc_long(
> - (unsigned long *)_clock_count.ec_count);
> - hardclock(tf);
> - ci->ci_pendingticks--;
> - }
> + while (pendingticks) {
> + atomic_inc_long((unsigned long *)_clock_count.ec_count);
> + hardclock(tf);
> + pendingticks--;
> + }
>  #ifdef MULTIPROCESSOR
> - setsr(sr);
> + setsr(sr);
>  #endif
> - }
>  
>   return CR_INT_5;/* Clock is always on 5 */
> +}
> +
> +unsigned long cp0_raise_calls, cp0_raise_miss;
> +
> +/*
> + * Trigger the clock interrupt.
> + * 
> + * We need to spin 

Re: bgpd kroute includes cleanup

2022-08-17 Thread Claudio Jeker
On Wed, Aug 17, 2022 at 02:48:42PM +0200, Theo Buehler wrote:
> On Wed, Aug 17, 2022 at 02:25:51PM +0200, Claudio Jeker wrote:
> > Remove some unneeded headers from kroute.c
> > Also move sys/tree.h up above the other sys includes.
> 
> err.h was never used, fcntl.h was used for a bit more than two days
> after import, so these can definitely go.
> 
> arpa/inet.h is used for ntohl() and friends, or do you want to rely on
> bgpd.h to pull this in?

It is also defined by netinet/in.h but that is less portable. I'll add it
back.
 
> More things to clean up if you want:
> 
> kroute.c also needs sys/queue.h and imsg.h, which it gets via bgpd.h.
> Plus strings.h (bcopy, ffs) is pulled in via string.h.

Arrg, strings.h vs string.h. I thought just use string.h but it can never
be that simple.

On a rampage now to convert all the bcmp, bcopy and bzero to memcmp,
memcpy and memset.

After that we have ffs which still needs strings.h. Why posix, why?
 
> style(9) wants an empty line before errno.h.

Added.

-- 
:wq Claudio



Re: uvm_swap: introduce uvm_swap_data_lock

2022-08-17 Thread Martin Pieuchot
On 16/01/22(Sun) 15:35, Martin Pieuchot wrote:
> On 30/12/21(Thu) 23:38, Theo Buehler wrote:
> > The diff below does two things: it adds a uvm_swap_data_lock mutex and
> > trades it for the KERNEL_LOCK in uvm_swapisfull() and uvm_swap_markbad()
> 
> Why is it enough?  Which fields is the lock protecting in these
> functions?  Is it `uvmexp.swpages', could that be documented?

It is documented in the diff below.

> 
> What about `nswapdev'?  Why is the rwlock grabbed before reading it in
> sys_swapctl()?

Because it is always modified with the lock, I added some documentation.

> What about `swpginuse'?

This is still under KERNEL_LOCK(), documented below.

> If the mutex/rwlock are used to protect the global `swap_priority' could
> that be also documented?  Once this is documented it should be trivial to
> see that some places are missing some locking.  Is it intentional?
> 
> > The uvm_swap_data_lock protects all swap data structures, so needs to be
> > grabbed a few times, many of them already documented in the comments.
> > 
> > For review, I suggest comparing to what NetBSD did and also going
> > through the consumers (swaplist_insert, swaplist_find, swaplist_trim)
> > and check that they are properly locked when called, or that there is
> > the KERNEL_LOCK() in place when swap data structures are manipulated.
> 
> I'd suggest using the KASSERT(rw_write_held()) idiom to further reduce
> the differences with NetBSD.

Done.

> > In swapmount() I introduced locking since that's needed to be able to
> > assert that the proper locks are held in swaplist_{insert,find,trim}.
> 
> Could the KERNEL_LOCK() in uvm_swap_get() be pushed a bit further down?
> What about `uvmexp.nswget' and `uvmexp.swpgonly' in there?

This has been done as part of another change.  This diff uses an atomic
operation to increase `nswget' in case multiple threads fault on a page
in swap at the same time.

Updated diff below, ok?

Index: uvm/uvm_swap.c
===
RCS file: /cvs/src/sys/uvm/uvm_swap.c,v
retrieving revision 1.163
diff -u -p -r1.163 uvm_swap.c
--- uvm/uvm_swap.c  6 Aug 2022 13:44:04 -   1.163
+++ uvm/uvm_swap.c  17 Aug 2022 11:46:20 -
@@ -45,6 +45,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -84,13 +85,16 @@
  * the system maintains a global data structure describing all swap
  * partitions/files.   there is a sorted LIST of "swappri" structures
  * which describe "swapdev"'s at that priority.   this LIST is headed
- * by the "swap_priority" global var.each "swappri" contains a 
+ * by the "swap_priority" global var.each "swappri" contains a
  * TAILQ of "swapdev" structures at that priority.
  *
  * locking:
  *  - swap_syscall_lock (sleep lock): this lock serializes the swapctl
  *system call and prevents the swap priority list from changing
  *while we are in the middle of a system call (e.g. SWAP_STATS).
+ *  - uvm_swap_data_lock (mutex): this lock protects all swap data
+ *structures including the priority list, the swapdev structures,
+ *and the swapmap arena.
  *
  * each swap device has the following info:
  *  - swap device in use (could be disabled, preventing future use)
@@ -106,7 +110,7 @@
  * userland controls and configures swap with the swapctl(2) system call.
  * the sys_swapctl performs the following operations:
  *  [1] SWAP_NSWAP: returns the number of swap devices currently configured
- *  [2] SWAP_STATS: given a pointer to an array of swapent structures 
+ *  [2] SWAP_STATS: given a pointer to an array of swapent structures
  * (passed in via "arg") of a size passed in via "misc" ... we load
  * the current swap config into the array.
  *  [3] SWAP_ON: given a pathname in arg (could be device or file) and a
@@ -208,9 +212,10 @@ struct extent *swapmap;/* controls the
 
 /* list of all active swap devices [by priority] */
 LIST_HEAD(swap_priority, swappri);
-struct swap_priority swap_priority;
+struct swap_priority swap_priority;/* [S] */
 
 /* locks */
+struct mutex uvm_swap_data_lock = MUTEX_INITIALIZER(IPL_NONE);
 struct rwlock swap_syscall_lock = RWLOCK_INITIALIZER("swplk");
 
 struct mutex oommtx = MUTEX_INITIALIZER(IPL_VM);
@@ -224,7 +229,7 @@ void swapdrum_add(struct swapdev *, in
 struct swapdev *swapdrum_getsdp(int);
 
 struct swapdev *swaplist_find(struct vnode *, int);
-voidswaplist_insert(struct swapdev *, 
+voidswaplist_insert(struct swapdev *,
 struct swappri *, int);
 voidswaplist_trim(void);
 
@@ -472,16 +477,19 @@ uvm_swap_finicrypt_all(void)
 /*
  * swaplist_insert: insert swap device "sdp" into the global list
  *
- * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
- * => caller must provide a newly malloc'd swappri structure (we will
- * FREE it if we don't need it... this it to prevent malloc blocking
- * 

Re: bgpd kroute includes cleanup

2022-08-17 Thread Theo Buehler
On Wed, Aug 17, 2022 at 02:25:51PM +0200, Claudio Jeker wrote:
> Remove some unneeded headers from kroute.c
> Also move sys/tree.h up above the other sys includes.

err.h was never used, fcntl.h was used for a bit more than two days
after import, so these can definitely go.

arpa/inet.h is used for ntohl() and friends, or do you want to rely on
bgpd.h to pull this in?

More things to clean up if you want:

kroute.c also needs sys/queue.h and imsg.h, which it gets via bgpd.h.
Plus strings.h (bcopy, ffs) is pulled in via string.h.

style(9) wants an empty line before errno.h.



bgpd kroute includes cleanup

2022-08-17 Thread Claudio Jeker
Remove some unneeded headers from kroute.c
Also move sys/tree.h up above the other sys includes.

-- 
:wq Claudio

Index: kroute.c
===
RCS file: /cvs/src/usr.sbin/bgpd/kroute.c,v
retrieving revision 1.291
diff -u -p -r1.291 kroute.c
--- kroute.c17 Aug 2022 10:54:52 -  1.291
+++ kroute.c17 Aug 2022 10:59:55 -
@@ -18,22 +18,19 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
-#include 
 #include 
-#include 
 #include 
 #include 
 #include 



Re: [RFC] acpi: add acpitimer_delay(), acpihpet_delay()

2022-08-17 Thread Jonathan Gray
On Wed, Aug 17, 2022 at 04:53:20PM +1000, Jonathan Gray wrote:
> 
> It seems to me it would be cleaner if the decision of what to use for
> delay could be moved into an md file.
> 
> Or abstract it by having a numeric weight like timecounters or driver
> match return numbers.

diff against your previous, does not change lapic_delay

diff --git sys/arch/amd64/amd64/machdep.c sys/arch/amd64/amd64/machdep.c
index 932b1dfeb47..c4645b6a6fd 100644
--- sys/arch/amd64/amd64/machdep.c
+++ sys/arch/amd64/amd64/machdep.c
@@ -2069,3 +2069,13 @@ check_context(const struct reg *regs, struct trapframe 
*tf)
 
return 0;
 }
+
+void
+delay_init(void(*f)(int), int v)
+{
+   static int c = 0;
+   if (v > c) {
+   delay_func = f;
+   c = v;
+   }
+}
diff --git sys/arch/amd64/amd64/tsc.c sys/arch/amd64/amd64/tsc.c
index fd38dc6359d..8c839357dd2 100644
--- sys/arch/amd64/amd64/tsc.c
+++ sys/arch/amd64/amd64/tsc.c
@@ -109,7 +109,7 @@ tsc_identify(struct cpu_info *ci)
 
tsc_frequency = tsc_freq_cpuid(ci);
if (tsc_frequency > 0)
-   delay_func = tsc_delay;
+   delay_init(tsc_delay, 300);
 }
 
 static inline int
diff --git sys/arch/amd64/include/cpu.h sys/arch/amd64/include/cpu.h
index b8db48f2714..a82af172452 100644
--- sys/arch/amd64/include/cpu.h
+++ sys/arch/amd64/include/cpu.h
@@ -359,6 +359,7 @@ void signotify(struct proc *);
  * We need a machine-independent name for this.
  */
 extern void (*delay_func)(int);
+void delay_init(void(*)(int), int);
 struct timeval;
 
 #define DELAY(x)   (*delay_func)(x)
diff --git sys/arch/i386/i386/machdep.c sys/arch/i386/i386/machdep.c
index e4cb15b4dc1..7da5c26e240 100644
--- sys/arch/i386/i386/machdep.c
+++ sys/arch/i386/i386/machdep.c
@@ -3996,3 +3996,13 @@ cpu_rnd_messybits(void)
nanotime();
return (ts.tv_nsec ^ (ts.tv_sec << 20));
 }
+
+void
+delay_init(void(*f)(int), int v)
+{
+   static int c = 0;
+   if (v > c) {
+   delay_func = f;
+   c = v;
+   }
+}
diff --git sys/arch/i386/include/cpu.h sys/arch/i386/include/cpu.h
index 5f300710562..211ee475678 100644
--- sys/arch/i386/include/cpu.h
+++ sys/arch/i386/include/cpu.h
@@ -302,6 +302,7 @@ void signotify(struct proc *);
  * We need a machine-independent name for this.
  */
 extern void (*delay_func)(int);
+void delay_init(void(*)(int), int);
 struct timeval;
 
 #defineDELAY(x)(*delay_func)(x)
diff --git sys/dev/acpi/acpihpet.c sys/dev/acpi/acpihpet.c
index 6dc595e50ab..4332b4dbc0e 100644
--- sys/dev/acpi/acpihpet.c
+++ sys/dev/acpi/acpihpet.c
@@ -27,8 +27,6 @@
 #include 
 #include 
 
-#include "acpitimer.h"
-
 int acpihpet_attached;
 
 int acpihpet_match(struct device *, void *, void *);
@@ -270,15 +268,7 @@ acpihpet_attach(struct device *parent, struct device 
*self, void *aux)
hpet_timecounter.tc_name = sc->sc_dev.dv_xname;
tc_init(&hpet_timecounter);
 
-#if defined(__amd64__) || defined(__i386__)
-   if (delay_func == i8254_delay)
-   delay_func = acpihpet_delay;
-#if NACPITIMER > 1
-   extern void acpitimer_delay(int);
-   if (delay_func == acpitimer_delay)
-   delay_func = acpihpet_delay;
-#endif
-#endif /* defined(__amd64__) || defined(__i386__) */
+   delay_init(acpihpet_delay, 200);
 
 #if defined(__amd64__)
extern void cpu_recalibrate_tsc(struct timecounter *);
diff --git sys/dev/acpi/acpitimer.c sys/dev/acpi/acpitimer.c
index 0c4c7b71a01..e2757a40f3d 100644
--- sys/dev/acpi/acpitimer.c
+++ sys/dev/acpi/acpitimer.c
@@ -103,10 +103,8 @@ acpitimerattach(struct device *parent, struct device 
*self, void *aux)
acpi_timecounter.tc_name = sc->sc_dev.dv_xname;
tc_init(&acpi_timecounter);
 
-#if defined(__amd64__) || defined(__i386__)
-   if (delay_func == i8254_delay)
-   delay_func = acpitimer_delay;
-#endif
+   delay_init(acpitimer_delay, 100);
+
 #if defined(__amd64__)
extern void cpu_recalibrate_tsc(struct timecounter *);
cpu_recalibrate_tsc(&acpi_timecounter);
diff --git sys/dev/acpi/files.acpi sys/dev/acpi/files.acpi
index 8ec3ec2f8b3..f97eb6d4e3e 100644
--- sys/dev/acpi/files.acpi
+++ sys/dev/acpi/files.acpi
@@ -13,7 +13,7 @@ file  dev/acpi/acpidebug.cacpi & ddb
 # ACPI timer
 device acpitimer
 attach acpitimer at acpi
-file   dev/acpi/acpitimer.cacpitimer needs-flag
+file   dev/acpi/acpitimer.cacpitimer
 
 # AC device
 device acpiac
diff --git sys/dev/pv/pvbus.c sys/dev/pv/pvbus.c
index cbe543ac312..ee53afe2138 100644
--- sys/dev/pv/pvbus.c
+++ sys/dev/pv/pvbus.c
@@ -319,9 +319,8 @@ pvbus_hyperv(struct pvbus_hv *hv)
HYPERV_VERSION_EBX_MINOR_S;
 
 #if NHYPERV > 0
-   if (hv->hv_features & CPUID_HV_MSR_TIME_REFCNT &&
-   delay_func == i8254_delay)
-   delay_func = hv_delay;
+   if (hv->hv_features & CPUID_HV_MSR_TIME_REFCNT)
+   delay_init(hv_delay, 250);
 #endif
 }
 



pcidevs: PM991_NVME: add 980 name

2022-08-17 Thread Klemens Nanni
SSD 980 is matched as PM991 but as far as I can tell from research,
those are two different products with different product/model codes
using the same PCI product id.

nvme0 at pci4 dev 0 function 0 "Samsung PM991 NVMe" rev 0x00: msix, NVMe 1.4
nvme0: Samsung SSD 980 500GB, firmware 1B4QFXO7, serial S64DNX0RC12899Z
scsibus3 at nvme0: 2 targets, initiator 0
sd2 at scsibus3 targ 1 lun 0: 

Same for the PRO variant.

Feedback? Objection? OK?


Index: pcidevs
===
RCS file: /cvs/src/sys/dev/pci/pcidevs,v
retrieving revision 1.2001
diff -u -p -r1.2001 pcidevs
--- pcidevs 16 Aug 2022 09:28:45 -  1.2001
+++ pcidevs 17 Aug 2022 10:25:36 -
@@ -8334,8 +8334,8 @@ product SAMSUNG2 SM951_AHCI   0xa801  SM951
 product SAMSUNG2 SM951_NVME0xa802  SM951/PM951 NVMe
 product SAMSUNG2 SM961_NVME0xa804  SM961/PM961 NVMe
 product SAMSUNG2 SM981_NVME0xa808  SM981/PM981 NVMe
-product SAMSUNG2 PM991_NVME0xa809  PM991 NVMe
-product SAMSUNG2 PM9A1_NVME0xa80a  PM9A1 NVMe
+product SAMSUNG2 PM991_NVME0xa809  PM991/980 NVMe
+product SAMSUNG2 PM9A1_NVME0xa80a  PM9A1/980PRO NVMe
 product SAMSUNG2 NVME_171X 0xa820  NVMe
 product SAMSUNG2 NVME_172X 0xa821  NVMe
 product SAMSUNG2 NVME_172X_A_B 0xa822  NVMe



Re: bgpd unroll struct kif_node into struct kif

2022-08-17 Thread Theo Buehler
On Wed, Aug 17, 2022 at 12:18:05PM +0200, Claudio Jeker wrote:
> The same thing was done for kroute and knexthop. kif can benefit from the
> same. Diff is mostly mechanical.

ok



bgpd unroll struct kif_node into struct kif

2022-08-17 Thread Claudio Jeker
The same thing was done for kroute and knexthop. kif can benefit from the
same. Diff is mostly mechanical.

-- 
:wq Claudio

Index: kroute.c
===
RCS file: /cvs/src/usr.sbin/bgpd/kroute.c,v
retrieving revision 1.290
diff -u -p -r1.290 kroute.c
--- kroute.c17 Aug 2022 09:16:44 -  1.290
+++ kroute.c17 Aug 2022 10:14:46 -
@@ -98,6 +98,7 @@ struct kredist_node {
 };
 
 struct kif {
+   RB_ENTRY(kif)entry;
char ifname[IFNAMSIZ];
uint64_t baudrate;
u_intrdomain;
@@ -109,11 +110,6 @@ struct kif {
uint8_t  depend_state;  /* for session depend on */
 };
 
-struct kif_node {
-   RB_ENTRY(kif_node)   entry;
-   struct kif   k;
-};
-
 intktable_new(u_int, u_int, char *, int);
 void   ktable_free(u_int);
 void   ktable_destroy(struct ktable *);
@@ -134,7 +130,7 @@ int kroute_compare(struct kroute *, stru
 intkroute6_compare(struct kroute6 *, struct kroute6 *);
 intknexthop_compare(struct knexthop *, struct knexthop *);
 intkredist_compare(struct kredist_node *, struct kredist_node *);
-intkif_compare(struct kif_node *, struct kif_node *);
+intkif_compare(struct kif *, struct kif *);
 
 struct kroute  *kroute_find(struct ktable *, const struct bgpd_addr *,
uint8_t, uint8_t);
@@ -153,9 +149,9 @@ int  knexthop_insert(struct ktable *, s
 voidknexthop_remove(struct ktable *, struct knexthop *);
 voidknexthop_clear(struct ktable *);
 
-struct kif_node*kif_find(int);
-int kif_insert(struct kif_node *);
-int kif_remove(struct kif_node *);
+struct kif *kif_find(int);
+int kif_insert(struct kif *);
+int kif_remove(struct kif *);
 voidkif_clear(void);
 
 int kroute_validate(struct kroute *);
@@ -199,9 +195,9 @@ RB_GENERATE(knexthop_tree, knexthop, ent
 RB_PROTOTYPE(kredist_tree, kredist_node, entry, kredist_compare)
 RB_GENERATE(kredist_tree, kredist_node, entry, kredist_compare)
 
-RB_HEAD(kif_tree, kif_node)kit;
-RB_PROTOTYPE(kif_tree, kif_node, entry, kif_compare)
-RB_GENERATE(kif_tree, kif_node, entry, kif_compare)
+RB_HEAD(kif_tree, kif) kit;
+RB_PROTOTYPE(kif_tree, kif, entry, kif_compare)
+RB_GENERATE(kif_tree, kif, entry, kif_compare)
 
 #define KT2KNT(x)  (&(ktable_get((x)->nhtableid)->knt))
 
@@ -874,7 +870,7 @@ kr_show_route(struct imsg *imsg)
sa_family_t  af;
struct ctl_show_nexthop  snh;
struct knexthop *h;
-   struct kif_node *kif;
+   struct kif  *kif;
u_inti;
u_short  ifindex = 0;
 
@@ -983,7 +979,7 @@ kr_show_route(struct imsg *imsg)
snh.kr.priority = kr_priority();
if ((kif = kif_find(ifindex)) != NULL)
memcpy(&snh.iface,
-   kr_show_interface(&kif->k),
+   kr_show_interface(kif),
sizeof(snh.iface));
}
send_imsg_session(IMSG_CTL_SHOW_NEXTHOP, imsg->hdr.pid,
@@ -993,7 +989,7 @@ kr_show_route(struct imsg *imsg)
case IMSG_CTL_SHOW_INTERFACE:
RB_FOREACH(kif, kif_tree, &kit)
send_imsg_session(IMSG_CTL_SHOW_INTERFACE,
-   imsg->hdr.pid, kr_show_interface(&kif->k),
+   imsg->hdr.pid, kr_show_interface(kif),
sizeof(struct ctl_show_interface));
break;
case IMSG_CTL_SHOW_FIB_TABLES:
@@ -1034,11 +1030,11 @@ kr_send_dependon(struct kif *kif)
 void
 kr_ifinfo(char *ifname)
 {
-   struct kif_node *kif;
+   struct kif  *kif;
 
RB_FOREACH(kif, kif_tree, &kit)
-   if (!strcmp(ifname, kif->k.ifname)) {
-   kr_send_dependon(&kif->k);
+   if (!strcmp(ifname, kif->ifname)) {
+   kr_send_dependon(kif);
return;
}
 }
@@ -1553,9 +1549,9 @@ kredist_compare(struct kredist_node *a, 
 }
 
 int
-kif_compare(struct kif_node *a, struct kif_node *b)
+kif_compare(struct kif *a, struct kif *b)
 {
-   return (b->k.ifindex - a->k.ifindex);
+   return (b->ifindex - a->ifindex);
 }
 
 
@@ -1994,19 +1990,19 @@ knexthop_clear(struct ktable *kt)
knexthop_remove(kt, kn);
 }
 
-struct kif_node *
+struct kif *
 kif_find(int ifindex)
 {
-   struct kif_node s;
+   struct kif  s;
 
bzero(&s, sizeof(s));
-   s.k.ifindex = ifindex;
+   s.ifindex = ifindex;
 
return (RB_FIND(kif_tree, &kit, &s));
 }
 
 int
-kif_insert(struct kif_node *kif)
+kif_insert(struct kif *kif)
 {

Re: Race in disk_attach_callback?

2022-08-17 Thread Miod Vallat
> What is the result if root runs disklabel, and forces it to all zeros?

If the root duid is all zeroes, then the only way to refer to the root
disk is to use its /dev/{s,w}d* device name, as zero duids are ignored.

If you set a zero duid in disklabel(8), setdisklabel() in the kernel
will compute a new, non-zero value.



Re: [RFC] acpi: add acpitimer_delay(), acpihpet_delay()

2022-08-17 Thread Jonathan Gray
On Wed, Aug 17, 2022 at 12:37:52AM -0500, Scott Cheloha wrote:
> On Wed, Aug 17, 2022 at 02:28:14PM +1000, Jonathan Gray wrote:
> > On Tue, Aug 16, 2022 at 11:53:51AM -0500, Scott Cheloha wrote:
> > > On Sun, Aug 14, 2022 at 11:24:37PM -0500, Scott Cheloha wrote:
> > > > 
> > > > In the future when the LAPIC timer is run in oneshot mode there will
> > > > be no lapic_delay().
> > > > 
> > > > [...]
> > > > 
> > > > This is *very* bad for older amd64 machines, because you are left with
> > > > i8254_delay().
> > > > 
> > > > I would like to offer a less awful delay(9) implementation for this
> > > > class of hardware.  Otherwise we may trip over bizarre phantom bugs on
> > > > MP kernels because only one CPU can read the i8254 at a time.
> > > > 
> > > > [...]
> > > > 
> > > > Real i386 hardware should be fine.  Later models with an ACPI PM timer
> > > > will be fine using acpitimer_delay() instead of i8254_delay().
> > > > 
> > > > [...]
> > > > 
> > > > Here are the sample measurements from my 2017 laptop (kaby lake
> > > > refresh) running the attached patch.  It takes longer than a
> > > > microsecond to read either of the ACPI timers.  The PM timer is better
> > > > than the HPET.  The HPET is a bit better than the i8254.  I hope the
> > > > numbers are a little better on older hardware.
> > > > 
> > > > acpitimer_test_delay:  expected  0.000001000  actual  0.000010638
> > > > error  0.000009638
> > > > acpitimer_test_delay:  expected  0.000010000  actual  0.000015464
> > > > error  0.000005464
> > > > acpitimer_test_delay:  expected  0.000100000  actual  0.000107619
> > > > error  0.000007619
> > > > acpitimer_test_delay:  expected  0.001000000  actual  0.001007275
> > > > error  0.000007275
> > > > acpitimer_test_delay:  expected  0.010000000  actual  0.010007891
> > > > error  0.000007891
> > > > 
> > > > acpihpet_test_delay:   expected  0.000001000  actual  0.000022208
> > > > error  0.000021208
> > > > acpihpet_test_delay:   expected  0.000010000  actual  0.000031690
> > > > error  0.000021690
> > > > acpihpet_test_delay:   expected  0.000100000  actual  0.000112647
> > > > error  0.000012647
> > > > acpihpet_test_delay:   expected  0.001000000  actual  0.001021480
> > > > error  0.000021480
> > > > acpihpet_test_delay:   expected  0.010000000  actual  0.010013736
> > > > error  0.000013736
> > > > 
> > > > i8254_test_delay:  expected  0.000001000  actual  0.000040110
> > > > error  0.000039110
> > > > i8254_test_delay:  expected  0.000010000  actual  0.000039471
> > > > error  0.000029471
> > > > i8254_test_delay:  expected  0.000100000  actual  0.000128031
> > > > error  0.000028031
> > > > i8254_test_delay:  expected  0.001000000  actual  0.001024586
> > > > error  0.000024586
> > > > i8254_test_delay:  expected  0.010000000  actual  0.010021859
> > > > error  0.000021859
> > > 
> > > Attached is an updated patch.  I left the test measurement code in
> > > place because I would like to see a test on a real i386 machine, just
> > > to make sure it works as expected.  I can't imagine why it wouldn't
> > > work, but we should never assume anything.
> > > 
> > > Changes from v1:
> > > 
> > > - Actually set delay_func from acpitimerattach() and
> > >   acpihpet_attach().
> > > 
> > >   I think it's safe to assume, on real hardware, that the ACPI PMT is
> > >   preferable to the i8254 and the HPET is preferable to both of them.
> > > 
> > >   This is not *always* true, but it is true on the older machines that
> > >   can't use tsc_delay(), so the assumption works in practice.
> > > 
> > >   Outside of those three timers, the hierarchy gets murky.  There are
> > >   other timers that are better than the HPET, but they aren't always
> > >   available.  If those timers are already providing delay_func this
> > >   code does not usurp them.
> > 
> > As I understand it, you want lapic to be in one-shot mode for something
> > along the lines of tickless.
> 
> Yes.
> 
> Although "tickless" is a misnomer.
> 
> > So you are trying to find MP machines
> > where TSC is not useable for delay?
> 
> Right.  Those are the only machines where it's relevant to consider
> the accuracy of acpitimer_delay() or acpihpet_delay()... unless I've
> forgotten something.
> 
> > TSC is only considered for delay if the invariant and constant flags
> > are set.
> > invariant:
> > "In the Core i7 and future processor generations, the TSC will continue
> > to run in the deepest C-states. Therefore, the TSC will run at a
> > constant rate in all ACPI P-, C-. and T-states. Support for this feature
> > is indicated by CPUID.0x8000_0007.EDX[8]. On processors with invariant
> > TSC support, the OS may use the TSC for wall clock timer services
> > (instead of ACPI or HPET timers). TSC reads are much more efficient and
> > do not incur the overhead associated with a ring transition or access to
> > a platform resource."
> > 
> > constant:
> > runs at a constant rate across frequency/P 

Re: Race in disk_attach_callback?

2022-08-17 Thread Theo de Raadt
Miod Vallat  wrote:

> Come to think further about it, I think it is better for diskmap to
> always trust disk drivers to either:
> - not have any label (dk_label == NULL, or points to zeroed memory)
> or
> - have a valid label (duid is not zeroes).

What is the result if root runs disklabel, and forces it to all zeros?

(You are already running this diff so you can test faster)

zero duid setups still exist.  I am sure I encountered one in the last year.
So I hesitate about this idea of using it like a flag.



Re: Race in disk_attach_callback?

2022-08-17 Thread Miod Vallat
Come to think further about it, I think it is better for diskmap to
always trust disk drivers to either:
- not have any label (dk_label == NULL, or points to zeroed memory)
or
- have a valid label (duid is not zeroes).

The following diff thus relaxes the logic to always trust
dk_label->d_uid, unless it is zero. This passes the vnd test I mailed
yesterday, without the need for a dev/vnd.c change.

Index: sys/dev/softraid.c
===
RCS file: /OpenBSD/src/sys/dev/softraid.c,v
retrieving revision 1.425
diff -u -p -u -p -r1.425 softraid.c
--- sys/dev/softraid.c  16 Apr 2022 19:19:58 -  1.425
+++ sys/dev/softraid.c  17 Aug 2022 05:20:51 -
@@ -3685,13 +3685,11 @@ sr_ioctl_installboot(struct sr_softc *sc
}
}
 
-   bzero(duid, sizeof(duid));
TAILQ_FOREACH(dk, &disklist, dk_link)
if (!strncmp(dk->dk_name, bb->bb_dev, sizeof(bb->bb_dev)))
break;
if (dk == NULL || dk->dk_label == NULL ||
-   (dk->dk_flags & DKF_LABELVALID) == 0 ||
-   bcmp(dk->dk_label->d_uid, &duid, sizeof(duid)) == 0) {
+   duid_iszero(dk->dk_label->d_uid)) {
sr_error(sc, "failed to get DUID for softraid volume");
goto done;
}
Index: sys/kern/subr_disk.c
===
RCS file: /OpenBSD/src/sys/kern/subr_disk.c,v
retrieving revision 1.253
diff -u -p -u -p -r1.253 subr_disk.c
--- sys/kern/subr_disk.c14 Aug 2022 01:58:27 -  1.253
+++ sys/kern/subr_disk.c17 Aug 2022 05:20:51 -
@@ -1121,7 +1121,6 @@ disk_attach_callback(void *xdat)
/* Read disklabel. */
if (disk_readlabel(&dl, dk->dk_devno, errbuf, sizeof(errbuf)) == NULL) {
enqueue_randomness(dl.d_checksum);
-   dk->dk_flags |= DKF_LABELVALID;
}
 
 done:
@@ -1440,14 +1439,14 @@ setroot(struct device *bootdv, int part,
TAILQ_FOREACH(dk, &disklist, dk_link)
if (dk->dk_device == bootdv)
break;
-   if (dk && (dk->dk_flags & DKF_LABELVALID))
+   if (dk)
bcopy(dk->dk_label->d_uid, bootduid, sizeof(bootduid));
} else if (bootdv == NULL) {
/* Locate boot disk based on the provided DUID. */
TAILQ_FOREACH(dk, &disklist, dk_link)
if (duid_equal(dk->dk_label->d_uid, bootduid))
break;
-   if (dk && (dk->dk_flags & DKF_LABELVALID))
+   if (dk)
bootdv = dk->dk_device;
}
bcopy(bootduid, rootduid, sizeof(rootduid));
@@ -1561,8 +1560,7 @@ gotswap:
if (bootdv->dv_class == DV_DISK) {
if (!duid_iszero(rootduid)) {
TAILQ_FOREACH(dk, &disklist, dk_link)
-   if ((dk->dk_flags & DKF_LABELVALID) &&
-   dk->dk_label && duid_equal(
+   if (dk->dk_label && duid_equal(
dk->dk_label->d_uid, rootduid))
break;
if (dk == NULL)
@@ -1788,7 +1786,8 @@ disk_map(char *path, char *mappath, int 
 
mdk = NULL;
TAILQ_FOREACH(dk, &disklist, dk_link) {
-   if ((dk->dk_flags & DKF_LABELVALID) && dk->dk_label &&
+   if (dk->dk_label &&
+   !duid_iszero(dk->dk_label->d_uid) &&
memcmp(dk->dk_label->d_uid, uid,
sizeof(dk->dk_label->d_uid)) == 0) {
/* Fail if there are duplicate UIDs! */
Index: sys/sys/disk.h
===
RCS file: /OpenBSD/src/sys/sys/disk.h,v
retrieving revision 1.36
diff -u -p -u -p -r1.36 disk.h
--- sys/sys/disk.h  4 May 2017 22:47:27 -   1.36
+++ sys/sys/disk.h  17 Aug 2022 05:20:51 -
@@ -83,7 +83,6 @@ struct disk {
 #define DKF_CONSTRUCTED0x0001
 #define DKF_OPENED 0x0002
 #define DKF_NOLABELREAD0x0004
-#define DKF_LABELVALID 0x0008
 
/*
 * Metrics data; note that some metrics may have no meaning