Module Name:    src
Committed By:   rillig
Date:           Sun Apr  7 12:05:23 UTC 2024

Modified Files:
        src/common/lib/libutil: snprintb.c
        src/lib/libutil: snprintb.3
        src/tests/lib/libutil: t_snprintb.c

Log Message:
snprintb: reject combinations of 'f' with ':' as well as 'F' with '='

These combinations would lead to garbled output.


To generate a diff of this commit:
cvs rdiff -u -r1.46 -r1.47 src/common/lib/libutil/snprintb.c
cvs rdiff -u -r1.36 -r1.37 src/lib/libutil/snprintb.3
cvs rdiff -u -r1.33 -r1.34 src/tests/lib/libutil/t_snprintb.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/common/lib/libutil/snprintb.c
diff -u src/common/lib/libutil/snprintb.c:1.46 src/common/lib/libutil/snprintb.c:1.47
--- src/common/lib/libutil/snprintb.c:1.46	Sun Apr  7 10:10:54 2024
+++ src/common/lib/libutil/snprintb.c	Sun Apr  7 12:05:23 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: snprintb.c,v 1.46 2024/04/07 10:10:54 rillig Exp $	*/
+/*	$NetBSD: snprintb.c,v 1.47 2024/04/07 12:05:23 rillig Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2024 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
 
 #  include <sys/cdefs.h>
 #  if defined(LIBC_SCCS)
-__RCSID("$NetBSD: snprintb.c,v 1.46 2024/04/07 10:10:54 rillig Exp $");
+__RCSID("$NetBSD: snprintb.c,v 1.47 2024/04/07 12:05:23 rillig Exp $");
 #  endif
 
 #  include <sys/types.h>
@@ -46,7 +46,7 @@ __RCSID("$NetBSD: snprintb.c,v 1.46 2024
 #  include <errno.h>
 # else /* ! _KERNEL */
 #  include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: snprintb.c,v 1.46 2024/04/07 10:10:54 rillig Exp $");
+__KERNEL_RCSID(0, "$NetBSD: snprintb.c,v 1.47 2024/04/07 12:05:23 rillig Exp $");
 #  include <sys/param.h>
 #  include <sys/inttypes.h>
 #  include <sys/systm.h>
@@ -150,7 +150,8 @@ old_style(state *s)
 static int
 new_style(state *s)
 {
-	uint64_t field = s->val;
+	uint8_t field_kind = 0;	// 0 or 'f' or 'F'
+	uint64_t field = 0;	// valid if field_kind != '\0'
 	int matched = 1;
 	const char *prev_bitfmt = s->bitfmt;
 	while (*s->bitfmt != '\0') {
@@ -158,6 +159,7 @@ new_style(state *s)
 		uint8_t kind = cur_bitfmt[0];
 		switch (kind) {
 		case 'b':
+			field_kind = 0;
 			prev_bitfmt = cur_bitfmt;
 			uint8_t b_bit = cur_bitfmt[1];
 			if (b_bit >= 64)
@@ -174,6 +176,7 @@ new_style(state *s)
 			break;
 		case 'f':
 		case 'F':
+			field_kind = kind;
 			prev_bitfmt = cur_bitfmt;
 			matched = 0;
 			uint8_t f_lsb = cur_bitfmt[1];
@@ -200,6 +203,10 @@ new_style(state *s)
 		case '=':
 		case ':':
 			s->bitfmt += 2;
+			if (kind == '=' && field_kind != 'f')
+				return -1;
+			if (kind == ':' && field_kind != 'F')
+				return -1;
 			uint8_t cmp = cur_bitfmt[1];
 			if (cur_bitfmt[2] == '\0')
 				return -1;
@@ -213,6 +220,8 @@ new_style(state *s)
 			maybe_wrap_line(s, prev_bitfmt);
 			break;
 		case '*':
+			if (field_kind == 0)
+				return -1;
 			if (cur_bitfmt[1] == '\0')
 				return -1;
 			s->bitfmt++;

Index: src/lib/libutil/snprintb.3
diff -u src/lib/libutil/snprintb.3:1.36 src/lib/libutil/snprintb.3:1.37
--- src/lib/libutil/snprintb.3:1.36	Thu Feb 29 21:08:54 2024
+++ src/lib/libutil/snprintb.3	Sun Apr  7 12:05:23 2024
@@ -1,4 +1,4 @@
-.\"     $NetBSD: snprintb.3,v 1.36 2024/02/29 21:08:54 rillig Exp $
+.\"     $NetBSD: snprintb.3,v 1.37 2024/04/07 12:05:23 rillig Exp $
 .\"
 .\" Copyright (c) 1998, 2024 The NetBSD Foundation, Inc.
 .\" All rights reserved.
@@ -48,7 +48,6 @@ The
 .Fn snprintb
 function formats a bitmask into a mnemonic form suitable for printing.
 .Pp
-This conversion is useful for decoding bit fields in device registers.
 It formats the integer
 .Fa val
 into the buffer
@@ -103,13 +102,18 @@ character
 The decoding directive in
 .Fa fmt
 describes how the bitfield is to be interpreted and displayed.
-It follows two possible syntaxes, referred to as
+It follows two possible formats, referred to as
 .Dq old
 and
 .Dq new .
-The main advantage of the
+The
+.Dq old
+format is limited to describing single bits in a 32-bit value,
+the bit positions are 1-based.
+The
 .Dq new
-formatting is that it is capable of handling multi-bit fields.
+format supports multi-bit fields and 64-bit values,
+the bit positions are 0-based.
 .Pp
 If the first character of
 .Fa fmt
@@ -119,153 +123,149 @@ the remainder of the
 .Fa fmt
 argument follows the
 .Dq new
-syntax.
-The second character
-.Pq the first for the old format
-is a binary character representation of the
-output numeral base in which the bitfield will be printed before it is decoded.
-Recognized radix values
+format.
+.Pp
+The next character
+.Po the first for the
+.Dq old
+format
+.Pc
+specifies the numeral base in which to print the numbers in the output.
+The possible values
 .Pq in C escape-character format
 are
-.Ql \e10
-.Pq octal ,
-.Ql \e12
-.Pq decimal ,
-and
-.Ql \e20
-.Pq hexadecimal .
+.Ql \e010
+for octal,
+.Ql \e012
+for decimal, and
+.Ql \e020
+for hexadecimal.
 .Pp
 The remaining characters in the
 .Fa fmt
-argument are interpreted as a list of formatting directives.
+argument represent the formatting conversions,
+according to the
+.Dq old
+or
+.Dq new
+format.
 .
-.Ss Old Syntax
+.Ss Old Format
 .Pp
-The
+In the
 .Dq old
-format syntax is a series of bit-position\(endescription pairs.
-.Pp
-Each directive begins with a binary character value that represents
-the position of the bit being described.
-.Pp
-.Sy NB :
-the bit positions in the old syntax are
-.Em 1-based\^ !
-A bit position value of 1
-.Pq Ql \e1
-describes the least significant bit.
-Whereas a position value of 32
-.Po octal
-.Ql \e040 ,
-hexadecimal
-.Ql \ex20 ,
-the ASCII space character
-.Pc
-describes the most significant bit.
-The old syntax is limited to 32-bit values.
+format, each conversion specifies a bit position
+and a description that is printed if the corresponding bit is set.
 .Pp
-The remaining characters are the description to print should the bit
-being described be set.
-.Pp
-Descriptions are delimited by the next bit position value character
-encountered
-.Pq distinguishable by its value being \*[Le] 32 ,
+The bit position is a 1-based single-byte binary value,
+ranging from
+.Ql \e001
+(1) for the least significant bit up to
+.Ql \e040
+(32) for the most significant bit.
+.Pp
+The description is delimited by the next character whose value is 32 or less
+.Po see
+.Xr ascii 7
+.Pc ,
 or by the end of the format string itself.
 .
-.Ss New Syntax
+.Ss New Format
 .Pp
-For the
+In the
 .Dq new
-format syntax,
-a formatting directive begins with a field type
-followed by a binary field position and possibly a field length,
-followed by a description.
+format,
+each conversion begins with a conversion type,
+followed by type-specific parameters, each encoded as a single byte,
+followed by a
+.Tn NUL Ns -terminated description.
 The bit positions are 0-based,
-the least significant bit is bit-position zero.
-Each description is terminated by a
-.Tn NUL
-byte.
+they range from
+.Sq \e000
+for the least significant bit to
+.Sq \e077
+(63) for the most significant bit.
 .
 .Bl -tag -width Cm
 .
-.It Cm b\e Ns Ar B
-Describes a single bit at bit-position
-.Ar B .
-The remaining characters are the description to print should the bit
-being described be set.
-This field directive is similar in function to the old format.
-When converting old formats to the new syntax don't forget that the
-new syntax uses zero-based bit positions.
+.It Cm b Ar bit Ar descr
+Prints the description from
+.Ar descr
+if the bit at the position
+.Ar bit
+is set.
 .
-.It Cm f\e Ns Ar B Ns Cm \e Ns Ar L
-Describes a multi-bit field beginning at bit-position
-.Ar B
-and having a bit-length of
-.Ar L .
-The remaining characters are printed as a description of the field
-followed by
-.Ql \&=
-and the value of the field.
-The value of the field is printed in the base specified as the second
-character of the
-.Ar fmt
-argument.
+.It Cm f Ar lsb Ar width Ar descr
+Prints the description from
+.Ar descr ,
+a delimiting
+.Sq \&=
+and the numerical value of the multi-bit field
+whose least significant bit is at
+.Ar lsb
+and that spans
+.Ar width
+bits.
+To print individual values of the field, see the
+.Sq Cm \&=
+and
+.Sq Cm \&*
+conversions below.
 .
-.It Cm F\e Ns Ar B Ns Cm \e Ns Ar L
+.It Cm F Ar lsb Ar width Op Ar descr
 Describes a multi-bit field like
 .Sq Cm f ,
 but just extracts the value for use with the
-.Sq Cm \&=
-and
 .Sq Cm \&:
-formatting directives described below.
+and
+.Sq Cm \&*
+conversions below.
+The description from
+.Ar descr
+is ignored,
+it is only present for uniformity with the other conversions.
 .
-.It Cm \&=\e Ns Ar V
-The field previously extracted by the last
+.It Cm \&= Ar cmp Ar descr
+Compares the field value from the previous
 .Sq Cm f
-or
-.Sq Cm F
-directive is compared to the byte value
-.Ar V
-.Pq for values 0 through 255 .
-If they are equal,
+conversion to the single-byte value
+.Ar cmp ,
+which ranges from 0 to 255.
+If they are equal, prints
 .Ql \&=
-followed by the description string following
-.Ar V
-is printed.
-This and the
-.Sq Cm \&:
-directive may be repeated to annotate multiple possible values.
+followed by the description from
+.Ar descr .
+This conversion may be repeated.
 .
-.It Cm \&:\e Ns Ar V
-Operates like the
-.Sq Cm \&=
-directive, but omits the leading
-.Ql \&= .
+.It Cm \&: Ar cmp Ar descr
+Compares the field value from the previous
+.Sq Cm F
+conversion to the single-byte value
+.Ar cmp ,
+which ranges from 0 to 255.
+If they are equal, prints the description from
+.Ar descr .
+This conversion may be repeated.
 .
-.It Cm * Ns Ar FMT
-This provides a
-.Dq default
-case that prints the extracted field value using the
+.It Cm * Ar fmt
+If none of the previous
+.Sq Cm \&=
+or
+.Sq Cm \&:
+conversions matched, prints the field value, using the
 .Xr printf 3
 format
-.Ar FMT
-when other
-.Sq Cm \&:
-or
-.Sq Cm \&=
-directives have not matched.
-.Ar FMT
-may contain a
+.Ar fmt .
+The format string
+.Ar fmt
+may contain a single
 .Vt uintmax_t
-format specification that prints the value that
-did not match, since the field can be more than 32 bits wide.
+conversion specification that prints the value that did not match.
 .El
 .Pp
 The new format is terminated by an additional
 .Tn NUL
-character at the end, following that delimiting the last
-bit-position\(endescription pair.
+character at the end, following that delimiting the last conversion.
 This
 .Tn NUL
 is supplied by the compiler to terminate the string literal and
@@ -302,8 +302,7 @@ An example of the new formatting style:
 .Bd -literal -offset indent
 snprintb(buf, bufsize,
     "\e177\e020"
-    "b\e0LSB\e0" "b\e1BITONE\e0"
-    "f\e4\e4NIBBLE2\e0"
+    "b\e0LSB\e0" "b\e1BITONE\e0" "f\e4\e4NIBBLE2\e0"
     "f\ex10\e4BURST\e0" "=\e4FOUR\e0" "=\exf""FIFTEEN\e0"
     "b\ex1fMSB\e0",
     0x800f0701)
@@ -400,6 +399,9 @@ does not describe a supported numeral ba
 or a bit number in the
 .Ar fmt
 argument is out of bounds,
+or the sequence of conversions in the
+.Ar fmt
+argument is invalid,
 or
 .Fn snprintf
 failed.

Index: src/tests/lib/libutil/t_snprintb.c
diff -u src/tests/lib/libutil/t_snprintb.c:1.33 src/tests/lib/libutil/t_snprintb.c:1.34
--- src/tests/lib/libutil/t_snprintb.c:1.33	Sun Apr  7 10:10:54 2024
+++ src/tests/lib/libutil/t_snprintb.c	Sun Apr  7 12:05:23 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: t_snprintb.c,v 1.33 2024/04/07 10:10:54 rillig Exp $ */
+/* $NetBSD: t_snprintb.c,v 1.34 2024/04/07 12:05:23 rillig Exp $ */
 
 /*
  * Copyright (c) 2002, 2004, 2008, 2010, 2024 The NetBSD Foundation, Inc.
@@ -32,7 +32,7 @@
 #include <sys/cdefs.h>
 __COPYRIGHT("@(#) Copyright (c) 2008, 2010, 2024\
  The NetBSD Foundation, inc. All rights reserved.");
-__RCSID("$NetBSD: t_snprintb.c,v 1.33 2024/04/07 10:10:54 rillig Exp $");
+__RCSID("$NetBSD: t_snprintb.c,v 1.34 2024/04/07 12:05:23 rillig Exp $");
 
 #include <stdio.h>
 #include <string.h>
@@ -123,6 +123,9 @@ check_snprintb_m(const char *file, size_
 #define	h_snprintb_error(bitfmt, want_buf)				\
 	h_snprintb_m_len(1024, bitfmt, 0x00, 0, -1, want_buf)
 
+#define	h_snprintb_val_error(bitfmt, val, want_buf)			\
+	h_snprintb_m_len(1024, bitfmt, val, 0, -1, want_buf)
+
 #define	h_snprintb_m(bitfmt, val, line_max, want_buf)			\
 	h_snprintb_m_len(1024, bitfmt, val, line_max,			\
 	    sizeof(want_buf) - 1, want_buf)
@@ -419,15 +422,13 @@ ATF_TC_BODY(snprintb, tc)
 	//
 	// The description of a 'b' conversion must not be empty, as the
 	// output would contain several commas in a row.
-	h_snprintb_len(
-	    1024,
+	h_snprintb_val_error(
 	    "\177\020"
 	    "b\000lsb\0"
 	    "b\001\0"
 	    "b\002\0"
 	    "b\007msb\0",
 	    0xff,
-	    -1,
 	    "0xff<lsb#");
 
 	// new style single bits, bit number too large
@@ -591,13 +592,11 @@ ATF_TC_BODY(snprintb, tc)
 	//
 	// The description of an 'f' conversion must not be empty, as the
 	// output would contain an isolated '='.
-	h_snprintb_len(
-	    1024,
+	h_snprintb_val_error(
 	    "\177\020"
 	    "f\000\004\0"
 		"=\001one\0",
 	    0x1,
-	    -1,
 	    "0x1#");
 
 	// new style bit-field, non-printable description
@@ -620,29 +619,25 @@ ATF_TC_BODY(snprintb, tc)
 	//
 	// The description of a '=' conversion must not be empty, as the
 	// output would contain several '=' in a row.
-	h_snprintb_len(
-	    1024,
+	h_snprintb_val_error(
 	    "\177\020"
 	    "f\000\004f\0"
 		"=\001one\0"
 		"=\001\0"
 		"=\001\0",
 	    0x1,
-	    -1,
 	    "0x1<f=0x1=one#");
 
 	// new style bit-field, 'F' followed by ':' with empty description
 	//
 	// The description of a ':' conversion must not be empty, as the
 	// output would contain empty angle brackets.
-	h_snprintb_len(
-	    1024,
+	h_snprintb_val_error(
 	    "\177\020"
 	    "F\000\004\0"
 		":\001\0"
 		"*default\0",
 	    0x1,
-	    -1,
 	    "0x1<#");
 
 	// new style bit-field, 'F', ':' with empty description, '*'
@@ -651,14 +646,12 @@ ATF_TC_BODY(snprintb, tc)
 	// output would contain empty angle brackets. Not in this particular
 	// test case, as the value is different, but the structural error is
 	// detected nevertheless.
-	h_snprintb_len(
-	    1024,
+	h_snprintb_val_error(
 	    "\177\020"
 	    "F\000\004\0"
 		":\001\0"
 		"*default\0",
 	    0x2,
-	    -1,
 	    "0x2<#");
 
 	// new style bit-field, 'f' with non-exhaustive '='
@@ -819,29 +812,24 @@ ATF_TC_BODY(snprintb, tc)
 	    "0xff<f=0xff=000000000000000000000000000255%>");
 
 	// new style unknown directive, at the beginning
-	h_snprintb_len(
-	    128,
+	h_snprintb_val_error(
 	    "\177\020"
 	    "unknown\0",
 	    0xff,
-	    -1,
 	    "0xff#");
 
 	// new style unknown directive, after a known directive
-	h_snprintb_len(
-	    128,
+	h_snprintb_val_error(
 	    "\177\020"
 	    "b\007msb\0"
 	    "unknown\0",
 	    0xff,
-	    -1,
 	    "0xff<msb#");
 
 	// new style combinations, 'b' '='
 	//
-	// A '=' directive without a preceding 'f' or 'F' directive generates
-	// misleading output outside the angle brackets, which is a mistake.
-	h_snprintb(
+	// A '=' conversion requires a preceding 'f' conversion.
+	h_snprintb_val_error(
 	    "\177\020"
 	    "b\004bit4\0"
 		"=\000clear\0"
@@ -852,14 +840,12 @@ ATF_TC_BODY(snprintb, tc)
 		"=\001set\0"
 		"=\245complete\0",
 	    0xa5,
-	    "0xa5=complete<bit0=complete>");
+	    "0xa5#");
 
 	// new style combinations, 'b' ':'
 	//
-	// A ':' directive without a preceding 'f' or 'F' directive generates
-	// misleading output outside or inside the angle brackets, which is a
-	// mistake.
-	h_snprintb(
+	// A ':' conversion requires a preceding 'f' or 'F' conversion.
+	h_snprintb_val_error(
 	    "\177\020"
 	    "b\004bit4\0"
 		":\000clear\0"
@@ -870,96 +856,93 @@ ATF_TC_BODY(snprintb, tc)
 		":\001set\0"
 		":\245complete\0",
 	    0xa5,
-	    "0xa5complete<bit0complete>");
+	    "0xa5#");
 
 	// new style combinations, 'b' '*'
 	//
-	// A '*' directive without a preceding 'f' or 'F' directive is ignored.
-	h_snprintb(
+	// A '*' conversion requires a preceding 'f' or 'F' conversion.
+	h_snprintb_val_error(
 	    "\177\020"
 	    "b\004bit4\0"
 		"*default(%ju)\0"
 	    "b\000bit0\0"
 		"*default(%ju)\0",
 	    0xa5,
-	    "0xa5<bit0>");
+	    "0xa5#");
 
 	// new style combinations, 'f' 'b' '='
 	//
-	// A 'b' directive that occurs between an 'f' and an '=' directive
-	// generates misleading output, which is a mistake.
-	h_snprintb(
+	// A '=' conversion requires a preceding 'f' conversion, there must
+	// not be a 'b' conversion in between.
+	h_snprintb_val_error(
 	    "\177\020"
 	    "f\000\010f\0"
 	    "b\005bit5\0"
 		"=\245match\0",
 	    0xa5,
-	    "0xa5<f=0xa5,bit5=match>");
+	    "0xa5<f=0xa5,bit5#");
 
-	// new style combinations, 'f' 'b' ':'
+	// new style combinations, 'F' 'b' ':'
 	//
-	// A 'b' directive that occurs between an 'f' and a ':' directive
-	// generates misleading output, which is a mistake.
-	h_snprintb(
+	// A ':' conversion requires a preceding 'F' conversion, there must
+	// not be a 'b' conversion in between.
+	//
+	// The isolated leading comma is produced by the non-exhaustive 'F'
+	// conversion. Detecting these at runtime would be too costly.
+	h_snprintb_val_error(
 	    "\177\020"
-	    "f\000\010f\0"
+	    "F\000\010f\0"
 	    "b\005bit5\0"
 		":\245match\0",
 	    0xa5,
-	    "0xa5<f=0xa5,bit5match>");
+	    "0xa5<,bit5#");
 
 	// new style combinations, 'f' ':'
 	//
-	// Combining the 'f' directive with the ':' directive produces the
-	// misleading output '0x1one', which is a mistake.
-	h_snprintb(
+	// The ':' conversion requires a preceding 'F' conversion, not 'f'.
+	h_snprintb_val_error(
 	    "\177\20"
 	    "f\000\004nibble\0"
 		":\001one\0",
 	    0x01,
-	    "0x1<nibble=0x1one>");
+	    "0x1<nibble=0x1#");
 
 	// new style combinations, 'F' '='
 	//
-	// Combining the 'F' and '=' directives outputs an isolated '=', which
-	// is a mistake.
-	h_snprintb(
+	// A '=' conversion requires a preceding 'f' conversion, not 'F'.
+	h_snprintb_val_error(
 	    "\177\20"
 	    "F\000\004\0"
 		"=\001one\0",
 	    0x01,
-	    "0x1<=one>");
+	    "0x1<#");
 
 	// new style combinations, '='
 	//
-	// A '=' directive without a preceding 'f' or 'F' directive generates
-	// output that doesn't match the standard '0xaa<description>' form,
-	// which is a mistake.
-	h_snprintb(
+	// A '=' conversion requires a preceding 'f' or 'F' conversion.
+	h_snprintb_val_error(
 	    "\177\020"
 		"=\245match\0",
 	    0xa5,
-	    "0xa5=match");
+	    "0xa5#");
 
 	// new style combinations, ':'
 	//
-	// A ':' directive without a preceding 'f' or 'F' directive generates
-	// misleading output, which is a mistake.
-	h_snprintb(
+	// A ':' conversion requires a preceding 'f' or 'F' conversion.
+	h_snprintb_val_error(
 	    "\177\020"
 		":\245match\0",
 	    0xa5,
-	    "0xa5match");
+	    "0xa5#");
 
 	// new style combinations, '*'
 	//
-	// A '*' directive without a preceding 'f' or 'F' is useless, which is
-	// a mistake.
-	h_snprintb(
+	// A '*' conversion requires a preceding 'f' or 'F' conversion.
+	h_snprintb_val_error(
 	    "\177\020"
 		"*match\0",
 	    0xa5,
-	    "0xa5");
+	    "0xa5#");
 
 	// new style combinations, 'f' '*' '='
 	//
@@ -1492,7 +1475,7 @@ ATF_TC_BODY(snprintb_m, tc)
 	h_snprintb_m(
 	    "\177\020"
 	    "f\000\004bits\0"
-		":\000zero\0",
+		"=\000zero\0",
 	    0xff,
 	    11,
 	    "0xff<bits=#\0");

Reply via email to