Module Name: src
Committed By: rillig
Date: Sun Apr 7 12:05:23 UTC 2024
Modified Files:
src/common/lib/libutil: snprintb.c
src/lib/libutil: snprintb.3
src/tests/lib/libutil: t_snprintb.c
Log Message:
snprintb: reject combinations of 'f' with ':' as well as 'F' with '='
These combinations would lead to garbled output.
To generate a diff of this commit:
cvs rdiff -u -r1.46 -r1.47 src/common/lib/libutil/snprintb.c
cvs rdiff -u -r1.36 -r1.37 src/lib/libutil/snprintb.3
cvs rdiff -u -r1.33 -r1.34 src/tests/lib/libutil/t_snprintb.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/common/lib/libutil/snprintb.c
diff -u src/common/lib/libutil/snprintb.c:1.46 src/common/lib/libutil/snprintb.c:1.47
--- src/common/lib/libutil/snprintb.c:1.46 Sun Apr 7 10:10:54 2024
+++ src/common/lib/libutil/snprintb.c Sun Apr 7 12:05:23 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: snprintb.c,v 1.46 2024/04/07 10:10:54 rillig Exp $ */
+/* $NetBSD: snprintb.c,v 1.47 2024/04/07 12:05:23 rillig Exp $ */
/*-
* Copyright (c) 2002, 2024 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
# include <sys/cdefs.h>
# if defined(LIBC_SCCS)
-__RCSID("$NetBSD: snprintb.c,v 1.46 2024/04/07 10:10:54 rillig Exp $");
+__RCSID("$NetBSD: snprintb.c,v 1.47 2024/04/07 12:05:23 rillig Exp $");
# endif
# include <sys/types.h>
@@ -46,7 +46,7 @@ __RCSID("$NetBSD: snprintb.c,v 1.46 2024
# include <errno.h>
# else /* ! _KERNEL */
# include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: snprintb.c,v 1.46 2024/04/07 10:10:54 rillig Exp $");
+__KERNEL_RCSID(0, "$NetBSD: snprintb.c,v 1.47 2024/04/07 12:05:23 rillig Exp $");
# include <sys/param.h>
# include <sys/inttypes.h>
# include <sys/systm.h>
@@ -150,7 +150,8 @@ old_style(state *s)
static int
new_style(state *s)
{
- uint64_t field = s->val;
+ uint8_t field_kind = 0; // 0 or 'f' or 'F'
+ uint64_t field = 0; // valid if field_kind != '\0'
int matched = 1;
const char *prev_bitfmt = s->bitfmt;
while (*s->bitfmt != '\0') {
@@ -158,6 +159,7 @@ new_style(state *s)
uint8_t kind = cur_bitfmt[0];
switch (kind) {
case 'b':
+ field_kind = 0;
prev_bitfmt = cur_bitfmt;
uint8_t b_bit = cur_bitfmt[1];
if (b_bit >= 64)
@@ -174,6 +176,7 @@ new_style(state *s)
break;
case 'f':
case 'F':
+ field_kind = kind;
prev_bitfmt = cur_bitfmt;
matched = 0;
uint8_t f_lsb = cur_bitfmt[1];
@@ -200,6 +203,10 @@ new_style(state *s)
case '=':
case ':':
s->bitfmt += 2;
+ if (kind == '=' && field_kind != 'f')
+ return -1;
+ if (kind == ':' && field_kind != 'F')
+ return -1;
uint8_t cmp = cur_bitfmt[1];
if (cur_bitfmt[2] == '\0')
return -1;
@@ -213,6 +220,8 @@ new_style(state *s)
maybe_wrap_line(s, prev_bitfmt);
break;
case '*':
+ if (field_kind == 0)
+ return -1;
if (cur_bitfmt[1] == '\0')
return -1;
s->bitfmt++;
Index: src/lib/libutil/snprintb.3
diff -u src/lib/libutil/snprintb.3:1.36 src/lib/libutil/snprintb.3:1.37
--- src/lib/libutil/snprintb.3:1.36 Thu Feb 29 21:08:54 2024
+++ src/lib/libutil/snprintb.3 Sun Apr 7 12:05:23 2024
@@ -1,4 +1,4 @@
-.\" $NetBSD: snprintb.3,v 1.36 2024/02/29 21:08:54 rillig Exp $
+.\" $NetBSD: snprintb.3,v 1.37 2024/04/07 12:05:23 rillig Exp $
.\"
.\" Copyright (c) 1998, 2024 The NetBSD Foundation, Inc.
.\" All rights reserved.
@@ -48,7 +48,6 @@ The
.Fn snprintb
function formats a bitmask into a mnemonic form suitable for printing.
.Pp
-This conversion is useful for decoding bit fields in device registers.
It formats the integer
.Fa val
into the buffer
@@ -103,13 +102,18 @@ character
The decoding directive in
.Fa fmt
describes how the bitfield is to be interpreted and displayed.
-It follows two possible syntaxes, referred to as
+It follows two possible formats, referred to as
.Dq old
and
.Dq new .
-The main advantage of the
+The
+.Dq old
+format is limited to describing single bits in a 32-bit value,
+the bit positions are 1-based.
+The
.Dq new
-formatting is that it is capable of handling multi-bit fields.
+format supports multi-bit fields and 64-bit values,
+the bit positions are 0-based.
.Pp
If the first character of
.Fa fmt
@@ -119,153 +123,149 @@ the remainder of the
.Fa fmt
argument follows the
.Dq new
-syntax.
-The second character
-.Pq the first for the old format
-is a binary character representation of the
-output numeral base in which the bitfield will be printed before it is decoded.
-Recognized radix values
+format.
+.Pp
+The next character
+.Po the first for the
+.Dq old
+format
+.Pc
+specifies the numeral base in which to print the numbers in the output.
+The possible values
.Pq in C escape-character format
are
-.Ql \e10
-.Pq octal ,
-.Ql \e12
-.Pq decimal ,
-and
-.Ql \e20
-.Pq hexadecimal .
+.Ql \e010
+for octal,
+.Ql \e012
+for decimal, and
+.Ql \e020
+for hexadecimal.
.Pp
The remaining characters in the
.Fa fmt
-argument are interpreted as a list of formatting directives.
+argument represent the formatting conversions,
+according to the
+.Dq old
+or
+.Dq new
+format.
.
-.Ss Old Syntax
+.Ss Old Format
.Pp
-The
+In the
.Dq old
-format syntax is a series of bit-position\(endescription pairs.
-.Pp
-Each directive begins with a binary character value that represents
-the position of the bit being described.
-.Pp
-.Sy NB :
-the bit positions in the old syntax are
-.Em 1-based\^ !
-A bit position value of 1
-.Pq Ql \e1
-describes the least significant bit.
-Whereas a position value of 32
-.Po octal
-.Ql \e040 ,
-hexadecimal
-.Ql \ex20 ,
-the ASCII space character
-.Pc
-describes the most significant bit.
-The old syntax is limited to 32-bit values.
+format, each conversion specifies a bit position
+and a description that is printed if the corresponding bit is set.
.Pp
-The remaining characters are the description to print should the bit
-being described be set.
-.Pp
-Descriptions are delimited by the next bit position value character
-encountered
-.Pq distinguishable by its value being \*[Le] 32 ,
+The bit position is a 1-based single-byte binary value,
+ranging from
+.Ql \e001
+(1) for the least significant bit up to
+.Ql \e040
+(32) for the most significant bit.
+.Pp
+The description is delimited by the next character whose value is 32 or less
+.Po see
+.Xr ascii 7
+.Pc ,
or by the end of the format string itself.
.
-.Ss New Syntax
+.Ss New Format
.Pp
-For the
+In the
.Dq new
-format syntax,
-a formatting directive begins with a field type
-followed by a binary field position and possibly a field length,
-followed by a description.
+format,
+each conversion begins with a conversion type,
+followed by type-specific parameters, each encoded as a single byte,
+followed by a
+.Tn NUL Ns -terminated description.
The bit positions are 0-based,
-the least significant bit is bit-position zero.
-Each description is terminated by a
-.Tn NUL
-byte.
+they range from
+.Sq \e000
+for the least significant bit to
+.Sq \e077
+(63) for the most significant bit.
.
.Bl -tag -width Cm
.
-.It Cm b\e Ns Ar B
-Describes a single bit at bit-position
-.Ar B .
-The remaining characters are the description to print should the bit
-being described be set.
-This field directive is similar in function to the old format.
-When converting old formats to the new syntax don't forget that the
-new syntax uses zero-based bit positions.
+.It Cm b Ar bit Ar descr
+Prints the description from
+.Ar descr
+if the bit at the position
+.Ar bit
+is set.
.
-.It Cm f\e Ns Ar B Ns Cm \e Ns Ar L
-Describes a multi-bit field beginning at bit-position
-.Ar B
-and having a bit-length of
-.Ar L .
-The remaining characters are printed as a description of the field
-followed by
-.Ql \&=
-and the value of the field.
-The value of the field is printed in the base specified as the second
-character of the
-.Ar fmt
-argument.
+.It Cm f Ar lsb Ar width Ar descr
+Prints the description from
+.Ar descr ,
+a delimiting
+.Sq \&=
+and the numerical value of the multi-bit field
+whose least significant bit is at
+.Ar lsb
+and that spans
+.Ar width
+bits.
+To print individual values of the field, see the
+.Sq Cm \&=
+and
+.Sq Cm \&*
+conversions below.
.
-.It Cm F\e Ns Ar B Ns Cm \e Ns Ar L
+.It Cm F Ar lsb Ar width Op Ar descr
Describes a multi-bit field like
.Sq Cm f ,
but just extracts the value for use with the
-.Sq Cm \&=
-and
.Sq Cm \&:
-formatting directives described below.
+and
+.Sq Cm \&*
+conversions below.
+The description from
+.Ar descr
+is ignored,
+it is only present for uniformity with the other conversions.
.
-.It Cm \&=\e Ns Ar V
-The field previously extracted by the last
+.It Cm \&= Ar cmp Ar descr
+Compares the field value from the previous
.Sq Cm f
-or
-.Sq Cm F
-directive is compared to the byte value
-.Ar V
-.Pq for values 0 through 255 .
-If they are equal,
+conversion to the single-byte value
+.Ar cmp ,
+which ranges from 0 to 255.
+If they are equal, prints
.Ql \&=
-followed by the description string following
-.Ar V
-is printed.
-This and the
-.Sq Cm \&:
-directive may be repeated to annotate multiple possible values.
+followed by the description from
+.Ar descr .
+This conversion may be repeated.
.
-.It Cm \&:\e Ns Ar V
-Operates like the
-.Sq Cm \&=
-directive, but omits the leading
-.Ql \&= .
+.It Cm \&: Ar cmp Ar descr
+Compares the field value from the previous
+.Sq Cm F
+conversion to the single-byte value
+.Ar cmp ,
+which ranges from 0 to 255.
+If they are equal, prints the description from
+.Ar descr .
+This conversion may be repeated.
.
-.It Cm * Ns Ar FMT
-This provides a
-.Dq default
-case that prints the extracted field value using the
+.It Cm * Ar fmt
+If none of the previous
+.Sq Cm \&=
+or
+.Sq Cm \&:
+conversions matched, prints the field value, using the
.Xr printf 3
format
-.Ar FMT
-when other
-.Sq Cm \&:
-or
-.Sq Cm \&=
-directives have not matched.
-.Ar FMT
-may contain a
+.Ar fmt .
+The format string
+.Ar fmt
+may contain a single
.Vt uintmax_t
-format specification that prints the value that
-did not match, since the field can be more than 32 bits wide.
+conversion specification that prints the value that did not match.
.El
.Pp
The new format is terminated by an additional
.Tn NUL
-character at the end, following that delimiting the last
-bit-position\(endescription pair.
+character at the end, following that delimiting the last conversion.
This
.Tn NUL
is supplied by the compiler to terminate the string literal and
@@ -302,8 +302,7 @@ An example of the new formatting style:
.Bd -literal -offset indent
snprintb(buf, bufsize,
"\e177\e020"
- "b\e0LSB\e0" "b\e1BITONE\e0"
- "f\e4\e4NIBBLE2\e0"
+ "b\e0LSB\e0" "b\e1BITONE\e0" "f\e4\e4NIBBLE2\e0"
"f\ex10\e4BURST\e0" "=\e4FOUR\e0" "=\exf""FIFTEEN\e0"
"b\ex1fMSB\e0",
0x800f0701)
@@ -400,6 +399,9 @@ does not describe a supported numeral ba
or a bit number in the
.Ar fmt
argument is out of bounds,
+or the sequence of conversions in the
+.Ar fmt
+argument is invalid,
or
.Fn snprintf
failed.
Index: src/tests/lib/libutil/t_snprintb.c
diff -u src/tests/lib/libutil/t_snprintb.c:1.33 src/tests/lib/libutil/t_snprintb.c:1.34
--- src/tests/lib/libutil/t_snprintb.c:1.33 Sun Apr 7 10:10:54 2024
+++ src/tests/lib/libutil/t_snprintb.c Sun Apr 7 12:05:23 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: t_snprintb.c,v 1.33 2024/04/07 10:10:54 rillig Exp $ */
+/* $NetBSD: t_snprintb.c,v 1.34 2024/04/07 12:05:23 rillig Exp $ */
/*
* Copyright (c) 2002, 2004, 2008, 2010, 2024 The NetBSD Foundation, Inc.
@@ -32,7 +32,7 @@
#include <sys/cdefs.h>
__COPYRIGHT("@(#) Copyright (c) 2008, 2010, 2024\
The NetBSD Foundation, inc. All rights reserved.");
-__RCSID("$NetBSD: t_snprintb.c,v 1.33 2024/04/07 10:10:54 rillig Exp $");
+__RCSID("$NetBSD: t_snprintb.c,v 1.34 2024/04/07 12:05:23 rillig Exp $");
#include <stdio.h>
#include <string.h>
@@ -123,6 +123,9 @@ check_snprintb_m(const char *file, size_
#define h_snprintb_error(bitfmt, want_buf) \
h_snprintb_m_len(1024, bitfmt, 0x00, 0, -1, want_buf)
+#define h_snprintb_val_error(bitfmt, val, want_buf) \
+ h_snprintb_m_len(1024, bitfmt, val, 0, -1, want_buf)
+
#define h_snprintb_m(bitfmt, val, line_max, want_buf) \
h_snprintb_m_len(1024, bitfmt, val, line_max, \
sizeof(want_buf) - 1, want_buf)
@@ -419,15 +422,13 @@ ATF_TC_BODY(snprintb, tc)
//
// The description of a 'b' conversion must not be empty, as the
// output would contain several commas in a row.
- h_snprintb_len(
- 1024,
+ h_snprintb_val_error(
"\177\020"
"b\000lsb\0"
"b\001\0"
"b\002\0"
"b\007msb\0",
0xff,
- -1,
"0xff<lsb#");
// new style single bits, bit number too large
@@ -591,13 +592,11 @@ ATF_TC_BODY(snprintb, tc)
//
// The description of an 'f' conversion must not be empty, as the
// output would contain an isolated '='.
- h_snprintb_len(
- 1024,
+ h_snprintb_val_error(
"\177\020"
"f\000\004\0"
"=\001one\0",
0x1,
- -1,
"0x1#");
// new style bit-field, non-printable description
@@ -620,29 +619,25 @@ ATF_TC_BODY(snprintb, tc)
//
// The description of a '=' conversion must not be empty, as the
// output would contain several '=' in a row.
- h_snprintb_len(
- 1024,
+ h_snprintb_val_error(
"\177\020"
"f\000\004f\0"
"=\001one\0"
"=\001\0"
"=\001\0",
0x1,
- -1,
"0x1<f=0x1=one#");
// new style bit-field, 'F' followed by ':' with empty description
//
// The description of a ':' conversion must not be empty, as the
// output would contain empty angle brackets.
- h_snprintb_len(
- 1024,
+ h_snprintb_val_error(
"\177\020"
"F\000\004\0"
":\001\0"
"*default\0",
0x1,
- -1,
"0x1<#");
// new style bit-field, 'F', ':' with empty description, '*'
@@ -651,14 +646,12 @@ ATF_TC_BODY(snprintb, tc)
// output would contain empty angle brackets. Not in this particular
// test case, as the value is different, but the structural error is
// detected nevertheless.
- h_snprintb_len(
- 1024,
+ h_snprintb_val_error(
"\177\020"
"F\000\004\0"
":\001\0"
"*default\0",
0x2,
- -1,
"0x2<#");
// new style bit-field, 'f' with non-exhaustive '='
@@ -819,29 +812,24 @@ ATF_TC_BODY(snprintb, tc)
"0xff<f=0xff=000000000000000000000000000255%>");
// new style unknown directive, at the beginning
- h_snprintb_len(
- 128,
+ h_snprintb_val_error(
"\177\020"
"unknown\0",
0xff,
- -1,
"0xff#");
// new style unknown directive, after a known directive
- h_snprintb_len(
- 128,
+ h_snprintb_val_error(
"\177\020"
"b\007msb\0"
"unknown\0",
0xff,
- -1,
"0xff<msb#");
// new style combinations, 'b' '='
//
- // A '=' directive without a preceding 'f' or 'F' directive generates
- // misleading output outside the angle brackets, which is a mistake.
- h_snprintb(
+ // A '=' conversion requires a preceding 'f' conversion.
+ h_snprintb_val_error(
"\177\020"
"b\004bit4\0"
"=\000clear\0"
@@ -852,14 +840,12 @@ ATF_TC_BODY(snprintb, tc)
"=\001set\0"
"=\245complete\0",
0xa5,
- "0xa5=complete<bit0=complete>");
+ "0xa5#");
// new style combinations, 'b' ':'
//
- // A ':' directive without a preceding 'f' or 'F' directive generates
- // misleading output outside or inside the angle brackets, which is a
- // mistake.
- h_snprintb(
+ // A ':' conversion requires a preceding 'f' or 'F' conversion.
+ h_snprintb_val_error(
"\177\020"
"b\004bit4\0"
":\000clear\0"
@@ -870,96 +856,93 @@ ATF_TC_BODY(snprintb, tc)
":\001set\0"
":\245complete\0",
0xa5,
- "0xa5complete<bit0complete>");
+ "0xa5#");
// new style combinations, 'b' '*'
//
- // A '*' directive without a preceding 'f' or 'F' directive is ignored.
- h_snprintb(
+ // A '*' conversion requires a preceding 'f' or 'F' conversion.
+ h_snprintb_val_error(
"\177\020"
"b\004bit4\0"
"*default(%ju)\0"
"b\000bit0\0"
"*default(%ju)\0",
0xa5,
- "0xa5<bit0>");
+ "0xa5#");
// new style combinations, 'f' 'b' '='
//
- // A 'b' directive that occurs between an 'f' and an '=' directive
- // generates misleading output, which is a mistake.
- h_snprintb(
+ // A '=' conversion requires a preceding 'f' conversion, there must
+ // not be a 'b' conversion in between.
+ h_snprintb_val_error(
"\177\020"
"f\000\010f\0"
"b\005bit5\0"
"=\245match\0",
0xa5,
- "0xa5<f=0xa5,bit5=match>");
+ "0xa5<f=0xa5,bit5#");
- // new style combinations, 'f' 'b' ':'
+ // new style combinations, 'F' 'b' ':'
//
- // A 'b' directive that occurs between an 'f' and a ':' directive
- // generates misleading output, which is a mistake.
- h_snprintb(
+ // A ':' conversion requires a preceding 'F' conversion, there must
+ // not be a 'b' conversion in between.
+ //
+ // The isolated leading comma is produced by the non-exhaustive 'F'
+ // conversion. Detecting these at runtime would be too costly.
+ h_snprintb_val_error(
"\177\020"
- "f\000\010f\0"
+ "F\000\010f\0"
"b\005bit5\0"
":\245match\0",
0xa5,
- "0xa5<f=0xa5,bit5match>");
+ "0xa5<,bit5#");
// new style combinations, 'f' ':'
//
- // Combining the 'f' directive with the ':' directive produces the
- // misleading output '0x1one', which is a mistake.
- h_snprintb(
+ // The ':' conversion requires a preceding 'F' conversion, not 'f'.
+ h_snprintb_val_error(
"\177\20"
"f\000\004nibble\0"
":\001one\0",
0x01,
- "0x1<nibble=0x1one>");
+ "0x1<nibble=0x1#");
// new style combinations, 'F' '='
//
- // Combining the 'F' and '=' directives outputs an isolated '=', which
- // is a mistake.
- h_snprintb(
+ // A '=' conversion requires a preceding 'f' conversion, not 'F'.
+ h_snprintb_val_error(
"\177\20"
"F\000\004\0"
"=\001one\0",
0x01,
- "0x1<=one>");
+ "0x1<#");
// new style combinations, '='
//
- // A '=' directive without a preceding 'f' or 'F' directive generates
- // output that doesn't match the standard '0xaa<description>' form,
- // which is a mistake.
- h_snprintb(
+ // A '=' conversion requires a preceding 'f' or 'F' conversion.
+ h_snprintb_val_error(
"\177\020"
"=\245match\0",
0xa5,
- "0xa5=match");
+ "0xa5#");
// new style combinations, ':'
//
- // A ':' directive without a preceding 'f' or 'F' directive generates
- // misleading output, which is a mistake.
- h_snprintb(
+ // A ':' conversion requires a preceding 'f' or 'F' conversion.
+ h_snprintb_val_error(
"\177\020"
":\245match\0",
0xa5,
- "0xa5match");
+ "0xa5#");
// new style combinations, '*'
//
- // A '*' directive without a preceding 'f' or 'F' is useless, which is
- // a mistake.
- h_snprintb(
+ // A '*' conversion requires a preceding 'f' or 'F' conversion.
+ h_snprintb_val_error(
"\177\020"
"*match\0",
0xa5,
- "0xa5");
+ "0xa5#");
// new style combinations, 'f' '*' '='
//
@@ -1492,7 +1475,7 @@ ATF_TC_BODY(snprintb_m, tc)
h_snprintb_m(
"\177\020"
"f\000\004bits\0"
- ":\000zero\0",
+ "=\000zero\0",
0xff,
11,
"0xff<bits=#\0");