Hi all,
This patch adds a new feature to the ``printf`` family of functions: a
``%B`` conversion specifier for printing unsigned numbers in binary.
It behaves exactly like ``%X``; only the base changes (16 -> 2).
``%b`` is already in use by some ``printf(1)`` implementations, so I
didn't use it for binary. In any case, binary has no letter digits, so
the only thing that would differ between ``%b`` and ``%B`` is the
``0b``/``0B`` prefix.
I also documented the new specifier in the man pages.
Disclaimer: I couldn't test it myself, so test it before applying it.
I also sent a patch today to add this specifier to glibc. They are
concerned about adding a new non-standard specifier, but if more C
libraries add it at the same time, it may gain wider acceptance.
Alex.
From 1a41d44571ccaf9ffaf36b2c2b96dd34e48eb5b7 Mon Sep 17 00:00:00 2001
From: Alejandro Colomar <colomar.6....@gmail.com>
Date: Mon, 27 Apr 2020 19:15:55 +0200
Subject: [PATCH 1/2] printf: Add %B conversion specifier for printing binary
---
lib/libc/stdio/vfprintf.c | 28 ++++++++++++++++++++--------
lib/libc/stdio/vfwprintf.c | 28 ++++++++++++++++++++--------
2 files changed, 40 insertions(+), 16 deletions(-)
diff --git a/lib/libc/stdio/vfprintf.c b/lib/libc/stdio/vfprintf.c
index 1d451a84f66..1e5cd3ad89b 100644
--- a/lib/libc/stdio/vfprintf.c
+++ b/lib/libc/stdio/vfprintf.c
@@ -310,9 +310,9 @@ __vfprintf(FILE *fp, const char *fmt0, __va_list ap)
char *dtoaresult = NULL;
#endif
- uintmax_t _umax; /* integer arguments %[diouxX] */
- enum { OCT, DEC, HEX } base; /* base for %[diouxX] conversion */
- int dprec; /* a copy of prec if %[diouxX], 0 otherwise */
+ uintmax_t _umax; /* integer arguments %[BdiouxX] */
+ enum { BIN, OCT, DEC, HEX } base; /* base for %[BdiouxX] conversion */
+ int dprec; /* a copy of prec if %[BdiouxX], 0 otherwise */
int realsz; /* field size expanded by dprec */
int size; /* size of converted field or string */
const char *xdigs; /* digits for %[xX] conversion */
@@ -320,7 +320,7 @@ __vfprintf(FILE *fp, const char *fmt0, __va_list ap)
struct __suio uio; /* output information: summary */
struct __siov iov[NIOV];/* ... and individual io vectors */
char buf[BUF]; /* buffer with space for digits of uintmax_t */
- char ox[2]; /* space for 0x; ox[1] is either x, X, or \0 */
+ char ox[2]; /* space for 0x; ox[1] is either x, X, B, or \0 */
union arg *argtable; /* args, built due to positional arg */
union arg statargtable[STATIC_ARG_TBL_SIZE];
size_t argtablesiz;
@@ -891,6 +891,10 @@ fp_common:
_umax = UARG();
base = DEC;
goto nosign;
+ case 'B':
+ _umax = UARG();
+ base = BIN;
+ goto bin;
case 'X':
xdigs = xdigs_upper;
goto hex;
@@ -898,8 +902,8 @@ fp_common:
xdigs = xdigs_lower;
hex: _umax = UARG();
base = HEX;
- /* leading 0x/X only if non-zero */
- if (flags & ALT && _umax != 0)
+ /* leading 0x/X/B only if non-zero */
+bin: if (flags & ALT && _umax != 0)
ox[1] = ch;
/* unsigned conversions */
@@ -925,6 +929,13 @@ number: if ((dprec = prec) >= 0)
* a variable; hence this switch.
*/
switch (base) {
+ case BIN:
+ do {
+ *--cp = to_char(_umax & 1);
+ _umax >>= 1;
+ } while (_umax);
+ break;
+
case OCT:
do {
*--cp = to_char(_umax & 7);
@@ -980,7 +991,7 @@ number: if ((dprec = prec) >= 0)
* first be prefixed by any sign or other prefix; otherwise,
* it should be blank padded before the prefix is emitted.
* After any left-hand padding and prefixing, emit zeroes
- * required by a decimal %[diouxX] precision, then print the
+ * required by a decimal %[BdiouxX] precision, then print the
* string proper, then emit zeroes required by any leftover
* floating precision; finally, if LADJUST, pad with blanks.
*
@@ -1000,7 +1011,7 @@ number: if ((dprec = prec) >= 0)
/* prefix */
if (sign)
PRINT(&sign, 1);
- if (ox[1]) { /* ox[1] is either x, X, or \0 */
+ if (ox[1]) { /* ox[1] is either x, X, B, or \0 */
ox[0] = '0';
PRINT(ox, 2);
}
@@ -1349,6 +1360,7 @@ reswitch: switch (ch) {
flags |= LONGINT;
/*FALLTHROUGH*/
case 'u':
+ case 'B':
case 'X':
case 'x':
ADDUARG();
diff --git a/lib/libc/stdio/vfwprintf.c b/lib/libc/stdio/vfwprintf.c
index e28901508fa..8c6227ee76b 100644
--- a/lib/libc/stdio/vfwprintf.c
+++ b/lib/libc/stdio/vfwprintf.c
@@ -319,14 +319,14 @@ __vfwprintf(FILE * __restrict fp, const wchar_t * __restrict fmt0, __va_list ap)
char *dtoaresult = NULL;
#endif
- uintmax_t _umax; /* integer arguments %[diouxX] */
- enum { OCT, DEC, HEX } base; /* base for %[diouxX] conversion */
- int dprec; /* a copy of prec if %[diouxX], 0 otherwise */
+ uintmax_t _umax; /* integer arguments %[BdiouxX] */
+ enum { BIN, OCT, DEC, HEX } base; /* base for %[BdiouxX] conversion */
+ int dprec; /* a copy of prec if %[BdiouxX], 0 otherwise */
int realsz; /* field size expanded by dprec */
int size; /* size of converted field or string */
const char *xdigs; /* digits for %[xX] conversion */
wchar_t buf[BUF]; /* buffer with space for digits of uintmax_t */
- wchar_t ox[2]; /* space for 0x; ox[1] is either x, X, or \0 */
+ wchar_t ox[2]; /* space for 0x; ox[1] is either x, X, B, or \0 */
union arg *argtable; /* args, built due to positional arg */
union arg statargtable[STATIC_ARG_TBL_SIZE];
size_t argtablesiz;
@@ -882,6 +882,10 @@ fp_common:
_umax = UARG();
base = DEC;
goto nosign;
+ case 'B':
+ _umax = UARG();
+ base = BIN;
+ goto bin;
case 'X':
xdigs = xdigs_upper;
goto hex;
@@ -889,8 +893,8 @@ fp_common:
xdigs = xdigs_lower;
hex: _umax = UARG();
base = HEX;
- /* leading 0x/X only if non-zero */
- if (flags & ALT && _umax != 0)
+ /* leading 0x/X/B only if non-zero */
+bin: if (flags & ALT && _umax != 0)
ox[1] = ch;
/* unsigned conversions */
@@ -916,6 +920,13 @@ number: if ((dprec = prec) >= 0)
* a variable; hence this switch.
*/
switch (base) {
+ case BIN:
+ do {
+ *--cp = to_char(_umax & 1);
+ _umax >>= 1;
+ } while (_umax);
+ break;
+
case OCT:
do {
*--cp = to_char(_umax & 7);
@@ -971,7 +982,7 @@ number: if ((dprec = prec) >= 0)
* first be prefixed by any sign or other prefix; otherwise,
* it should be blank padded before the prefix is emitted.
* After any left-hand padding and prefixing, emit zeroes
- * required by a decimal %[diouxX] precision, then print the
+ * required by a decimal %[BdiouxX] precision, then print the
* string proper, then emit zeroes required by any leftover
* floating precision; finally, if LADJUST, pad with blanks.
*
@@ -991,7 +1002,7 @@ number: if ((dprec = prec) >= 0)
/* prefix */
if (sign)
PRINT(&sign, 1);
- if (ox[1]) { /* ox[1] is either x, X, or \0 */
+ if (ox[1]) { /* ox[1] is either x, X, B, or \0 */
ox[0] = '0';
PRINT(ox, 2);
}
@@ -1348,6 +1359,7 @@ reswitch: switch (ch) {
flags |= LONGINT;
/*FALLTHROUGH*/
case 'u':
+ case 'B':
case 'X':
case 'x':
ADDUARG();
--
2.26.2
From d0926562ff31e2f340136fc42947fba3481a0fdd Mon Sep 17 00:00:00 2001
From: Alejandro Colomar <colomar.6....@gmail.com>
Date: Mon, 27 Apr 2020 20:02:31 +0200
Subject: [PATCH 2/2] printf: Document in printf.3 and wprintf.3 the new %B
conversion specifier
---
lib/libc/stdio/printf.3 | 28 ++++++++++++++++++++--------
lib/libc/stdio/wprintf.3 | 30 ++++++++++++++++++++----------
2 files changed, 40 insertions(+), 18 deletions(-)
diff --git a/lib/libc/stdio/printf.3 b/lib/libc/stdio/printf.3
index 82e124a1a58..8e34d1cae2b 100644
--- a/lib/libc/stdio/printf.3
+++ b/lib/libc/stdio/printf.3
@@ -190,15 +190,19 @@ conversions, the precision of the number is increased to force the first
character of the output string to a zero (except if a zero value is printed
with an explicit precision of zero).
For
-.Cm x
+.Cm x , X
and
-.Cm X
+.Cm B
conversions, a non-zero result has the string
.Ql 0x
(or
.Ql 0X
for
.Cm X
+conversions, or
+.Ql 0B
+for
+.Cm B
conversions) prepended to it.
For
.Cm a ,
@@ -228,7 +232,8 @@ For all conversions except
.Cm n ,
the converted value is padded on the left with zeros rather than blanks.
If a precision is given with a numeric conversion
-.Pf ( Cm d ,
+.Pf ( Cm B ,
+.Cm d ,
.Cm i ,
.Cm o ,
.Cm u ,
@@ -287,6 +292,7 @@ followed by an
optional digit string.
If the digit string is omitted, the precision is taken as zero.
This gives the minimum number of digits to appear for
+.Cm B ,
.Cm d ,
.Cm i ,
.Cm o ,
@@ -313,13 +319,14 @@ conversions.
.It
An optional length modifier, that specifies the size of the argument.
The following length modifiers are valid for the
+.Cm B ,
.Cm d , i , n ,
.Cm o , u , x ,
or
.Cm X
conversions:
.Bl -column "(deprecated)" "signed char" "unsigned long long" "long long *"
-.It Sy Modifier Ta Sy "d, i" Ta Sy "o, u, x, X" Ta Sy n
+.It Sy Modifier Ta Sy "d, i" Ta Sy "B, o, u, x, X" Ta Sy n
.It hh Ta "signed char" Ta "unsigned char" Ta "signed char *"
.It h Ta short Ta "unsigned short" Ta "short *"
.It "l (ell)" Ta long Ta "unsigned long" Ta "long *"
@@ -334,7 +341,7 @@ Note:
the
.Cm t
modifier, when applied to an
-.Cm o , u , x ,
+.Cm B , o , u , x ,
or
.Cm X
conversion, indicates that the argument is of an unsigned type
@@ -402,11 +409,13 @@ If a single format directive mixes positional
(nn$) and
non-positional arguments, the results are undefined.
.Pp
The conversion specifiers and their meanings are:
-.Bl -tag -width "diouxX"
-.It Cm diouxX
+.Bl -tag -width "BdiouxX"
+.It Cm BdiouxX
The
.Li int
-(or appropriate variant) argument is converted to signed decimal
+(or appropriate variant) argument is converted to unsigned binary
+.Pq Cm B ,
+signed decimal
.Pf ( Cm d
and
.Cm i ) ,
@@ -830,6 +839,9 @@ and
.Cm \&%U
are not standard and
are provided only for backward compatibility.
+The conversion format
+.Cm \&%B
+is not standard.
The effect of padding the
.Cm %p
format with zeros (either by the
diff --git a/lib/libc/stdio/wprintf.3 b/lib/libc/stdio/wprintf.3
index bac123c2ef6..624606b2627 100644
--- a/lib/libc/stdio/wprintf.3
+++ b/lib/libc/stdio/wprintf.3
@@ -164,15 +164,19 @@ conversions, the precision of the number is increased to force the first
character of the output string to a zero (except if a zero value is printed
with an explicit precision of zero).
For
-.Cm x
+.Cm x , X
and
-.Cm X
+.Cm B
conversions, a non-zero result has the string
.Ql 0x
(or
.Ql 0X
for
.Cm X
+conversions, or
+.Ql 0B
+for
+.Cm B
conversions) prepended to it.
For
.Cm a , A , e , E , f , F , g ,
@@ -194,7 +198,7 @@ For all conversions except
.Cm n ,
the converted value is padded on the left with zeros rather than blanks.
If a precision is given with a numeric conversion
-.Cm ( d , i , o , u , i , x ,
+.Cm ( B , d , i , o , u , i , x ,
and
.Cm X ) ,
the
@@ -250,7 +254,7 @@ followed by an
optional digit string.
If the digit string is omitted, the precision is taken as zero.
This gives the minimum number of digits to appear for
-.Cm d , i , o , u , x ,
+.Cm B , d , i , o , u , x ,
and
.Cm X
conversions, the number of digits to appear after the decimal-point for
@@ -268,12 +272,12 @@ conversions.
.It
An optional length modifier that specifies the size of the argument.
The following length modifiers are valid for the
-.Cm d , i , n , o , u , x ,
+.Cm B , d , i , n , o , u , x ,
or
.Cm X
conversion:
.Bl -column "q (deprecated)" "signed char" "unsigned long long" "long long *"
-.It Sy Modifier Ta Sy "d, i" Ta Sy "o, u, x, X" Ta Sy n
+.It Sy Modifier Ta Sy "d, i" Ta Sy "B, o, u, x, X" Ta Sy n
.It hh Ta "signed char" Ta "unsigned char" Ta "signed char *"
.It h Ta short Ta "unsigned short" Ta "short *"
.It "l (ell)" Ta long Ta "unsigned long" Ta "long *"
@@ -288,7 +292,7 @@ Note:
the
.Cm t
modifier, when applied to a
-.Cm o , u , x ,
+.Cm B , o , u , x ,
or
.Cm X
conversion, indicates that the argument is of an unsigned type
@@ -350,11 +354,13 @@ If a single format directive mixes positional
and non-positional arguments, the results are undefined.
.Pp
The conversion specifiers and their meanings are:
-.Bl -tag -width "diouxX"
-.It Cm diouxX
+.Bl -tag -width "BdiouxX"
+.It Cm BdiouxX
The
.Vt int
-(or appropriate variant) argument is converted to signed decimal
+(or appropriate variant) argument is converted to unsigned binary
+.Pq Cm B ,
+signed decimal
.Cm ( d
and
.Cm i ) ,
@@ -602,6 +608,10 @@ functions
conform to
.St -isoC-99 .
.Sh CAVEATS
+The conversion format
+.Cm \&%B
+is not standard.
+.Pp
On systems other than
.Ox ,
the
--
2.26.2