[PATCH] printf: Add %B conversion specifier for binary

Alejandro Colomar Mon, 27 Apr 2020 11:35:35 -0700

Hi all,

This patch adds a new feature to the ``printf`` family of functions:


``%B`` conversion specifier for printing unsigned numbers in binary.

Behaviour is exactly the same as with ``%X``; only the base changes (16 -> 2).

``%b`` is already in use by some ``printf(1)`` implementations, so I
didn't use it for binary.  In any case, binary digits include no letters,
so the only difference between the two would be the ``0b``/``0B`` prefix.

I also documented the new specifier in the man pages.

Disclaimer: I couldn't test it myself, so please test it before applying it.

I also sent a patch today to add this specifier to glibc.  They are
concerned about adding a new non-standard specifier, but if more C
libraries add it at the same time, it may become a thing.

                Alex.


From 1a41d44571ccaf9ffaf36b2c2b96dd34e48eb5b7 Mon Sep 17 00:00:00 2001
From: Alejandro Colomar <colomar.6....@gmail.com>
Date: Mon, 27 Apr 2020 19:15:55 +0200
Subject: [PATCH 1/2] printf: Add %B conversion specifier for printing binary

---
 lib/libc/stdio/vfprintf.c  | 28 ++++++++++++++++++++--------
 lib/libc/stdio/vfwprintf.c | 28 ++++++++++++++++++++--------
 2 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/lib/libc/stdio/vfprintf.c b/lib/libc/stdio/vfprintf.c
index 1d451a84f66..1e5cd3ad89b 100644
--- a/lib/libc/stdio/vfprintf.c
+++ b/lib/libc/stdio/vfprintf.c
@@ -310,9 +310,9 @@ __vfprintf(FILE *fp, const char *fmt0, __va_list ap)
        char *dtoaresult = NULL;
 #endif

-       uintmax_t _umax;        /* integer arguments %[diouxX] */
-       enum { OCT, DEC, HEX } base;    /* base for %[diouxX] conversion */
-       int dprec;              /* a copy of prec if %[diouxX], 0 otherwise */
+       uintmax_t _umax;        /* integer arguments %[BdiouxX] */
+       enum { BIN, OCT, DEC, HEX } base; /* base for %[BdiouxX] conversion */
+       int dprec;              /* a copy of prec if %[BdiouxX], 0 otherwise */
        int realsz;             /* field size expanded by dprec */
        int size;               /* size of converted field or string */
        const char *xdigs;      /* digits for %[xX] conversion */
@@ -320,7 +320,7 @@ __vfprintf(FILE *fp, const char *fmt0, __va_list ap)
        struct __suio uio;      /* output information: summary */
        struct __siov iov[NIOV];/* ... and individual io vectors */
        char buf[BUF];          /* buffer with space for digits of uintmax_t */
-       char ox[2];             /* space for 0x; ox[1] is either x, X, or \0 */
+       char ox[2];             /* space for 0x; ox[1] is either x, X,B or \0 */
        union arg *argtable;    /* args, built due to positional arg */
        union arg statargtable[STATIC_ARG_TBL_SIZE];
        size_t argtablesiz;
@@ -891,6 +891,10 @@ fp_common:
                        _umax = UARG();
                        base = DEC;
                        goto nosign;
+               case 'B':
+                       _umax = UARG();
+                       base = BIN;
+                       goto bin;
                case 'X':
                        xdigs = xdigs_upper;
                        goto hex;
@@ -898,8 +902,8 @@ fp_common:
                        xdigs = xdigs_lower;
 hex:                   _umax = UARG();
                        base = HEX;
-                       /* leading 0x/X only if non-zero */
-                       if (flags & ALT && _umax != 0)
+                       /* leading 0x/X/B only if non-zero */
+bin:                   if (flags & ALT && _umax != 0)
                                ox[1] = ch;

                        /* unsigned conversions */
@@ -925,6 +929,13 @@ number:                    if ((dprec = prec) >= 0)
                                 * a variable; hence this switch.
                                 */
                                switch (base) {
+                               case BIN:
+                                       do {
+                                               *--cp = to_char(_umax & 1);
+                                               _umax >>= 1;
+                                       } while (_umax);
+                                       break;
+
                                case OCT:
                                        do {
                                                *--cp = to_char(_umax & 7);
@@ -980,7 +991,7 @@ number:                     if ((dprec = prec) >= 0)
                 * first be prefixed by any sign or other prefix; otherwise,
                 * it should be blank padded before the prefix is emitted.
                 * After any left-hand padding and prefixing, emit zeroes
-                * required by a decimal %[diouxX] precision, then print the
+                * required by a decimal %[BdiouxX] precision, then print the
                 * string proper, then emit zeroes required by any leftover
                 * floating precision; finally, if LADJUST, pad with blanks.
                 *
@@ -1000,7 +1011,7 @@ number:                   if ((dprec = prec) >= 0)
                /* prefix */
                if (sign)
                        PRINT(&sign, 1);
-               if (ox[1]) {    /* ox[1] is either x, X, or \0 */
+               if (ox[1]) {    /* ox[1] is either x, X, B, or \0 */
                        ox[0] = '0';
                        PRINT(ox, 2);
                }
@@ -1349,6 +1360,7 @@ reswitch: switch (ch) {
                        flags |= LONGINT;
                        /*FALLTHROUGH*/
                case 'u':
+               case 'B':
                case 'X':
                case 'x':
                        ADDUARG();
diff --git a/lib/libc/stdio/vfwprintf.c b/lib/libc/stdio/vfwprintf.c
index e28901508fa..8c6227ee76b 100644
--- a/lib/libc/stdio/vfwprintf.c
+++ b/lib/libc/stdio/vfwprintf.c

@@ -319,14 +319,14 @@ __vfwprintf(FILE * __restrict fp, const wchar_t *__restrict fmt0, __va_list ap)

        char *dtoaresult = NULL;
 #endif

-       uintmax_t _umax;        /* integer arguments %[diouxX] */
-       enum { OCT, DEC, HEX } base;    /* base for %[diouxX] conversion */
-       int dprec;              /* a copy of prec if %[diouxX], 0 otherwise */
+       uintmax_t _umax;        /* integer arguments %[BdiouxX] */
+       enum { BIN, OCT, DEC, HEX } base; /* base for %[BdiouxX] conversion */
+       int dprec;              /* a copy of prec if %[BdiouxX], 0 otherwise */
        int realsz;             /* field size expanded by dprec */
        int size;               /* size of converted field or string */
        const char *xdigs;      /* digits for %[xX] conversion */
        wchar_t buf[BUF];       /* buffer with space for digits of uintmax_t */
-       wchar_t ox[2];          /* space for 0x; ox[1] is either x, X, or \0 */
+       wchar_t ox[2];          /* space for 0x; ox[1] is either x, X,B or \0 */
        union arg *argtable;    /* args, built due to positional arg */
        union arg statargtable[STATIC_ARG_TBL_SIZE];
        size_t argtablesiz;
@@ -882,6 +882,10 @@ fp_common:
                        _umax = UARG();
                        base = DEC;
                        goto nosign;
+               case 'B':
+                       _umax = UARG();
+                       base = BIN;
+                       goto bin;
                case 'X':
                        xdigs = xdigs_upper;
                        goto hex;
@@ -889,8 +893,8 @@ fp_common:
                        xdigs = xdigs_lower;
 hex:                   _umax = UARG();
                        base = HEX;
-                       /* leading 0x/X only if non-zero */
-                       if (flags & ALT && _umax != 0)
+                       /* leading 0x/X/B only if non-zero */
+bin:                   if (flags & ALT && _umax != 0)
                                ox[1] = ch;

                        /* unsigned conversions */
@@ -916,6 +920,13 @@ number:                    if ((dprec = prec) >= 0)
                                 * a variable; hence this switch.
                                 */
                                switch (base) {
+                               case BIN:
+                                       do {
+                                               *--cp = to_char(_umax & 1);
+                                               _umax >>= 1;
+                                       } while (_umax);
+                                       break;
+
                                case OCT:
                                        do {
                                                *--cp = to_char(_umax & 7);
@@ -971,7 +982,7 @@ number:                     if ((dprec = prec) >= 0)
                 * first be prefixed by any sign or other prefix; otherwise,
                 * it should be blank padded before the prefix is emitted.
                 * After any left-hand padding and prefixing, emit zeroes
-                * required by a decimal %[diouxX] precision, then print the
+                * required by a decimal %[BdiouxX] precision, then print the
                 * string proper, then emit zeroes required by any leftover
                 * floating precision; finally, if LADJUST, pad with blanks.
                 *
@@ -991,7 +1002,7 @@ number:                    if ((dprec = prec) >= 0)
                /* prefix */
                if (sign)
                        PRINT(&sign, 1);
-               if (ox[1]) {    /* ox[1] is either x, X, or \0 */
+               if (ox[1]) {    /* ox[1] is either x, X, B, or \0 */
                        ox[0] = '0';
                        PRINT(ox, 2);
                }
@@ -1348,6 +1359,7 @@ reswitch: switch (ch) {
                        flags |= LONGINT;
                        /*FALLTHROUGH*/
                case 'u':
+               case 'B':
                case 'X':
                case 'x':
                        ADDUARG();
--
2.26.2


From d0926562ff31e2f340136fc42947fba3481a0fdd Mon Sep 17 00:00:00 2001
From: Alejandro Colomar <colomar.6....@gmail.com>
Date: Mon, 27 Apr 2020 20:02:31 +0200
Subject: [PATCH 2/2] printf: Document in printf.3 and wprintf.3 the new %B
 conversion specifier

---
 lib/libc/stdio/printf.3  | 28 ++++++++++++++++++++--------
 lib/libc/stdio/wprintf.3 | 30 ++++++++++++++++++++----------
 2 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/lib/libc/stdio/printf.3 b/lib/libc/stdio/printf.3
index 82e124a1a58..8e34d1cae2b 100644
--- a/lib/libc/stdio/printf.3
+++ b/lib/libc/stdio/printf.3

@@ -190,15 +190,19 @@ conversions, the precision of the number is increased to force the first character of the output string to a zero (except if a zero value is printed

 with an explicit precision of zero).
 For
-.Cm x
+.Cm x , X
 and
-.Cm X
+.Cm B
 conversions, a non-zero result has the string
 .Ql 0x
 (or
 .Ql 0X
 for
 .Cm X
+conversions, or
+.Ql 0B
+for
+.Cm B
 conversions) prepended to it.
 For
 .Cm a ,
@@ -228,7 +232,8 @@ For all conversions except
 .Cm n ,
 the converted value is padded on the left with zeros rather than blanks.
 If a precision is given with a numeric conversion
-.Pf ( Cm d ,
+.Pf ( Cm B ,
+.Cm d ,
 .Cm i ,
 .Cm o ,
 .Cm u ,
@@ -287,6 +292,7 @@ followed by an
 optional digit string.
 If the digit string is omitted, the precision is taken as zero.
 This gives the minimum number of digits to appear for
+.Cm B ,
 .Cm d ,
 .Cm i ,
 .Cm o ,
@@ -313,13 +319,14 @@ conversions.
 .It
 An optional length modifier, that specifies the size of the argument.
 The following length modifiers are valid for the
+.Cm B ,
 .Cm d , i , n ,
 .Cm o , u , x ,
 or
 .Cm X
 conversions:

.Bl -column "(deprecated)" "signed char" "unsigned long long" "longlong *"

-.It Sy Modifier Ta Sy "d, i" Ta Sy "o, u, x, X" Ta Sy n
+.It Sy Modifier Ta Sy "d, i" Ta Sy "B, o, u, x, X" Ta Sy n
 .It hh Ta "signed char" Ta "unsigned char" Ta "signed char *"
 .It h Ta short Ta "unsigned short" Ta "short *"
 .It "l (ell)" Ta long Ta "unsigned long" Ta "long *"
@@ -334,7 +341,7 @@ Note:
 the
 .Cm t
 modifier, when applied to an
-.Cm o , u , x ,
+.Cm B , o , u , x ,
 or
 .Cm X
 conversion, indicates that the argument is of an unsigned type

@@ -402,11 +409,13 @@ If a single format directive mixes positional(nn$) and

 non-positional arguments, the results are undefined.
 .Pp
 The conversion specifiers and their meanings are:
-.Bl -tag -width "diouxX"
-.It Cm diouxX
+.Bl -tag -width "BdiouxX"
+.It Cm BdiouxX
 The
 .Li int
-(or appropriate variant) argument is converted to signed decimal
+(or appropriate variant) argument is converted to unsigned binary
+.Pq Cm B ,
+signed decimal
 .Pf ( Cm d
 and
 .Cm i ) ,
@@ -830,6 +839,9 @@ and
 .Cm \&%U
 are not standard and
 are provided only for backward compatibility.
+The conversion format
+.Cm \&%B
+is not standard.
 The effect of padding the
 .Cm %p
 format with zeros (either by the
diff --git a/lib/libc/stdio/wprintf.3 b/lib/libc/stdio/wprintf.3
index bac123c2ef6..624606b2627 100644
--- a/lib/libc/stdio/wprintf.3
+++ b/lib/libc/stdio/wprintf.3

@@ -164,15 +164,19 @@ conversions, the precision of the number is increased to force the first character of the output string to a zero (except if a zero value is printed

 with an explicit precision of zero).
 For
-.Cm x
+.Cm x , X
 and
-.Cm X
+.Cm B
 conversions, a non-zero result has the string
 .Ql 0x
 (or
 .Ql 0X
 for
 .Cm X
+conversions, or
+.Ql 0B
+for
+.Cm B
 conversions) prepended to it.
 For
 .Cm a , A , e , E , f , F , g ,
@@ -194,7 +198,7 @@ For all conversions except
 .Cm n ,
 the converted value is padded on the left with zeros rather than blanks.
 If a precision is given with a numeric conversion
-.Cm ( d , i , o , u , i , x ,
+.Cm ( B , d , i , o , u , i , x ,
 and
 .Cm X ) ,
 the
@@ -250,7 +254,7 @@ followed by an
 optional digit string.
 If the digit string is omitted, the precision is taken as zero.
 This gives the minimum number of digits to appear for
-.Cm d , i , o , u , x ,
+.Cm B , d , i , o , u , x ,
 and
 .Cm X
 conversions, the number of digits to appear after the decimal-point for
@@ -268,12 +272,12 @@ conversions.
 .It
 An optional length modifier that specifies the size of the argument.
 The following length modifiers are valid for the
-.Cm d , i , n , o , u , x ,
+.Cm B , d , i , n , o , u , x ,
 or
 .Cm X
 conversion:

.Bl -column "q (deprecated)" "signed char" "unsigned long long" "longlong *"

-.It Sy Modifier Ta Sy "d, i" Ta Sy "o, u, x, X" Ta Sy n
+.It Sy Modifier Ta Sy "d, i" Ta Sy "B, o, u, x, X" Ta Sy n
 .It hh Ta "signed char" Ta "unsigned char" Ta "signed char *"
 .It h Ta short Ta "unsigned short" Ta "short *"
 .It "l (ell)" Ta long Ta "unsigned long" Ta "long *"
@@ -288,7 +292,7 @@ Note:
 the
 .Cm t
 modifier, when applied to a
-.Cm o , u , x ,
+.Cm B , o , u , x ,
 or
 .Cm X
 conversion, indicates that the argument is of an unsigned type
@@ -350,11 +354,13 @@ If a single format directive mixes positional
 and non-positional arguments, the results are undefined.
 .Pp
 The conversion specifiers and their meanings are:
-.Bl -tag -width "diouxX"
-.It Cm diouxX
+.Bl -tag -width "BdiouxX"
+.It Cm BdiouxX
 The
 .Vt int
-(or appropriate variant) argument is converted to signed decimal
+(or appropriate variant) argument is converted to unsigned binary
+.Pq Cm B ,
+signed decimal
 .Cm ( d
 and
 .Cm i ) ,
@@ -602,6 +608,10 @@ functions
 conform to
 .St -isoC-99 .
 .Sh CAVEATS
+The conversion format
+.Cm \&%B
+is not standard.
+.Pp
 On systems other than
 .Ox ,
 the
--
2.26.2

[PATCH] printf: Add %B conversion specifier for binary

Reply via email to