Re: [PATCH] libbb: in xmalloc_fgetline, use getline if enabled

2023-05-28 Thread tito
On Sun, 28 May 2023 19:01:52 +0200
Elvira Khabirova  wrote:

> When xmalloc_fgetline was introduced, getline(3) was a GNU extension
> and was not necessarily implemented in libcs. Since then,
> it's become a part of POSIX.1-2008 and is now implemented in
> glibc, uClibc-ng, and musl.
> 
> Previously, xmalloc_fgetline directly called bb_get_chunk_from_file.
> The issue with that approach is that bb_get_chunk_from_file stops
> both at \n and at \0. This introduces unexpected behavior when tools
> are presented with inputs containing \0. For example, in a comparison
> of GNU core utils cut with busybox cut:
> 
> % echo -ne '\x00\x01\x00\x03\x04\x05\x06' | cut -b1-3 | hexdump -C
>   00 01 00 0a   ||
> 0004
> % echo -ne '\x00\x01\x00\x03\x04\x05\x06' | ./busybox cut -b1-3 | hexdump -C
>   0a 01 0a 03 04 05 0a  |...|
> 0007
> 
> Here, due to bb_get_chunk_from_file splitting lines by \n and \0,
> busybox cut interprets the input as three lines instead of one.
> 
> Also, since xmalloc_fgetline never returned strings with embedded \0,
> cut_file expects strlen of the returned string to match the string's
> total length.
> 
> To fix the behavior of the cut utility, introduce xmalloc_fgetline_n,
> that fully matches the behavior of xmalloc_fgetline,
> but also returns the amount of bytes read.
> 
> Add a configuration option, FEATURE_USE_GETLINE, and enable it
> by default. Use getline in xmalloc_fgetline_n if the feature is enabled.
> 
> Change the behavior of cut_file to use the values returned
> by the new function instead of calling strlen.
> 
> Call xmalloc_fgetline_n from xmalloc_fgetline.
> 
> Add a test for the previously mentioned case.
> 
> With FEATURE_USE_GETLINE:
> 
> function old new   delta
> xmalloc_fgetline_n - 173+173
> xmalloc_fgetline  85  58 -27
> cut_main14061367 -39
> --
> (add/remove: 1/0 grow/shrink: 0/2 up/down: 173/-66)   Total: 107 bytes
> 
> Without FEATURE_USE_GETLINE:
> 
> function old new   delta
> xmalloc_fgetline_n -  41 +41
> xmalloc_fgetline  85  58 -27
> cut_main14061367 -39
> --
> (add/remove: 1/0 grow/shrink: 0/2 up/down: 41/-66)Total: -25 bytes
> 
> Fixes: https://bugs.busybox.net/show_bug.cgi?id=15276
> Signed-off-by: Elvira Khabirova 
> ---
>  coreutils/cut.c|  4 ++--
>  include/libbb.h|  2 ++
>  libbb/Config.src   | 11 +++
>  libbb/get_line_from_file.c | 37 -
>  testsuite/cut.tests|  2 ++
>  5 files changed, 49 insertions(+), 7 deletions(-)
> 
> diff --git a/coreutils/cut.c b/coreutils/cut.c
> index 55bdd9386..7c87467ca 100644
> --- a/coreutils/cut.c
> +++ b/coreutils/cut.c
> @@ -89,6 +89,7 @@ static void cut_file(FILE *file, const char *delim, const 
> char *odelim,
>   const struct cut_list *cut_lists, unsigned nlists)
>  {
>   char *line;
> + size_t linelen = 0;
>   unsigned linenum = 0;   /* keep these zero-based to be consistent */
>   regex_t reg;
>   int spos, shoe = option_mask32 & CUT_OPT_REGEX_FLGS;
> @@ -96,10 +97,9 @@ static void cut_file(FILE *file, const char *delim, const 
> char *odelim,
>   if (shoe) xregcomp(, delim, REG_EXTENDED);
>  
>   /* go through every line in the file */
> - while ((line = xmalloc_fgetline(file)) != NULL) {
> + while ((line = xmalloc_fgetline_n(file, )) != NULL) {
>  
>   /* set up a list so we can keep track of what's been printed */
> - int linelen = strlen(line);
>   char *printed = xzalloc(linelen + 1);
>   char *orig_line = line;
>   unsigned cl_pos = 0;
> diff --git a/include/libbb.h b/include/libbb.h
> index 6191debb1..73d16647a 100644
> --- a/include/libbb.h
> +++ b/include/libbb.h
> @@ -1048,6 +1048,8 @@ extern char *xmalloc_fgetline_str(FILE *file, const 
> char *terminating_string) FA
>  extern char *xmalloc_fgets(FILE *file) FAST_FUNC RETURNS_MALLOC;
>  /* Chops off '\n' from the end, unlike fgets: */
>  extern char *xmalloc_fgetline(FILE *file) FAST_FUNC RETURNS_MALLOC;
> +/* Same as xmalloc_fgetline but returns number of bytes read: */
> +extern char* xmalloc_fgetline_n(FILE *file, size_t *n) FAST_FUNC 
> RETURNS_MALLOC;
>  /* Same, but doesn't try to conserve space (may have some slack after the 
> end) */
>  /* extern char *xmalloc_fgetline_fast(FILE *file) FAST_FUNC RETURNS_MALLOC; 
> */
>  
> 

[PATCH] libbb: in xmalloc_fgetline, use getline if enabled

2023-05-28 Thread Elvira Khabirova
When xmalloc_fgetline was introduced, getline(3) was a GNU extension
and was not necessarily implemented in libcs. Since then,
it's become a part of POSIX.1-2008 and is now implemented in
glibc, uClibc-ng, and musl.

Previously, xmalloc_fgetline directly called bb_get_chunk_from_file.
The issue with that approach is that bb_get_chunk_from_file stops
both at \n and at \0. This introduces unexpected behavior when tools
are presented with inputs containing \0. For example, in a comparison
of GNU core utils cut with busybox cut:

% echo -ne '\x00\x01\x00\x03\x04\x05\x06' | cut -b1-3 | hexdump -C
  00 01 00 0a   ||
0004
% echo -ne '\x00\x01\x00\x03\x04\x05\x06' | ./busybox cut -b1-3 | hexdump -C
  0a 01 0a 03 04 05 0a  |...|
0007

Here, due to bb_get_chunk_from_file splitting lines by \n and \0,
busybox cut interprets the input as three lines instead of one.

Also, since xmalloc_fgetline never returned strings with embedded \0,
cut_file expects strlen of the returned string to match the string's
total length.

To fix the behavior of the cut utility, introduce xmalloc_fgetline_n,
that fully matches the behavior of xmalloc_fgetline,
but also returns the amount of bytes read.

Add a configuration option, FEATURE_USE_GETLINE, and enable it
by default. Use getline in xmalloc_fgetline_n if the feature is enabled.

Change the behavior of cut_file to use the values returned
by the new function instead of calling strlen.

Call xmalloc_fgetline_n from xmalloc_fgetline.

Add a test for the previously mentioned case.

With FEATURE_USE_GETLINE:

function old new   delta
xmalloc_fgetline_n - 173+173
xmalloc_fgetline  85  58 -27
cut_main14061367 -39
--
(add/remove: 1/0 grow/shrink: 0/2 up/down: 173/-66)   Total: 107 bytes

Without FEATURE_USE_GETLINE:

function old new   delta
xmalloc_fgetline_n -  41 +41
xmalloc_fgetline  85  58 -27
cut_main14061367 -39
--
(add/remove: 1/0 grow/shrink: 0/2 up/down: 41/-66)Total: -25 bytes

Fixes: https://bugs.busybox.net/show_bug.cgi?id=15276
Signed-off-by: Elvira Khabirova 
---
 coreutils/cut.c|  4 ++--
 include/libbb.h|  2 ++
 libbb/Config.src   | 11 +++
 libbb/get_line_from_file.c | 37 -
 testsuite/cut.tests|  2 ++
 5 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/coreutils/cut.c b/coreutils/cut.c
index 55bdd9386..7c87467ca 100644
--- a/coreutils/cut.c
+++ b/coreutils/cut.c
@@ -89,6 +89,7 @@ static void cut_file(FILE *file, const char *delim, const 
char *odelim,
const struct cut_list *cut_lists, unsigned nlists)
 {
char *line;
+   size_t linelen = 0;
unsigned linenum = 0;   /* keep these zero-based to be consistent */
regex_t reg;
int spos, shoe = option_mask32 & CUT_OPT_REGEX_FLGS;
@@ -96,10 +97,9 @@ static void cut_file(FILE *file, const char *delim, const 
char *odelim,
if (shoe) xregcomp(, delim, REG_EXTENDED);
 
/* go through every line in the file */
-   while ((line = xmalloc_fgetline(file)) != NULL) {
+   while ((line = xmalloc_fgetline_n(file, )) != NULL) {
 
/* set up a list so we can keep track of what's been printed */
-   int linelen = strlen(line);
char *printed = xzalloc(linelen + 1);
char *orig_line = line;
unsigned cl_pos = 0;
diff --git a/include/libbb.h b/include/libbb.h
index 6191debb1..73d16647a 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -1048,6 +1048,8 @@ extern char *xmalloc_fgetline_str(FILE *file, const char 
*terminating_string) FA
 extern char *xmalloc_fgets(FILE *file) FAST_FUNC RETURNS_MALLOC;
 /* Chops off '\n' from the end, unlike fgets: */
 extern char *xmalloc_fgetline(FILE *file) FAST_FUNC RETURNS_MALLOC;
+/* Same as xmalloc_fgetline but returns number of bytes read: */
+extern char* xmalloc_fgetline_n(FILE *file, size_t *n) FAST_FUNC 
RETURNS_MALLOC;
 /* Same, but doesn't try to conserve space (may have some slack after the end) 
*/
 /* extern char *xmalloc_fgetline_fast(FILE *file) FAST_FUNC RETURNS_MALLOC; */
 
diff --git a/libbb/Config.src b/libbb/Config.src
index b980f19a9..7a37929d6 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -147,6 +147,17 @@ config MONOTONIC_SYSCALL
will be used instead (which gives wrong results if date/time
is